Program Listing for File text.hpp

Return to documentation for file (mcfp/text.hpp)

/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2022 Maarten L. hekkelman
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include <mcfp/detail/charconv.hpp>

namespace mcfp
{

template <typename T>
using charconv = typename detail::charconv<T>;

template <template<class...> class Op, class... Args>
constexpr inline bool is_detected_v = detail::is_detected_v<Op,Args...>;

// --------------------------------------------------------------------

class word_wrapper : public std::vector<std::string_view>
{
  public:
    word_wrapper(std::string_view text, size_t width)
        : m_width(width)
    {
        std::string_view::size_type line_start = 0, line_end = text.find('\n');

        for (;;)
        {
            auto line = text.substr(line_start, line_end - line_start);
            if (line.empty())
                this->push_back(line);
            else
            {
                auto lines = wrap_line(line);
                this->insert(this->end(), lines.begin(), lines.end());
            }

            if (line_end == std::string_view::npos)
                break;

            line_start = line_end + 1;
            line_end = text.find('\n', line_start);
        }
    }

  private:
    std::vector<std::string_view> wrap_line(std::string_view line)
    {
        std::vector<std::string_view> result;
        std::vector<size_t> offsets = { 0 };

        auto b = line.begin();
        while (b != line.end())
        {
            auto e = next_line_break(b, line.end());

            offsets.push_back(e - line.begin());

            b = e;
        }

        size_t count = offsets.size() - 1;

        std::vector<size_t> minima(count + 1, std::numeric_limits<size_t>::max());
        minima[0] = 0;
        std::vector<size_t> breaks(count + 1, 0);

        for (size_t i = 0; i < count; ++i)
        {
            size_t j = i + 1;
            while (j <= count)
            {
                size_t w = offsets[j] - offsets[i];

                if (w > m_width)
                    break;

                while (w > 0 and std::isspace(line[offsets[i] + w - 1]))
                    --w;

                size_t cost = minima[i];
                if (j < count) // last line may be shorter
                    cost += (m_width - w) * (m_width - w);

                if (cost < minima[j])
                {
                    minima[j] = cost;
                    breaks[j] = i;
                }

                ++j;
            }
        }

        size_t j = count;
        while (j > 0)
        {
            size_t i = breaks[j];
            result.push_back(line.substr(offsets[i], offsets[j] - offsets[i]));
            j = i;
        }

        reverse(result.begin(), result.end());

        return result;
    }

    std::string_view::const_iterator next_line_break(std::string_view::const_iterator text, std::string_view::const_iterator end)
    {
        if (text == end)
            return text;

        enum LineBreakClass
        {
            OP, // OpenPunctuation,
            CL, // ClosePunctuation,
            CP, // CloseParenthesis,
            QU, // Quotation,
            EX, // Exlamation,
            SY, // SymbolAllowingBreakAfter,
            IS, // InfixNumericSeparator,
            PR, // PrefixNumeric,
            PO, // PostfixNumeric,
            NU, // Numeric,
            AL, // Alphabetic,
            HY, // Hyphen,
            BA, // BreakAfter,
            CM, // CombiningMark,
            WJ, // WordJoiner,

            MB, // MandatoryBreak,
            SP, // Space,
        };

        static const LineBreakClass kASCII_LineBreakTable[128] = {
            CM, CM, CM, CM, CM, CM, CM, CM,
            CM, BA, MB, MB, MB, SP, CM, CM,
            CM, CM, CM, CM, CM, CM, CM, CM,
            CM, CM, CM, CM, CM, CM, CM, CM,
            SP, EX, QU, AL, PR, PO, AL, QU,
            OP, CP, AL, PR, IS, HY, IS, SY,
            NU, NU, NU, NU, NU, NU, NU, NU,
            NU, NU, IS, IS, AL, AL, AL, EX,
            AL, AL, AL, AL, AL, AL, AL, AL,
            AL, AL, AL, AL, AL, AL, AL, AL,
            AL, AL, AL, AL, AL, AL, AL, AL,
            AL, AL, AL, OP, PR, CP, AL, AL,
            AL, AL, AL, AL, AL, AL, AL, AL,
            AL, AL, AL, AL, AL, AL, AL, AL,
            AL, AL, AL, AL, AL, AL, AL, AL,
            AL, AL, AL, OP, BA, CL, AL, CM
        };

        enum BreakAction
        {
            DBK = 0, // direct break    (blank in table)
            IBK,     // indirect break  (% in table)
            PBK,     // prohibited break (^ in table)
            CIB,     // combining indirect break
            CPB      // combining prohibited break
        };

        static const BreakAction brkTable[15][15] = {
            //         OP   CL   CP   QU   EX   SY   IS   PR   PO   NU   AL   HY   BA   CM   WJ
            /* OP */ { PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, CPB, PBK },
            /* CL */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, IBK, IBK, DBK, DBK, IBK, IBK, CIB, PBK },
            /* CP */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, CIB, PBK },
            /* QU */ { PBK, PBK, PBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, CIB, PBK },
            /* EX */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, IBK, IBK, CIB, PBK },
            /* SY */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, IBK, IBK, CIB, PBK },
            /* IS */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, IBK, CIB, PBK },
            /* PR */ { IBK, PBK, PBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, IBK, CIB, PBK },
            /* PO */ { IBK, PBK, PBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, IBK, CIB, PBK },
            /* NU */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, CIB, PBK },
            /* AL */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, IBK, CIB, PBK },
            /* HY */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, IBK, IBK, CIB, PBK },
            /* BA */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, IBK, IBK, CIB, PBK },
            /* CM */ { DBK, PBK, PBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, IBK, CIB, PBK },
            /* WJ */ { IBK, PBK, PBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, CIB, PBK },
        };

        uint8_t ch = *text;

        LineBreakClass cls;

        if (ch < 128)
            cls = kASCII_LineBreakTable[ch];
        else
            cls = AL;

        if (cls == SP)
            cls = WJ;

        LineBreakClass ncls = cls;

        while (++text != end and cls != MB)
        {
            ch = *text;

            LineBreakClass lcls = ncls;

            if (ch < 128)
                ncls = kASCII_LineBreakTable[ch];
            else
                ncls = AL;

            if (ncls == MB)
            {
                ++text;
                break;
            }

            if (ncls == SP)
                continue;

            BreakAction brk = brkTable[cls][ncls];

            if (brk == DBK or (brk == IBK and lcls == SP))
                break;

            cls = ncls;
        }

        return text;
    }

    size_t m_width;
};

} // namespace mcfp