.. _program_listing_file_zeep_unicode-support.hpp: Program Listing for File unicode-support.hpp ============================================ |exhale_lsh| :ref:`Return to documentation for file ` (``zeep/unicode-support.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp // Copyright Maarten L. Hekkelman, Radboud University 2008-2013. // Copyright Maarten L. Hekkelman, 2014-2023 // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #pragma once #include #include #include #include #include #include #include namespace zeep { using unicode = char32_t; enum class encoding_type { ASCII, UTF8, UTF16BE, UTF16LE, ISO88591 }; constexpr bool is_single_byte_encoding(encoding_type enc) { return enc == encoding_type::ASCII or enc == encoding_type::ISO88591 or enc == encoding_type::UTF8; } void append(std::string& s, unicode ch); unicode pop_last_char(std::string& s); template std::tuple get_first_char(Iter ptr, Iter end); inline bool iequals(const std::string& a, const std::string& b) { bool equal = a.length() == b.length(); for (std::string::size_type i = 0; equal and i < a.length(); ++i) equal = std::toupper(a[i]) == std::toupper(b[i]); return equal; } // inlines inline void append(std::string& s, unicode uc) { if (uc < 0x080) s += (static_cast(uc)); else if (uc < 0x0800) { char ch[2] = { static_cast(0x0c0 | (uc >> 6)), static_cast(0x080 | (uc & 0x3f)) }; s.append(ch, 2); } else if (uc < 0x00010000) { char ch[3] = { static_cast(0x0e0 | (uc >> 12)), static_cast(0x080 | ((uc >> 6) & 0x3f)), static_cast(0x080 | (uc & 0x3f)) }; s.append(ch, 3); } else { char ch[4] = { static_cast(0x0f0 | (uc >> 18)), static_cast(0x080 | ((uc >> 12) & 0x3f)), static_cast(0x080 | ((uc >> 6) & 0x3f)), static_cast(0x080 | (uc & 0x3f)) }; s.append(ch, 4); } } inline unicode pop_last_char(std::string& s) { unicode result = 0; if (not s.empty()) { std::string::iterator ch = s.end() - 1; if ((*ch & 0x0080) == 0) { result = *ch; s.erase(ch); } else { int o = 0; do { result |= (*ch & 0x03F) << o; o += 6; --ch; } while (ch != s.begin() and (*ch & 0x0C0) == 0x080); switch (o) { case 6: result |= (*ch & 0x01F) << 6; break; case 12: result |= (*ch & 0x00F) << 12; break; case 18: result |= (*ch & 0x007) << 18; break; } s.erase(ch, s.end()); } } return result; } // I used to have this comment here: // // this code only works if the input is valid utf-8 // // That was a bad idea.... // template std::tuple get_first_char(Iter ptr, Iter end) { unicode result = static_cast(*ptr); ++ptr; if (result > 0x07f) { unsigned char ch[3]; if ((result & 0x0E0) == 0x0C0) { if (ptr >= end) throw zeep::exception("Invalid utf-8"); ch[0] = static_cast(*ptr); ++ptr; if ((ch[0] & 0x0c0) != 0x080) throw zeep::exception("Invalid utf-8"); result = ((result & 0x01F) << 6) | (ch[0] & 0x03F); } else if ((result & 0x0F0) == 0x0E0) { if (ptr + 1 >= end) throw zeep::exception("Invalid utf-8"); ch[0] = static_cast(*ptr); ++ptr; ch[1] = static_cast(*ptr); ++ptr; if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080) throw zeep::exception("Invalid utf-8"); result = ((result & 0x00F) << 12) | ((ch[0] & 0x03F) << 6) | (ch[1] & 0x03F); } else if ((result & 0x0F8) == 0x0F0) { if (ptr + 2 >= end) throw zeep::exception("Invalid utf-8"); ch[0] = static_cast(*ptr); ++ptr; ch[1] = static_cast(*ptr); ++ptr; ch[2] = static_cast(*ptr); ++ptr; if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080 or (ch[2] & 0x0c0) != 0x080) throw zeep::exception("Invalid utf-8"); result = ((result & 0x007) << 18) | ((ch[0] & 0x03F) << 12) | ((ch[1] & 0x03F) << 6) | (ch[2] & 0x03F); } } return std::make_tuple(result, ptr); } // -------------------------------------------------------------------- inline std::wstring convert_s2w(std::string_view s) { auto b = s.begin(); auto e = s.end(); std::wstring result; while (b != e) { const auto &[uc, i] = get_first_char(b, e); if (not uc) break; result += static_cast(uc); b = i; } return result; } inline std::string convert_w2s(std::wstring_view s) { std::string result; for (unicode ch : s) append(result, ch); return result; } // -------------------------------------------------------------------- inline std::string to_hex(uint32_t i) { char s[sizeof(i) * 2 + 3]; char* p = s + sizeof(s); *--p = 0; const char kHexChars[] = "0123456789abcdef"; while (i) { *--p = kHexChars[i & 0x0F]; i >>= 4; } *--p = 'x'; *--p = '0'; return p; } // -------------------------------------------------------------------- inline void trim(std::string& s) { std::string::iterator b = s.begin(); while (b != s.end() and *b > 0 and std::isspace(*b)) ++b; std::string::iterator e = s.end(); while (e > b and *(e - 1) > 0 and std::isspace(*(e - 1))) --e; if (b != s.begin() or e != s.end()) s = { b, e }; } // -------------------------------------------------------------------- inline bool starts_with(std::string_view s, std::string_view p) { return s.compare(0, p.length(), p) == 0; } // -------------------------------------------------------------------- inline bool ends_with(std::string_view s, std::string_view p) { return s.length() >= p.length() and s.compare(s.length() - p.length(), p.length(), p) == 0; } // -------------------------------------------------------------------- inline bool contains(std::string_view s, std::string_view p) { return s.find(p) != std::string_view::npos; } // -------------------------------------------------------------------- inline void split(std::vector& v, std::string_view s, std::string_view p, bool compress = false) { v.clear(); std::string_view::size_type i = 0; const auto e = s.length(); while (i <= e) { auto n = s.find(p, i); if (n > e) n = e; if (n > i or compress == false) v.emplace_back(s.substr(i, n - i)); if (n == std::string_view::npos) break; i = n + p.length(); } } // -------------------------------------------------------------------- inline void to_lower(std::string& s, const std::locale& loc = std::locale()) { for (char& ch: s) ch = std::tolower(ch, loc); } // -------------------------------------------------------------------- template > std::string join(const Container& v, std::string_view d) { std::string result; if (not v.empty()) { auto i = v.begin(); for (;;) { result += *i++; if (i == v.end()) break; result += d; } } return result; } // -------------------------------------------------------------------- inline void replace_all(std::string& s, std::string_view p, std::string_view r) { std::string::size_type i = 0; for (;;) { auto l = s.find(p, i); if (l == std::string::npos) break; s.replace(l, p.length(), r); i = l + r.length(); } } } // namespace xml