Implement utf8 encoding and replace std::codecvt with custom code in utf32 <-> utf8 conversions
This commit is contained in:
parent
6cf5eb008b
commit
7e2c2a4c6d
2 changed files with 44 additions and 4 deletions
|
|
@ -11,6 +11,8 @@ namespace psemek::util
|
||||||
std::string to_utf8(std::u32string const & str);
|
// Encodes a sequence of UTF-32 code points as a UTF-8 byte string.
std::string to_utf8(std::u32string const & str);
|
||||||
std::u32string from_utf8(std::string const & str);
|
// Decodes a UTF-8 byte string into a sequence of UTF-32 code points.
// NOTE(review): handling of malformed input is delegated to the UTF-8 iteration helpers — confirm there.
std::u32string from_utf8(std::string const & str);
|
||||||
|
|
||||||
|
// Writes the UTF-8 encoding of code point `c` starting at `out` (between 1 and 4 bytes)
// and returns a pointer one past the last byte written.
// The caller must make sure at least 4 bytes are writable at `out`.
char * append_utf8(char32_t c, char * out);
|
||||||
|
|
||||||
struct utf8_iterator
|
struct utf8_iterator
|
||||||
{
|
{
|
||||||
using value_type = char32_t;
|
using value_type = char32_t;
|
||||||
|
|
|
||||||
|
|
@ -11,16 +11,54 @@ namespace psemek::util
|
||||||
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
using converter = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;
|
|
||||||
|
|
||||||
std::string to_utf8(std::u32string const & str)
|
std::string to_utf8(std::u32string const & str)
|
||||||
{
|
{
|
||||||
return converter{}.to_bytes(str);
|
std::string result;
|
||||||
|
result.resize(str.size() * 4);
|
||||||
|
|
||||||
|
char * out = result.data();
|
||||||
|
for (char32_t c : str)
|
||||||
|
out = append_utf8(c, out);
|
||||||
|
result.resize(out - result.data());
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::u32string from_utf8(std::string const & str)
|
std::u32string from_utf8(std::string const & str)
|
||||||
{
|
{
|
||||||
return converter{}.from_bytes(str);
|
std::u32string result;
|
||||||
|
result.reserve(str.size());
|
||||||
|
for (char32_t c : utf8_range(str))
|
||||||
|
result.push_back(c);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encodes a single code point as UTF-8 and writes it to `out`.
//
// Writes between 1 and 4 bytes; the caller must guarantee that at least
// 4 bytes are writable at `out`.
//
// Values that are not Unicode scalar values (UTF-16 surrogates U+D800..U+DFFF
// and anything above U+10FFFF) have no well-formed UTF-8 representation.
// They are replaced with U+FFFD REPLACEMENT CHARACTER instead of being
// turned into a malformed byte sequence.
//
// Returns a pointer one past the last byte written.
char * append_utf8(char32_t c, char * out)
{
	// Substitute anything that cannot be encoded as well-formed UTF-8
	if ((c >= 0xD800 && c <= 0xDFFF) || c > 0x10FFFF)
		c = 0xFFFD;

	if (c < 0x80)
	{
		// 0xxxxxxx
		*out++ = static_cast<char>(c);
	}
	else if (c < 0x800)
	{
		// 110xxxxx 10xxxxxx
		*out++ = static_cast<char>(0b11000000 | (c >> 6));
		*out++ = static_cast<char>(0b10000000 | (c & 0b00111111));
	}
	else if (c < 0x10000)
	{
		// 1110xxxx 10xxxxxx 10xxxxxx
		*out++ = static_cast<char>(0b11100000 | (c >> 12));
		*out++ = static_cast<char>(0b10000000 | ((c >> 6) & 0b00111111));
		*out++ = static_cast<char>(0b10000000 | (c & 0b00111111));
	}
	else
	{
		// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		*out++ = static_cast<char>(0b11110000 | (c >> 18));
		*out++ = static_cast<char>(0b10000000 | ((c >> 12) & 0b00111111));
		*out++ = static_cast<char>(0b10000000 | ((c >> 6) & 0b00111111));
		*out++ = static_cast<char>(0b10000000 | (c & 0b00111111));
	}

	return out;
}
|
||||||
|
|
||||||
#ifdef __clang__
|
#ifdef __clang__
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue