Fix UTF-8 parsing

This commit is contained in:
Nikita Lisitsa 2022-04-01 17:36:27 +03:00
parent 2b027e09e5
commit 50e2ebd362

View file

@@ -82,20 +82,20 @@ namespace psemek::util
else if (is_2_byte(ptr[0]))
{
assert_middle(ptr + 1);
-return (ptr[0] & 0b11111) | ((ptr[1] & 0b111111) << 5);
+return (char32_t(ptr[0] & 0b11111) << 6) | (char32_t(ptr[1] & 0b111111));
}
else if (is_3_byte(ptr[0]))
{
assert_middle(ptr + 1);
assert_middle(ptr + 2);
-return (ptr[0] & 0b1111) | ((ptr[1] & 0b111111) << 4) | ((ptr[2] & 0b111111) << 10);
+return (char32_t(ptr[0] & 0b1111) << 12) | (char32_t(ptr[1] & 0b111111) << 6) | char32_t(ptr[2] & 0b111111);
}
else if (is_4_byte(ptr[0]))
{
assert_middle(ptr + 1);
assert_middle(ptr + 2);
assert_middle(ptr + 3);
-return (ptr[0] & 0b111) | ((ptr[1] & 0b111111) << 3) | ((ptr[2] & 0b111111) << 9) | ((ptr[3] & 0b111111) << 15);
+return (char32_t(ptr[0] & 0b111) << 18) | (char32_t(ptr[1] & 0b111111) << 12) | (char32_t(ptr[2] & 0b111111) << 6) | char32_t(ptr[3] & 0b111111);
}
else
throw invalid_utf8(ptr);