Fix utf8 parsing

This commit is contained in:
Nikita Lisitsa 2022-04-01 17:36:27 +03:00
parent 2b027e09e5
commit 50e2ebd362

View file

@@ -82,20 +82,20 @@ namespace psemek::util
else if (is_2_byte(ptr[0])) else if (is_2_byte(ptr[0]))
{ {
assert_middle(ptr + 1); assert_middle(ptr + 1);
return (ptr[0] & 0b11111) | ((ptr[1] & 0b111111) << 5); return (char32_t(ptr[0] & 0b11111) << 6) | (char32_t(ptr[1] & 0b111111));
} }
else if (is_3_byte(ptr[0])) else if (is_3_byte(ptr[0]))
{ {
assert_middle(ptr + 1); assert_middle(ptr + 1);
assert_middle(ptr + 2); assert_middle(ptr + 2);
return (ptr[0] & 0b1111) | ((ptr[1] & 0b111111) << 4) | ((ptr[2] & 0b111111) << 10); return (char32_t(ptr[0] & 0b1111) << 12) | (char32_t(ptr[1] & 0b111111) << 6) | char32_t(ptr[2] & 0b111111);
} }
else if (is_4_byte(ptr[0])) else if (is_4_byte(ptr[0]))
{ {
assert_middle(ptr + 1); assert_middle(ptr + 1);
assert_middle(ptr + 2); assert_middle(ptr + 2);
assert_middle(ptr + 3); assert_middle(ptr + 3);
return (ptr[0] & 0b111) | ((ptr[1] & 0b111111) << 3) | ((ptr[2] & 0b111111) << 9) | ((ptr[3] & 0b111111) << 15); return (char32_t(ptr[0] & 0b111) << 18) | (char32_t(ptr[1] & 0b111111) << 12) | (char32_t(ptr[2] & 0b111111) << 6) | char32_t(ptr[3] & 0b111111);
} }
else else
throw invalid_utf8(ptr); throw invalid_utf8(ptr);