Fix utf8 parsing
This commit is contained in:
parent
2b027e09e5
commit
50e2ebd362
1 changed file with 3 additions and 3 deletions
|
|
@@ -82,20 +82,20 @@ namespace psemek::util
|
|||
else if (is_2_byte(ptr[0]))
|
||||
{
|
||||
assert_middle(ptr + 1);
|
||||
return (ptr[0] & 0b11111) | ((ptr[1] & 0b111111) << 5);
|
||||
return (char32_t(ptr[0] & 0b11111) << 6) | (char32_t(ptr[1] & 0b111111));
|
||||
}
|
||||
else if (is_3_byte(ptr[0]))
|
||||
{
|
||||
assert_middle(ptr + 1);
|
||||
assert_middle(ptr + 2);
|
||||
return (ptr[0] & 0b1111) | ((ptr[1] & 0b111111) << 4) | ((ptr[2] & 0b111111) << 10);
|
||||
return (char32_t(ptr[0] & 0b1111) << 12) | (char32_t(ptr[1] & 0b111111) << 6) | char32_t(ptr[2] & 0b111111);
|
||||
}
|
||||
else if (is_4_byte(ptr[0]))
|
||||
{
|
||||
assert_middle(ptr + 1);
|
||||
assert_middle(ptr + 2);
|
||||
assert_middle(ptr + 3);
|
||||
return (ptr[0] & 0b111) | ((ptr[1] & 0b111111) << 3) | ((ptr[2] & 0b111111) << 9) | ((ptr[3] & 0b111111) << 15);
|
||||
return (char32_t(ptr[0] & 0b111) << 18) | (char32_t(ptr[1] & 0b111111) << 12) | (char32_t(ptr[2] & 0b111111) << 6) | char32_t(ptr[3] & 0b111111);
|
||||
}
|
||||
else
|
||||
throw invalid_utf8(ptr);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue