91 lines
1.6 KiB
C++
91 lines
1.6 KiB
C++
#pragma once
|
|
|
|
#include <psemek/util/exception.hpp>
|
|
|
|
#include <string>
|
|
#include <iterator>
|
|
|
|
namespace psemek::util
|
|
{
|
|
|
|
// Converts a std::u32string into a std::string. Only the declaration is
// visible in this header; going by the name, the result is presumably the
// UTF-8 encoding of the input code points -- confirm against the definition.
std::string to_utf8(std::u32string const & str);
|
|
// Converts a std::string into a std::u32string. Only the declaration is
// visible in this header; going by the name, the input is presumably expected
// to be UTF-8 and is decoded into code points -- confirm against the
// definition (including how malformed input is reported).
std::u32string from_utf8(std::string const & str);
|
|
|
|
// Iterator over a `char const *` buffer that yields `char32_t` values from
// operator *. It is a plain aggregate holding one non-owning pointer, so it
// carries no bounds information of its own. The stepping and dereferencing
// operators are only declared here and are defined out of line; judging by
// the type's name they move over / decode one UTF-8 encoded code point at a
// time.
struct utf8_iterator
{
    // Member types picked up by std::iterator_traits.
    using value_type = char32_t;
    // There is no operator ->, hence no meaningful pointer type.
    using pointer = void;
    // Must name the exact type operator * returns. Iterator adaptors such as
    // std::reverse_iterator declare their own operator * as returning
    // iterator_traits<It>::reference; with `void` here their dereference
    // would be unusable.
    using reference = char32_t;
    using difference_type = std::ptrdiff_t;
    using iterator_category = std::bidirectional_iterator_tag;

    // Current position inside the underlying byte buffer (not owned).
    char const * ptr;

    // Pre- and post-increment.
    utf8_iterator & operator ++();
    utf8_iterator operator ++(int);

    // Pre- and post-decrement.
    utf8_iterator & operator --();
    utf8_iterator operator --(int);

    // Value at the current position, returned by value.
    char32_t operator *() const;
};
|
|
|
|
// Two iterators compare equal exactly when they hold the same byte pointer.
inline bool operator == (utf8_iterator const & i1, utf8_iterator const & i2)
{
    char const * const lhs = i1.ptr;
    char const * const rhs = i2.ptr;
    return lhs == rhs;
}
|
|
|
|
// Two iterators differ exactly when their byte pointers differ.
inline bool operator != (utf8_iterator const & i1, utf8_iterator const & i2)
{
    bool const same_position = (i1.ptr == i2.ptr);
    return !same_position;
}
|
|
|
|
struct utf8_range
|
|
{
|
|
std::string_view str;
|
|
|
|
utf8_range(char const * begin)
|
|
: str(begin)
|
|
{}
|
|
|
|
utf8_range(char const * begin, char const * end)
|
|
: str(begin, end - begin)
|
|
{}
|
|
|
|
utf8_range(std::string_view str)
|
|
: str(str)
|
|
{}
|
|
|
|
utf8_range(utf8_range const &) = default;
|
|
|
|
auto begin() const
|
|
{
|
|
return utf8_iterator{str.data()};
|
|
}
|
|
|
|
auto end() const
|
|
{
|
|
return utf8_iterator{str.data() + str.size()};
|
|
}
|
|
|
|
std::size_t size() const
|
|
{
|
|
return std::distance(begin(), end());
|
|
}
|
|
};
|
|
|
|
struct invalid_utf8
|
|
: exception
|
|
{
|
|
invalid_utf8(char const * data, util::stacktrace stacktrace = {})
|
|
: exception("Invalid UTF-8 string", std::move(stacktrace))
|
|
, data_{data}
|
|
{}
|
|
|
|
char const * data() const { return data_; }
|
|
|
|
private:
|
|
char const * data_;
|
|
};
|
|
|
|
}
|