psemek/libs/util/include/psemek/util/unicode.hpp

91 lines
1.6 KiB
C++

#pragma once
#include <psemek/util/exception.hpp>
#include <cstddef>
#include <iterator>
#include <string>
#include <string_view>
#include <utility>
namespace psemek::util
{
// Builds a std::string from a std::u32string.
// NOTE(review): by name this encodes UTF-32 code points as UTF-8 bytes; the
// definition is not in this header, so its handling of invalid code points is
// not visible here — confirm against the implementation.
std::string to_utf8(std::u32string const & str);
// Builds a std::u32string from a std::string.
// NOTE(review): by name this decodes UTF-8 bytes into UTF-32 code points and
// presumably reports malformed input via invalid_utf8 — confirm against the
// implementation, which is not in this header.
std::u32string from_utf8(std::string const & str);
/// Bidirectional iterator over a byte sequence that produces one `char32_t`
/// per dereference (presumably the decoded UTF-8 code point; the operators are
/// only declared here and defined out of line).
///
/// The iterator is a thin wrapper around a single `char const *`: it does not
/// own the bytes and carries no end-of-sequence information, so a traversal
/// has to be bounded by comparing against another iterator.
struct utf8_iterator
{
	using value_type = char32_t;
	// No operator-> is offered, so there is no meaningful pointer type.
	using pointer = void;
	// operator* returns a char32_t by value; the trait has to say so, otherwise
	// C++17 adaptors that are written in terms of iterator_traits::reference
	// (e.g. std::reverse_iterator::operator*) end up declared as returning void.
	using reference = char32_t;
	using difference_type = std::ptrdiff_t;
	using iterator_category = std::bidirectional_iterator_tag;

	// Current position in the byte sequence. Defaults to null so that a
	// default-constructed iterator has a determinate, comparable value.
	char const * ptr = nullptr;

	/// Pre-increment: moves to the next element and returns `*this`.
	utf8_iterator & operator ++();
	/// Post-increment: moves to the next element and returns an iterator by value.
	utf8_iterator operator ++(int);
	/// Pre-decrement: moves to the previous element and returns `*this`.
	utf8_iterator & operator --();
	/// Post-decrement: moves to the previous element and returns an iterator by value.
	utf8_iterator operator --(int);

	/// Returns the element at the current position by value.
	char32_t operator *() const;
};
/// Two iterators are equal exactly when they hold the same pointer.
inline bool operator == (utf8_iterator const & i1, utf8_iterator const & i2)
{
	char const * const lhs = i1.ptr;
	char const * const rhs = i2.ptr;
	return lhs == rhs;
}
/// Two iterators differ exactly when they hold different pointers.
inline bool operator != (utf8_iterator const & i1, utf8_iterator const & i2)
{
	char const * const lhs = i1.ptr;
	char const * const rhs = i2.ptr;
	return !(lhs == rhs);
}
/// Non-owning range over a byte sequence, traversed with utf8_iterator.
///
/// Only a std::string_view is stored, so the viewed bytes must stay alive for
/// as long as the range (or any iterator obtained from it) is in use.
struct utf8_range
{
	std::string_view str;

	/// Views a NUL-terminated string.
	/// A null pointer produces an empty range: handing nullptr to
	/// std::string_view(char const *) would be undefined behaviour.
	utf8_range(char const * begin)
		: str(begin ? std::string_view(begin) : std::string_view())
	{}

	/// Views the bytes in [begin, end).
	/// Both pointers must refer to the same array with begin <= end; the
	/// length is computed as their difference.
	utf8_range(char const * begin, char const * end)
		: str(begin, static_cast<std::size_t>(end - begin))
	{}

	/// Views exactly the bytes of `str`.
	utf8_range(std::string_view str)
		: str(str)
	{}

	// Copy and move operations are intentionally left implicit: the only
	// member is a std::string_view, so the compiler-generated ones are correct.

	/// Iterator positioned at the first byte of the view.
	auto begin() const
	{
		return utf8_iterator{str.data()};
	}

	/// Iterator positioned one past the last byte of the view.
	auto end() const
	{
		return utf8_iterator{str.data() + str.size()};
	}

	/// Number of utf8_iterator steps from begin() to end().
	/// This is computed by walking the whole range, so it is linear in the
	/// length of the view and is not the byte count (use `str.size()` for that).
	std::size_t size() const
	{
		return static_cast<std::size_t>(std::distance(begin(), end()));
	}
};
/// Exception type carrying the fixed message "Invalid UTF-8 string" together
/// with a caller-supplied pointer.
///
/// The pointer is stored as-is (nothing is copied), so it is only meaningful
/// while the memory it refers to is still alive.
/// NOTE(review): presumably it points at the offending position in the input —
/// confirm at the throw sites, which are not in this header.
struct invalid_utf8
	: exception
{
	/// @param data        pointer kept verbatim and returned by data()
	/// @param stacktrace  forwarded to the base `exception`; defaults to `{}`
	invalid_utf8(char const * data, util::stacktrace stacktrace = {})
		: exception("Invalid UTF-8 string", std::move(stacktrace))
		, data_(data)
	{}

	/// Returns the pointer that was passed to the constructor.
	char const * data() const
	{
		return this->data_;
	}

private:
	char const * data_;
};
}