Add parser combinator library (wip) with an example

This commit is contained in:
Nikita Lisitsa 2020-11-01 22:02:32 +03:00
parent dfc8f13936
commit 02f028b0c9
6 changed files with 604 additions and 0 deletions

87
examples/parser.cpp Normal file
View file

@ -0,0 +1,87 @@
#include <psemek/parser/primitives.hpp>
#include <iostream>
#include <fstream>
#include <iterator>
#include <psemek/util/clock.hpp>
#include <psemek/gfx/obj_parser.hpp>
// Prints std::monostate as an empty pair of parentheses.
template <typename Stream>
Stream & operator << (Stream & s, std::monostate)
{
    s << "()";
    return s;
}
// Prints the end-of-input token as "(end)".
template <typename Stream>
Stream & operator << (Stream & s, psemek::parser::end_token)
{
    s << "(end)";
    return s;
}
// Prints the whitespace token as "(ws)".
template <typename Stream>
Stream & operator << (Stream & s, psemek::parser::ws_token)
{
    s << "(ws)";
    return s;
}
// Prints the newline token as "(newline)".
template <typename Stream>
Stream & operator << (Stream & s, psemek::parser::newline_token)
{
    s << "(newline)";
    return s;
}
// Prints the contained value of an optional, or "(none)" when it is empty.
template <typename Stream, typename T>
Stream & operator << (Stream & s, std::optional<T> const & x)
{
    if (!x.has_value())
        return s << "(none)";
    return s << *x;
}
// Prints a vector as "[a, b, c]"; an empty vector prints as "[]".
template <typename Stream, typename T>
Stream & operator << (Stream & s, std::vector<T> const & v)
{
    s << '[';
    bool first = true;
    for (auto const & item : v)
    {
        // The separator goes before every element except the first one.
        if (!first) s << ", ";
        first = false;
        s << item;
    }
    return s << ']';
}
// Writes the elements of tuple t selected by the index pack I, separated by
// ", ". No separator is written before the element with index 0.
template <typename Stream, typename T, std::size_t ... I>
void print_tuple (Stream & s, T const & t, std::index_sequence<I...>)
{
    auto emit = [&s](bool is_first, auto const & element)
    {
        s << (is_first ? "" : ", ") << element;
    };
    (emit(I == 0, std::get<I>(t)), ...);
}
// Prints a tuple as "(a, b, c)"; an empty tuple prints as "()".
template <typename Stream, typename ... Ts>
Stream & operator << (Stream & s, std::tuple<Ts...> const & t)
{
    using indices = std::make_index_sequence<sizeof...(Ts)>;
    s << '(';
    print_tuple(s, t, indices{});
    return s << ')';
}
// Prints whichever alternative the variant currently holds.
template <typename Stream, typename T, typename ... Ts>
Stream & operator << (Stream & s, std::variant<T, Ts...> const & v)
{
    std::visit([&s](auto const & alternative){ s << alternative; }, v);
    return s;
}
// Parses "<int> <ws> (+|-) <ws> <int>" and prints the (lhs, operator, rhs) triple.
int main()
{
    using namespace psemek::parser;

    auto const expression = map(
        concat(integer<int>, ws, one_of(ch('+'), ch('-')), ws, integer<int>),
        [](auto const & parsed)
        {
            // one_of yields a variant of two chars; collapse it to the char it holds.
            auto same = [](auto value){ return value; };
            return std::make_tuple(std::get<0>(parsed), std::visit(same, std::get<2>(parsed)), std::get<4>(parsed));
        });

    std::cout << expression.parse("45 + 67") << std::endl;
}

View file

@ -0,0 +1,6 @@
# Collect the parser library's headers and sources, as paths relative to this directory.
# NOTE(review): globbing is evaluated when CMake configures, so newly added files
# presumably require re-running CMake before they are picked up - confirm.
file(GLOB_RECURSE PSEMEK_PARSER_HEADERS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "include/*.hpp")
file(GLOB_RECURSE PSEMEK_PARSER_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "source/*.cpp")
# The library target; its include/ directory is exported to dependents (PUBLIC).
add_library(psemek-parser ${PSEMEK_PARSER_HEADERS} ${PSEMEK_PARSER_SOURCES})
target_include_directories(psemek-parser PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
# parser.hpp includes psemek/util/to_string.hpp, hence the public dependency on psemek-util.
target_link_libraries(psemek-parser PUBLIC psemek-util)

View file

@ -0,0 +1,264 @@
#pragma once
#include <psemek/parser/parser.hpp>
#include <vector>
#include <optional>
#include <tuple>
namespace psemek::parser
{
namespace detail
{
// Tag type passed to make_parser by concat() to mark the parsers it builds.
struct concat_tag{};
// Base case: no parsers are left, so the whole sequence has matched.
template <std::size_t I, typename Buffer, typename Res>
bool concat_helper(std::integral_constant<std::size_t, I>, Buffer &, Res &, error &)
{
    return true;
}
// Recursive case: applies p, stores its value in slot I of res and continues
// with the remaining parsers. On the first failure the error is written to e
// and false is returned; the buffer position is NOT restored here.
template <std::size_t I, typename Buffer, typename Res, typename P, typename ... Ps>
bool concat_helper(std::integral_constant<std::size_t, I>, Buffer & buf, Res & res, error & e, P const & p, Ps const & ... ps)
{
    auto outcome = p.apply(buf);
    if (outcome.index() != 1)
    {
        std::get<I>(res) = std::move(std::get<0>(outcome));
        return concat_helper(std::integral_constant<std::size_t, I + 1>{}, buf, res, e, ps...);
    }
    e = std::get<1>(outcome);
    return false;
}
// Runs all parsers ps in sequence on buf. Returns the tuple of their values,
// or the first error; on error the buffer is rewound to where it started.
template <typename Buffer, typename ... Ps>
auto concat_impl(Buffer & buf, Ps const & ... ps)
    -> result<std::tuple<decltype(detail::result_type(ps, buf))...>>
{
    using values = std::tuple<decltype(detail::result_type(ps, buf))...>;
    values collected;
    error failure;
    auto start = buf.it;
    bool const matched = concat_helper(std::integral_constant<std::size_t, 0>{}, buf, collected, failure, ps...);
    if (matched)
        return collected;
    buf.it = start;
    return failure;
}
// Tag type passed to make_parser by one_of() to mark the parsers it builds.
struct one_of_tag{};
// Base case: every alternative has been tried and none matched.
template <std::size_t I, typename Buffer, typename Res>
bool one_of_helper(std::integral_constant<std::size_t, I>, Buffer &, Res &, error &)
{
    return false;
}
// Recursive case: tries p; on success stores its value as alternative I of res
// and returns true. Otherwise records p's error in e (so e ends up holding the
// error of the last alternative tried) and moves on to the next parser.
template <std::size_t I, typename Buffer, typename Res, typename P, typename ... Ps>
bool one_of_helper(std::integral_constant<std::size_t, I>, Buffer & buf, Res & res, error & e, P const & p, Ps const & ... ps)
{
    auto outcome = p.apply(buf);
    if (outcome.index() != 0)
    {
        e = std::get<1>(outcome);
        return one_of_helper(std::integral_constant<std::size_t, I + 1>{}, buf, res, e, ps...);
    }
    res.template emplace<I>(std::move(std::get<0>(outcome)));
    return true;
}
// Tries the parsers ps in order on buf. Returns a variant holding the value of
// the first one that matches (at the matching index), or the last error; on
// error the buffer is rewound to where it started.
template <typename Buffer, typename ... Ps>
auto one_of_impl(Buffer & buf, Ps const & ... ps)
    -> result<std::variant<decltype(detail::result_type(ps, buf))...>>
{
    using choice = std::variant<decltype(detail::result_type(ps, buf))...>;
    choice selected;
    error failure;
    auto start = buf.it;
    bool const matched = one_of_helper(std::integral_constant<std::size_t, 0>{}, buf, selected, failure, ps...);
    if (matched)
        return selected;
    buf.it = start;
    return failure;
}
}
// Builds a parser that runs p and, on success, yields f(value). Errors from p
// are passed through unchanged.
template <typename P, typename F>
auto map(P && p, F && f)
{
    auto transform = [p = std::forward<P>(p), f = std::forward<F>(f)](auto & buffer)
        -> result<decltype(f(detail::result_type(p, buffer)))>
    {
        auto parsed = p.apply(buffer);
        if (parsed.index() != 1)
            return f(std::get<0>(parsed));
        return std::get<1>(parsed);
    };
    return make_parser(std::move(transform));
}
// Builds a parser that runs p and accepts its value only if the predicate f
// returns true for it. When the predicate rejects the value, the buffer is
// rewound and an error carrying `message` ("guard failed" if none was given)
// is returned. Errors from p itself are passed through unchanged.
template <typename P, typename F>
auto guard(P && p, F && f, std::string message = {})
{
    if (message.empty())
        message = "guard failed";
    return make_parser([p = std::forward<P>(p), f = std::forward<F>(f), message = std::move(message)](auto & buffer)
        -> result<decltype(detail::result_type(p, buffer))>
    {
        auto start = buffer.it;
        auto candidate = p.apply(buffer);
        if (candidate.index() == 1)
            return std::get<1>(candidate);
        if (f(std::get<0>(candidate)))
            return std::get<0>(candidate);
        // Rejected: undo whatever p consumed and report the failure at the start position.
        buffer.it = start;
        return error{buffer.offset(), message.data()};
    });
}
// Builds a parser that never fails: it yields p's value wrapped in an
// optional, or std::nullopt when p fails.
template <typename P>
auto maybe(P && p)
{
    return make_parser([p = std::forward<P>(p)](auto & buffer)
        -> result<std::optional<decltype(detail::result_type(p, buffer))>>
    {
        auto attempt = p.apply(buffer);
        if (attempt.index() != 1)
            return std::get<0>(attempt);
        return std::nullopt;
    });
}
// Builds a parser that applies p repeatedly and collects the values in a vector.
//   min_count - fewer matches than this is an error (the buffer is rewound)
//   max_count - stop after this many matches; std::nullopt means no upper bound
//   message   - what was expected, used in the "expected at least N ..." error
// Throws grammar_error if p succeeds without consuming any input.
template <typename P>
auto from_to(P && p, std::size_t min_count, std::optional<std::size_t> max_count, std::string message = {})
{
    if (message.empty())
        message = "(unknown)";
    std::string msg = "expected at least ";
    msg += std::to_string(min_count);
    msg += " ";
    msg += message;
    return make_parser([p = std::forward<P>(p), min_count, max_count, msg = std::move(msg)](auto & buffer)
        -> result<std::vector<decltype(detail::result_type(p, buffer))>>
    {
        auto start = buffer.it;
        std::vector<decltype(detail::result_type(p, buffer))> res;
        while (!max_count || res.size() < *max_count)
        {
            auto before = buffer.it;
            auto r = p.apply(buffer);
            if (r.index() == 1) break;
            // A successful match that consumed nothing would repeat forever.
            if (buffer.it == before)
                throw grammar_error("infinite loop");
            res.push_back(std::move(std::get<0>(r)));
        }
        if (res.size() >= min_count)
            return res;
        buffer.it = start;
        return error{buffer.offset(), msg.data()};
    });
}
// Zero or more repetitions of p, with no upper bound.
template <typename P>
auto many(P && p)
{
    std::optional<std::size_t> const unbounded = std::nullopt;
    return from_to(std::forward<P>(p), 0, unbounded);
}
// At least `count` repetitions of p, with no upper bound.
template <typename P>
auto at_least(P && p, std::size_t count, std::string message = {})
{
    std::optional<std::size_t> const unbounded = std::nullopt;
    return from_to(std::forward<P>(p), count, unbounded, std::move(message));
}
// `count` repetitions of p: count is used both as the minimum and the maximum.
template <typename P>
auto exactly(P && p, std::size_t count, std::string message = {})
{
    std::optional<std::size_t> const upper_bound{count};
    return from_to(std::forward<P>(p), count, upper_bound, std::move(message));
}
// Builds a parser that applies all of ps one after another and yields a
// std::tuple with one element per parser. The work is delegated to
// detail::concat_impl; the resulting parser is tagged with detail::concat_tag.
template <typename ... Ps>
auto concat(Ps && ... ps)
{
return make_parser([... ps = std::forward<Ps>(ps)](auto & buffer)
-> result<std::tuple<decltype(detail::result_type(ps, buffer)) ...>>
{
return detail::concat_impl(buffer, ps...);
}, detail::concat_tag{});
}
// Builds a parser that tries each of ps in order and yields a std::variant
// with one alternative per parser. The work is delegated to
// detail::one_of_impl; the resulting parser is tagged with detail::one_of_tag.
template <typename ... Ps>
auto one_of(Ps && ... ps)
{
return make_parser([... ps = std::forward<Ps>(ps)](auto & buffer)
-> result<std::variant<decltype(detail::result_type(ps, buffer)) ...>>
{
return detail::one_of_impl(buffer, ps...);
}, detail::one_of_tag{});
}
// Left fold with an explicit seed: starts from a copy of `a` and, for every
// successful application of p, replaces the accumulator with f(accum, value).
// Stops at the first failure of p and yields the accumulator, so this parser
// never fails (zero matches yield the seed).
// Throws grammar_error if p succeeds without consuming any input.
template <typename P, typename F, typename A>
auto fold(P && p, F && f, A && a)
{
    return make_parser([p = std::forward<P>(p), f = std::forward<F>(f), a = std::forward<A>(a)](auto & buffer)
        -> result<std::remove_cvref_t<A>>
    {
        auto accum = a;
        for (;;)
        {
            auto before = buffer.it;
            auto step = p.apply(buffer);
            if (step.index() == 1)
                break;
            if (before == buffer.it)
                throw grammar_error("infinite loop");
            accum = f(accum, std::get<0>(step));
        }
        return accum;
    });
}
// Left fold without a seed: the first value parsed by p is the initial
// accumulator, so p must match at least once (otherwise its error is
// returned). Every further match replaces the accumulator with
// f(accum, value). The accumulator has the result type of p.
// Throws grammar_error if p succeeds without consuming any input.
template <typename P, typename F>
auto fold(P && p, F && f)
{
    return make_parser([p = std::forward<P>(p), f = std::forward<F>(f)](auto & buffer)
        -> result<decltype(detail::result_type(p, buffer))>
    {
        auto first = p.apply(buffer);
        if (first.index() == 1)
            return std::get<1>(first);
        auto accum = std::move(std::get<0>(first));
        for (;;)
        {
            auto before = buffer.it;
            auto step = p.apply(buffer);
            if (step.index() == 1)
                break;
            if (before == buffer.it)
                throw grammar_error("infinite loop");
            accum = f(std::move(accum), std::move(std::get<0>(step)));
        }
        return accum;
    });
}
// Result of skip_while: carries no data.
struct skip_token{};
// Consumes input for as long as p keeps matching and discards the values.
template <typename P>
auto skip_while(P && p)
{
    auto discard = [](auto const &, auto const &){ return skip_token{}; };
    return fold(std::forward<P>(p), std::move(discard), skip_token{});
}
}

View file

@ -0,0 +1,131 @@
#pragma once
#include <cstddef>
#include <functional>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <variant>
#include <psemek/util/to_string.hpp>
namespace psemek::parser
{
// Thrown by parser::parse when the input does not match the grammar.
// Carries the position of the failure; what() returns
// "<message> at <line>#<character>".
struct parse_error
    : std::runtime_error
{
    parse_error(std::string message, std::size_t line, std::size_t character)
        // `message` is still needed for what_ below, so it must not be moved
        // from here (std::runtime_error copies its argument anyway).
        : std::runtime_error(message)
        , line_{line}
        , character_{character}
        , what_{util::to_string(message, " at ", line, "#", character)}
    {}

    std::size_t line() const { return line_; }
    std::size_t character() const { return character_; }

    // Returns the message including the position.
    const char * what() const noexcept override { return what_.data(); }

private:
    std::size_t line_;
    std::size_t character_;
    std::string what_;
};
// Thrown when the grammar itself is at fault rather than the input, e.g. a
// repeated parser that succeeds without consuming anything.
struct grammar_error
    : std::runtime_error
{
    grammar_error(std::string message)
        : std::runtime_error{std::move(message)}
    {}
};
namespace detail
{
// Declaration-only helper for use inside decltype: its return type is the
// value type (alternative 0 of the result variant, without cv/ref qualifiers)
// that parser p produces when applied to buffer b.
template <typename P, typename B>
auto result_type(P const & p, B & b) -> std::remove_cvref_t<decltype(std::get<0>(p.apply(b)))>;
// Parsing cursor over the iterator range [begin, end).
// `it` is the current position; parsers advance it as they consume input and
// assign it back to rewind.
template <typename Iterator>
struct buffer
{
    // Character type of the input. Taken from iterator_traits so that it is
    // also correct for iterators whose operator* returns a proxy.
    using char_type = typename std::iterator_traits<Iterator>::value_type;

    buffer(Iterator begin, Iterator end)
        : begin{begin}
        , end{end}
        , it{begin}
    {}

    Iterator begin;
    Iterator end;
    Iterator it;

    // Number of characters between the start of the input and the cursor.
    // std::distance keeps this usable with forward iterators, not only
    // random-access ones.
    std::ptrdiff_t offset() const { return static_cast<std::ptrdiff_t>(std::distance(begin, it)); }
};
// Wraps a callable p (buffer -> result<T>) as a parser object.
// Tag is an optional marker type; concat() and one_of() use it to label the
// parsers they build.
template <typename P, typename Tag = void>
struct parser
{
    using tag = Tag;

    P p;

    // Runs the wrapped callable on buf and returns its result<T> as-is.
    template <typename Buffer>
    auto apply(Buffer & buf) const
    {
        return p(buf);
    }

    // Parses `text` from its beginning and returns the parsed value.
    // Trailing input is not an error here.
    // Throws parse_error with the 0-based line and 0-based column of the
    // failure when the parser does not match.
    auto parse(std::string_view text) const
    {
        buffer<char const *> buf{text.data(), text.data() + text.size()};
        auto res = apply(buf);
        if (res.index() == 1)
        {
            auto const & e = std::get<1>(res);
            auto const error_offset = static_cast<std::size_t>(e.offset);
            std::size_t line = 0;
            // Index of the first character of the line containing the error.
            std::size_t line_start = 0;
            for (std::size_t i = 0; i < error_offset; ++i)
            {
                if (buf.begin[i] == '\n')
                {
                    ++line;
                    line_start = i + 1;
                }
            }
            // Measuring from the start of the line keeps the column 0-based
            // on every line, not just the first one.
            throw parse_error(e.message, line, error_offset - line_start);
        }
        // res is a local, so the value can be moved out instead of copied.
        return std::get<0>(std::move(res));
    }
};
}
// Failure value returned by parsers (alternative 1 of result<T>).
struct error
{
// Position of the failure, as reported by buffer::offset().
std::ptrdiff_t offset;
// Human-readable description. Not owned: parsers in this library set it to a
// string literal or to the data() of a string stored inside the parser object.
char const * message;
};
// Outcome of applying a parser: index 0 holds the parsed value, index 1 the error.
template <typename T>
using result = std::variant<T, error>;
template <typename P>
auto make_parser(P && p)
{
return detail::parser<std::remove_cvref_t<P>>{std::forward<P>(p)};
}
template <typename P, typename Tag>
auto make_parser(P && p, Tag)
{
return detail::parser<std::remove_cvref_t<P>, Tag>{std::forward<P>(p)};
}
}

View file

@ -0,0 +1,113 @@
#pragma once
#include <psemek/parser/parser.hpp>
#include <psemek/parser/combinators.hpp>
namespace psemek::parser
{
// Result of the `end` parser: carries no data.
struct end_token{};
// Succeeds only when the whole input has been consumed; consumes nothing.
// Declared `inline` (like the other primitives in this header) so that every
// translation unit shares one object and one closure type instead of getting
// an internal-linkage copy each.
inline const auto end = make_parser([](auto & buffer)
    -> result<end_token>
{
    if (buffer.it == buffer.end)
        return end_token{};
    return error{buffer.offset(), "unexpected trailing data"};
});
template <typename T>
auto pure(T && t)
{
return make_parser([t = std::forward<T>(t)](auto &)
-> result<std::remove_cvref_t<T>>
{
return t;
});
}
// Yields the next character of the input, or fails with "unexpected end" when
// the input is exhausted.
// NOTE: despite its name this parser CONSUMES the character it returns;
// guard() rewinds the buffer when its predicate rejects the character.
// Declared `inline` because the inline function ch() uses it: an
// internal-linkage `const` object would be a different object, of a different
// closure type, in every translation unit.
inline const auto peek = make_parser([](auto & buffer)
    -> result<typename std::remove_cvref_t<decltype(buffer)>::char_type>
{
    if (buffer.it == buffer.end)
        return error{buffer.offset(), "unexpected end"};
    return *buffer.it++;
});
// Builds a parser that matches exactly the character x and yields it.
// On mismatch the error message is: expected "x".
inline auto ch(char x)
{
    std::string expectation = "expected \"";
    expectation += std::string(1, x);
    expectation += "\"";
    return guard(peek, [x](auto const & y){ return x == y; }, std::move(expectation));
}
// Builds a parser that matches the exact character sequence s.
// Nothing is consumed unless the whole of s matches. A mismatching character
// is reported as: expected "s"; running out of input as "unexpected end".
// NOTE(review): the yielded string_view refers to the copy of s stored inside
// the parser object, so it presumably must not outlive that parser - confirm.
inline auto str(std::string s)
{
    auto msg = std::string("expected \"") + s + std::string("\"");
    return make_parser([s = std::move(s), msg = std::move(msg)](auto & buffer)
        -> result<std::string_view>
    {
        auto cursor = buffer.it;
        for (char expected : s)
        {
            if (cursor == buffer.end)
                return error{buffer.offset(), "unexpected end"};
            if (*cursor != expected)
                return error{buffer.offset(), msg.data()};
            ++cursor;
        }
        // Commit the consumed characters only after a full match.
        buffer.it = cursor;
        return s;
    });
}
// Result of the `ws` parser: carries no data.
struct ws_token{};
// Consumes zero or more spaces and tabs (so it also matches when there is no whitespace at all).
inline auto ws = map(many(one_of(ch(' '), ch('\t'))), [](auto const &){ return ws_token{}; });
// Result of the `newline` parser: carries no data.
struct newline_token{};
// Matches a single '\n' character.
inline auto newline = map(ch('\n'), [](auto const &){ return newline_token{}; });
// Matches a single alphabetic character (as classified by std::isalpha) and yields it.
// The argument is converted to unsigned char first: passing a negative char
// value to std::isalpha is undefined behaviour.
inline auto alpha = guard(peek, [](auto c){ return std::isalpha(static_cast<unsigned char>(c)) != 0; });
// Matches a single decimal digit '0'..'9' and yields its numeric value (an int in 0..9).
inline auto digit = map(guard(peek, [](auto c){ return '0' <= c && c <= '9'; }), [](char c){ return c - '0'; });
// TODO: overflow check for integers
// Parses an optional '-' followed by one or more decimal digits and yields
// the value as T.
// Each digit is converted to T before folding: the seedless fold uses the
// result type of its parser as the accumulator type, so folding `digit`
// directly would accumulate in int regardless of T.
template <typename T>
inline auto integer = map(
    concat(
        maybe(ch('-')),
        fold(
            map(digit, [](int d){ return static_cast<T>(d); }),
            [](T s, T d){ return static_cast<T>(10 * s + d); }
        )
    ),
    [](auto const & t){ return static_cast<T>(std::get<1>(t) * (std::get<0>(t) ? -1 : 1)); }
);
// Parses an optional '-', one or more decimal digits and an optional
// fractional part ('.' followed by zero or more digits); yields the value as T.
// There is no exponent support.
template <typename T>
inline auto real = map(
    concat(
        maybe(ch('-')),
        // Digits are converted to T before folding so that the integer part is
        // accumulated in T: the seedless fold uses the result type of its
        // parser as the accumulator type, which for plain `digit` is int.
        fold(
            map(digit, [](int d){ return static_cast<T>(d); }),
            [](T s, T d){ return static_cast<T>(10 * s + d); }
        ),
        maybe(
            concat(
                ch('.'),
                // Accumulator is (fraction so far, weight of the last digit consumed).
                fold(digit, [](auto p, int d){
                    return std::make_pair(p.first + d * p.second / 10, p.second / 10);
                }, std::make_pair(T{0}, T{1}))
            )
        )
    ),
    [](auto const & t){
        T sign = (std::get<0>(t) ? -1 : 1);
        T i = std::get<1>(t);
        T f = (std::get<2>(t) ? std::get<1>(*std::get<2>(t)).first : 0);
        return sign * (i + f);
    }
);
}

View file

@ -15,3 +15,6 @@
* transparent objects
* get rid of diffuse materials
* find a better specular model (Blinn-Phong seems to over-shine the specular highlight)
* parser
* overflow checks for number parsers
* recursive parsers