Add parser combinator library (wip) with an example
This commit is contained in:
parent
dfc8f13936
commit
02f028b0c9
6 changed files with 604 additions and 0 deletions
87
examples/parser.cpp
Normal file
87
examples/parser.cpp
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
#include <psemek/parser/primitives.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iterator>
|
||||
|
||||
#include <psemek/util/clock.hpp>
|
||||
|
||||
#include <psemek/gfx/obj_parser.hpp>
|
||||
|
||||
// Prints std::monostate as "()".
// The text is streamed first and `s` itself is returned afterwards, so the
// operator also works when Stream is a derived stream type (e.g.
// std::ostringstream), where `s << "()"` yields a std::ostream & that cannot
// bind to Stream &.
template <typename Stream>
Stream & operator << (Stream & s, std::monostate)
{
    s << "()";
    return s;
}
|
||||
|
||||
template <typename Stream>
|
||||
Stream & operator << (Stream & s, psemek::parser::end_token)
|
||||
{
|
||||
return s << "(end)";
|
||||
}
|
||||
|
||||
template <typename Stream>
|
||||
Stream & operator << (Stream & s, psemek::parser::ws_token)
|
||||
{
|
||||
return s << "(ws)";
|
||||
}
|
||||
|
||||
template <typename Stream>
|
||||
Stream & operator << (Stream & s, psemek::parser::newline_token)
|
||||
{
|
||||
return s << "(newline)";
|
||||
}
|
||||
|
||||
// Prints the contained value of an optional, or "(none)" when it is empty.
// `s` is returned explicitly so the operator also compiles when Stream is a
// derived stream type (the inner << yields std::ostream &, not Stream &).
template <typename Stream, typename T>
Stream & operator << (Stream & s, std::optional<T> const & x)
{
    if (x)
        s << *x;
    else
        s << "(none)";
    return s;
}
|
||||
|
||||
// Prints a vector as "[a, b, c]" (and "[]" when empty).
// `s` is returned explicitly so the operator also compiles when Stream is a
// derived stream type (the inner << yields std::ostream &, not Stream &).
template <typename Stream, typename T>
Stream & operator << (Stream & s, std::vector<T> const & v)
{
    s << '[';
    bool first = true;
    for (auto const & item : v)
    {
        // Separator goes before every element except the first one.
        if (!first) s << ", ";
        first = false;
        s << item;
    }
    s << ']';
    return s;
}
|
||||
|
||||
// Streams the elements of tuple-like `t` selected by the index pack I,
// separated by ", ". No surrounding brackets are written.
template <typename Stream, typename T, std::size_t ... I>
void print_tuple (Stream & s, T const & t, std::index_sequence<I...>)
{
    // Writes one element, preceded by a separator for every index but 0.
    auto const emit = [&s](std::size_t index, auto const & element)
    {
        s << (index == 0 ? "" : ", ") << element;
    };

    (emit(I, std::get<I>(t)), ...);
}
|
||||
|
||||
// Prints a tuple as "(a, b, c)"; the elements are written by print_tuple.
// `s` is returned explicitly so the operator also compiles when Stream is a
// derived stream type (the inner << yields std::ostream &, not Stream &).
template <typename Stream, typename ... Ts>
Stream & operator << (Stream & s, std::tuple<Ts...> const & t)
{
    s << '(';
    print_tuple(s, t, std::index_sequence_for<Ts...>{});
    s << ')';
    return s;
}
|
||||
|
||||
// Prints whichever alternative the variant currently holds, using that
// alternative's own operator <<.
template <typename Stream, typename T, typename ... Ts>
Stream & operator << (Stream & s, std::variant<T, Ts...> const & v)
{
    std::visit([&s](auto const & alternative){ s << alternative; }, v);
    return s;
}
|
||||
|
||||
int main()
|
||||
{
|
||||
using namespace psemek::parser;
|
||||
|
||||
auto const p = map(concat(integer<int>, ws, one_of(ch('+'), ch('-')), ws, integer<int>), [](auto const & t){
|
||||
auto id = [](auto x){ return x; };
|
||||
return std::make_tuple(std::get<0>(t), std::visit(id, std::get<2>(t)), std::get<4>(t));
|
||||
});
|
||||
|
||||
std::cout << p.parse("45 + 67") << std::endl;
|
||||
}
|
||||
6
libs/parser/CMakeLists.txt
Normal file
6
libs/parser/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
# Collect the library's headers and sources, as paths relative to this directory.
file(GLOB_RECURSE PSEMEK_PARSER_HEADERS
    RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
    "include/*.hpp"
)
file(GLOB_RECURSE PSEMEK_PARSER_SOURCES
    RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
    "source/*.cpp"
)

# Parser library target; its include directory and the psemek-util dependency
# are PUBLIC, so they propagate to targets that link against psemek-parser.
add_library(psemek-parser
    ${PSEMEK_PARSER_HEADERS}
    ${PSEMEK_PARSER_SOURCES}
)
target_include_directories(psemek-parser
    PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include"
)
target_link_libraries(psemek-parser
    PUBLIC psemek-util
)
|
||||
264
libs/parser/include/psemek/parser/combinators.hpp
Normal file
264
libs/parser/include/psemek/parser/combinators.hpp
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
#pragma once
|
||||
|
||||
#include <psemek/parser/parser.hpp>
|
||||
|
||||
#include <vector>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
|
||||
namespace psemek::parser
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// Tag type that concat() passes to make_parser to mark the parsers it builds.
struct concat_tag{};
|
||||
|
||||
template <std::size_t I, typename Buffer, typename Res>
|
||||
bool concat_helper(std::integral_constant<std::size_t, I>, Buffer &, Res &, error &)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
template <std::size_t I, typename Buffer, typename Res, typename P, typename ... Ps>
|
||||
bool concat_helper(std::integral_constant<std::size_t, I>, Buffer & buf, Res & res, error & e, P const & p, Ps const & ... ps)
|
||||
{
|
||||
auto r = p.apply(buf);
|
||||
if (r.index() == 1)
|
||||
{
|
||||
e = std::get<1>(r);
|
||||
return false;
|
||||
}
|
||||
std::get<I>(res) = std::move(std::get<0>(r));
|
||||
return concat_helper(std::integral_constant<std::size_t, I + 1>{}, buf, res, e, ps...);
|
||||
}
|
||||
|
||||
template <typename Buffer, typename ... Ps>
|
||||
auto concat_impl(Buffer & buf, Ps const & ... ps)
|
||||
-> result<std::tuple<decltype(detail::result_type(ps, buf))...>>
|
||||
{
|
||||
std::tuple<decltype(detail::result_type(ps, buf))...> res;
|
||||
error e;
|
||||
auto it = buf.it;
|
||||
if (!concat_helper(std::integral_constant<std::size_t, 0>{}, buf, res, e, ps...))
|
||||
{
|
||||
buf.it = it;
|
||||
return e;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// Tag type that one_of() passes to make_parser to mark the parsers it builds.
struct one_of_tag{};
|
||||
|
||||
template <std::size_t I, typename Buffer, typename Res>
|
||||
bool one_of_helper(std::integral_constant<std::size_t, I>, Buffer &, Res &, error &)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
template <std::size_t I, typename Buffer, typename Res, typename P, typename ... Ps>
|
||||
bool one_of_helper(std::integral_constant<std::size_t, I>, Buffer & buf, Res & res, error & e, P const & p, Ps const & ... ps)
|
||||
{
|
||||
auto r = p.apply(buf);
|
||||
if (r.index() == 0)
|
||||
{
|
||||
res.template emplace<I>(std::move(std::get<0>(r)));
|
||||
return true;
|
||||
}
|
||||
e = std::get<1>(r);
|
||||
return one_of_helper(std::integral_constant<std::size_t, I + 1>{}, buf, res, e, ps...);
|
||||
}
|
||||
|
||||
template <typename Buffer, typename ... Ps>
|
||||
auto one_of_impl(Buffer & buf, Ps const & ... ps)
|
||||
-> result<std::variant<decltype(detail::result_type(ps, buf))...>>
|
||||
{
|
||||
std::variant<decltype(detail::result_type(ps, buf))...> res;
|
||||
error e;
|
||||
auto it = buf.it;
|
||||
if (!one_of_helper(std::integral_constant<std::size_t, 0>{}, buf, res, e, ps...))
|
||||
{
|
||||
buf.it = it;
|
||||
return e;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template <typename P, typename F>
|
||||
auto map(P && p, F && f)
|
||||
{
|
||||
return make_parser([p = std::forward<P>(p), f = std::forward<F>(f)](auto & buffer)
|
||||
-> result<decltype(f(detail::result_type(p, buffer)))>
|
||||
{
|
||||
auto res = p.apply(buffer);
|
||||
if (res.index() == 1)
|
||||
return std::get<1>(res);
|
||||
return f(std::get<0>(res));
|
||||
});
|
||||
}
|
||||
|
||||
template <typename P, typename F>
|
||||
auto guard(P && p, F && f, std::string message = {})
|
||||
{
|
||||
if (message.empty())
|
||||
message = "guard failed";
|
||||
|
||||
return make_parser([p = std::forward<P>(p), f = std::forward<F>(f), message = std::move(message)](auto & buffer)
|
||||
-> result<decltype(detail::result_type(p, buffer))>
|
||||
{
|
||||
auto it = buffer.it;
|
||||
auto res = p.apply(buffer);
|
||||
if (res.index() == 1)
|
||||
return std::get<1>(res);
|
||||
if (!f(std::get<0>(res)))
|
||||
{
|
||||
buffer.it = it;
|
||||
return error{buffer.offset(), message.data()};
|
||||
}
|
||||
return std::get<0>(res);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
auto maybe(P && p)
|
||||
{
|
||||
return make_parser([p = std::forward<P>(p)](auto & buffer)
|
||||
-> result<std::optional<decltype(detail::result_type(p, buffer))>>
|
||||
{
|
||||
auto res = p.apply(buffer);
|
||||
if (res.index() == 1)
|
||||
return std::nullopt;
|
||||
return std::get<0>(res);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
auto from_to(P && p, std::size_t min_count, std::optional<std::size_t> max_count, std::string message = {})
|
||||
{
|
||||
if (message.empty())
|
||||
message = "(unknown)";
|
||||
|
||||
auto msg = std::string("expected at least ") + std::to_string(min_count) + std::string(" ") + message;
|
||||
|
||||
return make_parser([p = std::forward<P>(p), min_count, max_count, msg = std::move(msg)](auto & buffer)
|
||||
-> result<std::vector<decltype(detail::result_type(p, buffer))>>
|
||||
{
|
||||
auto it = buffer.it;
|
||||
|
||||
std::vector<decltype(detail::result_type(p, buffer))> res;
|
||||
while (true)
|
||||
{
|
||||
if (max_count && res.size() >= *max_count) break;
|
||||
|
||||
auto pos = buffer.it;
|
||||
auto r = p.apply(buffer);
|
||||
if (r.index() == 1) break;
|
||||
if (buffer.it == pos)
|
||||
throw grammar_error("infinite loop");
|
||||
res.push_back(std::move(std::get<0>(r)));
|
||||
}
|
||||
|
||||
if (res.size() < min_count)
|
||||
{
|
||||
buffer.it = it;
|
||||
return error{buffer.offset(), msg.data()};
|
||||
}
|
||||
|
||||
return res;
|
||||
});
|
||||
}
|
||||
|
||||
// Zero or more repetitions of `p`: from_to with no lower and no upper bound.
template <typename P>
auto many(P && p)
{
    std::size_t const min_count = 0;
    return from_to(std::forward<P>(p), min_count, std::nullopt);
}
|
||||
|
||||
// `count` or more repetitions of `p`: from_to with no upper bound.
// `message` is forwarded to from_to for its error text.
template <typename P>
auto at_least(P && p, std::size_t count, std::string message = {})
{
    std::optional<std::size_t> const unbounded = std::nullopt;
    return from_to(std::forward<P>(p), count, unbounded, std::move(message));
}
|
||||
|
||||
// Repetition of `p` with `count` as both the lower and the upper bound of
// from_to. `message` is forwarded to from_to for its error text.
template <typename P>
auto exactly(P && p, std::size_t count, std::string message = {})
{
    std::optional<std::size_t> const upper_bound = count;
    return from_to(std::forward<P>(p), count, upper_bound, std::move(message));
}
|
||||
|
||||
template <typename ... Ps>
|
||||
auto concat(Ps && ... ps)
|
||||
{
|
||||
return make_parser([... ps = std::forward<Ps>(ps)](auto & buffer)
|
||||
-> result<std::tuple<decltype(detail::result_type(ps, buffer)) ...>>
|
||||
{
|
||||
return detail::concat_impl(buffer, ps...);
|
||||
}, detail::concat_tag{});
|
||||
}
|
||||
|
||||
template <typename ... Ps>
|
||||
auto one_of(Ps && ... ps)
|
||||
{
|
||||
return make_parser([... ps = std::forward<Ps>(ps)](auto & buffer)
|
||||
-> result<std::variant<decltype(detail::result_type(ps, buffer)) ...>>
|
||||
{
|
||||
return detail::one_of_impl(buffer, ps...);
|
||||
}, detail::one_of_tag{});
|
||||
}
|
||||
|
||||
template <typename P, typename F, typename A>
|
||||
auto fold(P && p, F && f, A && a)
|
||||
{
|
||||
return make_parser([p = std::forward<P>(p), f = std::forward<F>(f), a = std::forward<A>(a)](auto & buffer)
|
||||
-> result<std::remove_cvref_t<A>>
|
||||
{
|
||||
auto accum = a;
|
||||
while (true)
|
||||
{
|
||||
auto pos = buffer.it;
|
||||
auto res = p.apply(buffer);
|
||||
if (res.index() == 1)
|
||||
return accum;
|
||||
if (pos == buffer.it)
|
||||
throw grammar_error("infinite loop");
|
||||
|
||||
accum = f(accum, std::get<0>(res));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <typename P, typename F>
|
||||
auto fold(P && p, F && f)
|
||||
{
|
||||
return make_parser([p = std::forward<P>(p), f = std::forward<F>(f)](auto & buffer)
|
||||
-> result<decltype(detail::result_type(p, buffer))>
|
||||
{
|
||||
auto res0 = p.apply(buffer);
|
||||
if (res0.index() == 1)
|
||||
return std::get<1>(res0);
|
||||
auto accum = std::move(std::get<0>(res0));
|
||||
while (true)
|
||||
{
|
||||
auto pos = buffer.it;
|
||||
auto res = p.apply(buffer);
|
||||
if (res.index() == 1)
|
||||
return accum;
|
||||
if (pos == buffer.it)
|
||||
throw grammar_error("infinite loop");
|
||||
|
||||
accum = f(std::move(accum), std::move(std::get<0>(res)));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Empty value type produced by skip_while().
struct skip_token{};
|
||||
|
||||
template <typename P>
|
||||
auto skip_while(P && p)
|
||||
{
|
||||
return fold(std::forward<P>(p), [](auto const &, auto const &){ return skip_token{}; }, skip_token{});
|
||||
}
|
||||
|
||||
}
|
||||
131
libs/parser/include/psemek/parser/parser.hpp
Normal file
131
libs/parser/include/psemek/parser/parser.hpp
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
#pragma once
|
||||
|
||||
#include <variant>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <stdexcept>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include <psemek/util/to_string.hpp>
|
||||
|
||||
namespace psemek::parser
|
||||
{
|
||||
|
||||
struct parse_error
|
||||
: std::runtime_error
|
||||
{
|
||||
parse_error(std::string message, std::size_t line, std::size_t character)
|
||||
: std::runtime_error(std::move(message))
|
||||
, line_{line}
|
||||
, character_{character}
|
||||
, what_{util::to_string(message, " at ", line, "#", character)}
|
||||
{}
|
||||
|
||||
std::size_t line() const { return line_; }
|
||||
std::size_t character() const { return character_; }
|
||||
|
||||
const char * what() const noexcept { return what_.data(); }
|
||||
|
||||
private:
|
||||
std::size_t line_;
|
||||
std::size_t character_;
|
||||
std::string what_;
|
||||
};
|
||||
|
||||
// Exception type for errors in the grammar itself rather than in the parsed
// input; the combinators throw it with "infinite loop" when a repeated parser
// succeeds without consuming anything.
struct grammar_error
    : std::runtime_error
{
    grammar_error(std::string message)
        : std::runtime_error{std::move(message)}
    {}
};
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// Type-computation helper: its return type is the value type a parser yields,
// i.e. the type of alternative 0 of p.apply(b) with references and
// cv-qualifiers removed. Only a declaration is visible here, and the
// combinators use it exclusively inside decltype(...).
template <typename P, typename B>
auto result_type(P const & p, B & b) -> std::remove_cvref_t<decltype(std::get<0>(p.apply(b)))>;
|
||||
|
||||
// Input range [begin, end) together with the current read position `it`,
// which starts at `begin`. Parsers advance `it` as they consume input and
// assign it back to rewind.
template <typename Iterator>
struct buffer
{
    // Element type of the range: what dereferencing the iterator yields,
    // without reference and cv-qualifiers.
    using char_type = std::remove_cv_t<std::remove_reference_t<decltype(*std::declval<Iterator>())>>;

    Iterator begin;
    Iterator end;
    Iterator it;

    buffer(Iterator begin, Iterator end)
        : begin(begin)
        , end(end)
        , it(begin)
    {}

    // Distance of the current position from the start of the input.
    std::ptrdiff_t offset() const
    {
        return it - begin;
    }
};
|
||||
|
||||
template <typename P, typename Tag = void>
|
||||
struct parser
|
||||
{
|
||||
using tag = Tag;
|
||||
|
||||
P p;
|
||||
|
||||
template <typename Buffer>
|
||||
auto apply(Buffer & buf) const
|
||||
{
|
||||
return p(buf);
|
||||
}
|
||||
|
||||
auto parse(std::string_view text) const
|
||||
{
|
||||
buffer<char const *> buf{text.data(), text.data() + text.size()};
|
||||
auto res = apply(buf);
|
||||
if (res.index() == 1)
|
||||
{
|
||||
auto const & e = std::get<1>(res);
|
||||
std::size_t l = 0;
|
||||
std::size_t offset = e.offset;
|
||||
|
||||
std::size_t last_newline = 0;
|
||||
for (std::size_t i = 0; i < e.offset; ++i)
|
||||
{
|
||||
if (buf.begin[i] == '\n')
|
||||
{
|
||||
++l;
|
||||
offset -= i - last_newline;
|
||||
last_newline = i;
|
||||
}
|
||||
}
|
||||
|
||||
throw parse_error(e.message, l, offset);
|
||||
}
|
||||
return std::get<0>(res);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// Failure value returned by parsers (the second alternative of result<T>).
struct error
{
    // Position of the failure, as reported by buffer::offset().
    std::ptrdiff_t offset;
    // Human-readable description. This is a plain, non-owning pointer: the
    // combinators pass string literals or the data() of strings stored in the
    // parser that produced the error. NOTE(review): presumably it must not
    // outlive that parser - confirm.
    char const * message;
};
|
||||
|
||||
// Outcome of applying a parser: alternative 0 holds the parsed value,
// alternative 1 holds the error.
template <typename T>
using result = std::variant<T, error>;
|
||||
|
||||
template <typename P>
|
||||
auto make_parser(P && p)
|
||||
{
|
||||
return detail::parser<std::remove_cvref_t<P>>{std::forward<P>(p)};
|
||||
}
|
||||
|
||||
template <typename P, typename Tag>
|
||||
auto make_parser(P && p, Tag)
|
||||
{
|
||||
return detail::parser<std::remove_cvref_t<P>, Tag>{std::forward<P>(p)};
|
||||
}
|
||||
|
||||
}
|
||||
113
libs/parser/include/psemek/parser/primitives.hpp
Normal file
113
libs/parser/include/psemek/parser/primitives.hpp
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
#pragma once
|
||||
|
||||
#include <psemek/parser/parser.hpp>
#include <psemek/parser/combinators.hpp>

#include <cctype>
#include <string>
#include <string_view>
#include <utility>
|
||||
|
||||
namespace psemek::parser
|
||||
{
|
||||
|
||||
struct end_token{};
|
||||
|
||||
const auto end = make_parser([](auto & buffer)
|
||||
-> result<end_token>
|
||||
{
|
||||
if (buffer.it == buffer.end)
|
||||
return end_token{};
|
||||
return error{buffer.offset(), "unexpected trailing data"};
|
||||
});
|
||||
|
||||
template <typename T>
|
||||
auto pure(T && t)
|
||||
{
|
||||
return make_parser([t = std::forward<T>(t)](auto &)
|
||||
-> result<std::remove_cvref_t<T>>
|
||||
{
|
||||
return t;
|
||||
});
|
||||
}
|
||||
|
||||
const auto peek = make_parser([](auto & buffer)
|
||||
-> result<typename std::remove_cvref_t<decltype(buffer)>::char_type>
|
||||
{
|
||||
if (buffer.it == buffer.end)
|
||||
return error{buffer.offset(), "unexpected end"};
|
||||
return *buffer.it++;
|
||||
});
|
||||
|
||||
inline auto ch(char x)
|
||||
{
|
||||
return guard(peek, [x](auto const & y){ return x == y; }, std::string("expected \"") + std::string(1, x) + std::string("\""));
|
||||
}
|
||||
|
||||
inline auto str(std::string s)
|
||||
{
|
||||
auto msg = std::string("expected \"") + s + std::string("\"");
|
||||
return make_parser([s = std::move(s), msg = std::move(msg)](auto & buffer)
|
||||
-> result<std::string_view>
|
||||
{
|
||||
auto it = buffer.it;
|
||||
std::size_t i = 0;
|
||||
|
||||
while (it != buffer.end && i < s.size())
|
||||
{
|
||||
if (*it != s[i])
|
||||
return error{buffer.offset(), msg.data()};
|
||||
|
||||
++it;
|
||||
++i;
|
||||
}
|
||||
|
||||
if (i < s.size())
|
||||
return error{buffer.offset(), "unexpected end"};
|
||||
|
||||
buffer.it = it;
|
||||
return s;
|
||||
});
|
||||
}
|
||||
|
||||
struct ws_token{};
|
||||
|
||||
inline auto ws = map(many(one_of(ch(' '), ch('\t'))), [](auto const &){ return ws_token{}; });
|
||||
|
||||
struct newline_token{};
|
||||
|
||||
inline auto newline = map(ch('\n'), [](auto const &){ return newline_token{}; });
|
||||
|
||||
// Matches one alphabetic character (as classified by std::isalpha) and yields it.
// The character is converted to unsigned char first: passing a negative char
// value to std::isalpha is undefined behaviour.
inline auto alpha = guard(peek, [](auto c){ return std::isalpha(static_cast<unsigned char>(c)) != 0; });
|
||||
|
||||
// Matches one decimal digit character '0'..'9' and yields its numeric value
// (0..9) as an int.
inline auto digit = map(
    guard(peek, [](auto c){ return '0' <= c && c <= '9'; }),
    [](char c){ return c - '0'; }
);
|
||||
|
||||
// TODO: overflow check for integers
|
||||
|
||||
template <typename T>
|
||||
inline auto integer = map(
|
||||
concat(
|
||||
maybe(ch('-')),
|
||||
fold(digit, [](int s, int d){ return 10 * s + d; })
|
||||
),
|
||||
[](auto const & t){ return std::get<1>(t) * (std::get<0>(t) ? -1 : 1); }
|
||||
);
|
||||
|
||||
template <typename T>
|
||||
inline auto real = map(
|
||||
concat(
|
||||
maybe(ch('-')),
|
||||
fold(digit, [](T s, int d){ return 10 * s + d; }),
|
||||
maybe(
|
||||
concat(
|
||||
ch('.'),
|
||||
fold(digit, [](auto p, int d){
|
||||
return std::make_pair(p.first + d * p.second / 10, p.second / 10);
|
||||
}, std::make_pair(T{0}, T{1}))
|
||||
)
|
||||
)
|
||||
),
|
||||
[](auto const & t){
|
||||
T sign = (std::get<0>(t) ? -1 : 1);
|
||||
T i = std::get<1>(t);
|
||||
T f = (std::get<2>(t) ? std::get<1>(*std::get<2>(t)).first : 0);
|
||||
return sign * (i + f);
|
||||
}
|
||||
);
|
||||
|
||||
}
|
||||
3
todo.md
3
todo.md
|
|
@ -15,3 +15,6 @@
|
|||
* transparent objects
|
||||
* get rid of diffuse materials
|
||||
* find a better specular model (Blinn-Phong seems to over-shine the specular highlight)
|
||||
* parser
|
||||
* overflow checks for number parsers
|
||||
* recursive parsers
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue