commit 9d7f81d7fcff7ae03c08ebfe6a567b50e83e39a3 Author: lisyarus Date: Tue Dec 16 14:01:50 2025 +0300 Initial commit: wip spec, ast definition, parser & interpreter diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..53c34af --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.30) +project(pslang CXX) + +add_subdirectory(libs/type) +add_subdirectory(libs/ast) +add_subdirectory(libs/parser) +add_subdirectory(libs/interpreter) + +add_subdirectory(apps/interpreter) diff --git a/apps/interpreter/CMakeLists.txt b/apps/interpreter/CMakeLists.txt new file mode 100644 index 0000000..2109f54 --- /dev/null +++ b/apps/interpreter/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB_RECURSE PSLI_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp") +file(GLOB_RECURSE PSLI_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp") + +add_executable(psli ${PSLI_HEADERS} ${PSLI_SOURCES}) +target_include_directories(psli PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +target_link_libraries(psli PUBLIC pslang-interpreter) diff --git a/apps/interpreter/source/main.cpp b/apps/interpreter/source/main.cpp new file mode 100644 index 0000000..a761c65 --- /dev/null +++ b/apps/interpreter/source/main.cpp @@ -0,0 +1,45 @@ +#include +#include +#include + +#include +#include + +int main(int argc, char ** argv) +{ + if (argc == 1) + { + std::cout << "Usage: psli [ options ] [ ... 
]\n"; + std::cout << "Available options:\n"; + std::cout << " -t, --trace Trace each line of execution\n"; + std::cout << " -d, --dump Dump all variables after processing each file\n"; + return 0; + } + + using namespace pslang; + + auto context = interpreter::empty_context(); + + bool dump = false; + + for (int arg = 1; arg < argc; ++arg) + { + if (std::strcmp(argv[arg], "-d") == 0 || std::strcmp(argv[arg], "--dump") == 0) + { + dump = true; + continue; + } + + if (std::strcmp(argv[arg], "-t") == 0 || std::strcmp(argv[arg], "--trace") == 0) + { + context.trace = true; + continue; + } + + auto ast = parser::parse(argv[arg]); + interpreter::execute(context, ast); + + if (dump) + interpreter::dump(std::cout, context); + } +} diff --git a/examples/example.psl b/examples/example.psl new file mode 100644 index 0000000..5471670 --- /dev/null +++ b/examples/example.psl @@ -0,0 +1,84 @@ +import math +import events +import components +import ecs + +const x = 10s // deduced type i16 +var y = 14u // deduced type u32 +var z: f64 = 3.14l + +func fma(x: f32, y: f32, z: f32) -> f32: + return x * y + z + +struct vec2: + x: f32 + y: f32 + +// pass by value +func length(v: vec2) -> f32: + return math.sqrt(v.x * v.x + v.y * v.y) + +// return type deduced as u64 +func merge(x: u32, y: u32): + return (x as u64) or ((y as u64) << 32) + +var v = vec2(10, 20) +length(v) + +// can be called using method syntax +v.length() + +// function pointers +var my_func = fma // deduced type (f32, f32, f32) -> f32 + +// pass by reference/pointer with * +// TODO: const pointer? +func my_system(event: events.update, position: *components.position, velocity: *components.velocity): + position += event.dt * velocity + +func attach(dispatcher: *ecs.dispatcher): + // TODO: how does it work? C++-style variadic templates? Oh no... 
+ dispatcher.system(my_system) + +// objects with methods +struct rectangle: + width: i32 + height: i32 + +func extend(r: &rectangle, size: i32): + r.width += size + r.height += size + +var r = rectangle(10, 12) +r.extend(5) + +// named initializers +var r2 = rectangle(width = 20, height = 30) + +// regular pointers +var ptr: *i32 = null +var x = 15 +ptr = &x + +// field/method access using pointers is the same as with values +var sptr: *rectangle = &r +sptr.width *= 2 + +// simple generics +struct array(T): + data: *T + size: u64 + +// TODO: constructors? destructors? +func new(self: array(T), size: u64): + return array(T)(data = mem.alloc(size * sizeof(T)), size = size) + +// TODO: static arrays? +// TODO: move-only types? alloc returns something like unique ptr? + +struct kvpair(K, V): + key: K + value: V + +struct arraymap(K, V): + values: array(kvpair(K, V)) diff --git a/examples/test.psl b/examples/test.psl new file mode 100644 index 0000000..9f30fa9 --- /dev/null +++ b/examples/test.psl @@ -0,0 +1,3 @@ +let n = 10 + (1u as i32) +let x = (n as f32) / 3.0 +x = 15.2 diff --git a/libs/ast/CMakeLists.txt b/libs/ast/CMakeLists.txt new file mode 100644 index 0000000..065af0b --- /dev/null +++ b/libs/ast/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB_RECURSE PSLANG_AST_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp") +file(GLOB_RECURSE PSLANG_AST_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp") + +add_library(pslang-ast STATIC ${PSLANG_AST_HEADERS} ${PSLANG_AST_SOURCES}) +target_include_directories(pslang-ast PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +target_link_libraries(pslang-ast PUBLIC pslang-type) diff --git a/libs/ast/include/pslang/ast/cast.hpp b/libs/ast/include/pslang/ast/cast.hpp new file mode 100644 index 0000000..d2975d7 --- /dev/null +++ b/libs/ast/include/pslang/ast/cast.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +namespace pslang::ast +{ + + struct cast_operation + { + expression_ptr expression; + type::type_ptr type; + }; + +} diff 
--git a/libs/ast/include/pslang/ast/control.hpp b/libs/ast/include/pslang/ast/control.hpp new file mode 100644 index 0000000..f937891 --- /dev/null +++ b/libs/ast/include/pslang/ast/control.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include + +namespace pslang::ast +{ + + // N.B.: if_block, else_block, and else_if_block are temporary parsing elements + // and are not present in the final AST + + struct if_block + { + expression_ptr condition; + statement_list_ptr statements; + }; + + struct else_block + { + statement_list_ptr statements; + }; + + struct else_if_block + { + expression_ptr condition; + statement_list_ptr statements; + }; + + // Interpreted as a consecutive "if -> else if -> else if -> else" chain + // Empty condition means no condition (last "else" in chain) + // All blocks but the last must have a non-empty condition + struct if_chain + { + struct block + { + expression_ptr condition; + statement_list_ptr statements; + }; + + std::vector blocks; + }; + + struct while_block + { + expression_ptr condition; + statement_list_ptr statements; + }; + +} diff --git a/libs/ast/include/pslang/ast/expression.hpp b/libs/ast/include/pslang/ast/expression.hpp new file mode 100644 index 0000000..a4de813 --- /dev/null +++ b/libs/ast/include/pslang/ast/expression.hpp @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace pslang::ast +{ + + struct unary_operation + { + unary_operation_type type; + expression_ptr arg1; + }; + + struct binary_operation + { + binary_operation_type type; + expression_ptr arg1; + expression_ptr arg2; + }; + + using expression_impl = std::variant< + literal, + identifier, + unary_operation, + binary_operation, + cast_operation + >; + + struct expression + : expression_impl + { + using expression_impl::expression_impl; + }; + +} diff --git a/libs/ast/include/pslang/ast/expression_fwd.hpp b/libs/ast/include/pslang/ast/expression_fwd.hpp new file mode 100644 index 0000000..af492fd --- /dev/null 
+++ b/libs/ast/include/pslang/ast/expression_fwd.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include + +namespace pslang::ast +{ + + struct expression; + + using expression_ptr = std::unique_ptr; + +} diff --git a/libs/ast/include/pslang/ast/identifier.hpp b/libs/ast/include/pslang/ast/identifier.hpp new file mode 100644 index 0000000..de581fd --- /dev/null +++ b/libs/ast/include/pslang/ast/identifier.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace pslang::ast +{ + + struct identifier + { + std::string name; + }; + +} diff --git a/libs/ast/include/pslang/ast/literal.hpp b/libs/ast/include/pslang/ast/literal.hpp new file mode 100644 index 0000000..3c6d056 --- /dev/null +++ b/libs/ast/include/pslang/ast/literal.hpp @@ -0,0 +1,43 @@ +#pragma once + +#include +#include + +namespace pslang::ast +{ + + template + struct numeric_literal_base + { + T value; + }; + + using bool_literal = numeric_literal_base; + + using i8_literal = numeric_literal_base; + using u8_literal = numeric_literal_base; + using i16_literal = numeric_literal_base; + using u16_literal = numeric_literal_base; + using i32_literal = numeric_literal_base; + using u32_literal = numeric_literal_base; + using i64_literal = numeric_literal_base; + using u64_literal = numeric_literal_base; + + using f32_literal = numeric_literal_base; + using f64_literal = numeric_literal_base; + + using literal = std::variant< + bool_literal, + i8_literal, + u8_literal, + i16_literal, + u16_literal, + i32_literal, + u32_literal, + i64_literal, + u64_literal, + f32_literal, + f64_literal + >; + +} diff --git a/libs/ast/include/pslang/ast/operation.hpp b/libs/ast/include/pslang/ast/operation.hpp new file mode 100644 index 0000000..709d281 --- /dev/null +++ b/libs/ast/include/pslang/ast/operation.hpp @@ -0,0 +1,96 @@ +#pragma once + +namespace pslang::ast +{ + + enum class unary_operation_type + { + negation, + logical_not, + }; + + enum class binary_operation_type + { + addition, + subtraction, + multiplication, + 
division, + remainder, + logical_and, + logical_or, + logical_xor, + equals, + not_equals, + less, + greater, + less_equals, + greater_equals, + }; + + template + Ostream & operator << (Ostream & out, unary_operation_type type) + { + switch (type) + { + case unary_operation_type::negation: + out << "negation"; + break; + case unary_operation_type::logical_not: + out << "not"; + break; + } + return out; + } + + template + Ostream & operator << (Ostream & out, binary_operation_type type) + { + switch (type) + { + case binary_operation_type::addition: + out << "addition"; + break; + case binary_operation_type::subtraction: + out << "subtraction"; + break; + case binary_operation_type::multiplication: + out << "multiplication"; + break; + case binary_operation_type::division: + out << "division"; + break; + case binary_operation_type::remainder: + out << "remainder"; + break; + case binary_operation_type::logical_and: + out << "and"; + break; + case binary_operation_type::logical_or: + out << "or"; + break; + case binary_operation_type::logical_xor: + out << "xor"; + break; + case binary_operation_type::equals: + out << "equals"; + break; + case binary_operation_type::not_equals: + out << "not equals"; + break; + case binary_operation_type::less: + out << "less"; + break; + case binary_operation_type::greater: + out << "greater"; + break; + case binary_operation_type::less_equals: + out << "less or equals"; + break; + case binary_operation_type::greater_equals: + out << "greater or equals"; + break; + } + return out; + } + +} diff --git a/libs/ast/include/pslang/ast/print.hpp b/libs/ast/include/pslang/ast/print.hpp new file mode 100644 index 0000000..6556fef --- /dev/null +++ b/libs/ast/include/pslang/ast/print.hpp @@ -0,0 +1,43 @@ +#pragma once + +#include + +#include + +namespace pslang::ast +{ + + struct print_options + { + std::string_view indent_string = "| "; + std::size_t indent_level = 0; + }; + + void print(std::ostream & out, bool_literal const & node, 
print_options const & options = {}); + void print(std::ostream & out, i8_literal const & node, print_options const & options = {}); + void print(std::ostream & out, u8_literal const & node, print_options const & options = {}); + void print(std::ostream & out, i16_literal const & node, print_options const & options = {}); + void print(std::ostream & out, u16_literal const & node, print_options const & options = {}); + void print(std::ostream & out, i32_literal const & node, print_options const & options = {}); + void print(std::ostream & out, u32_literal const & node, print_options const & options = {}); + void print(std::ostream & out, i64_literal const & node, print_options const & options = {}); + void print(std::ostream & out, u64_literal const & node, print_options const & options = {}); + void print(std::ostream & out, f32_literal const & node, print_options const & options = {}); + void print(std::ostream & out, f64_literal const & node, print_options const & options = {}); + void print(std::ostream & out, literal const & node, print_options const & options = {}); + void print(std::ostream & out, identifier const & node, print_options const & options = {}); + void print(std::ostream & out, unary_operation const & node, print_options const & options = {}); + void print(std::ostream & out, binary_operation const & node, print_options const & options = {}); + void print(std::ostream & out, cast_operation const & node, print_options const & options = {}); + void print(std::ostream & out, expression_ptr const & node, print_options const & options = {}); + void print(std::ostream & out, assignment const & node, print_options const & options = {}); + void print(std::ostream & out, variable_declaration const & node, print_options const & options = {}); + void print(std::ostream & out, if_block const & node, print_options const & options = {}); + void print(std::ostream & out, else_block const & node, print_options const & options = {}); + void print(std::ostream & 
out, else_if_block const & node, print_options const & options = {}); + void print(std::ostream & out, if_chain const & node, print_options const & options = {}); + void print(std::ostream & out, while_block const & node, print_options const & options = {}); + void print(std::ostream & out, statement_ptr const & node, print_options const & options = {}); + void print(std::ostream & out, statement_list_ptr const & node, print_options const & options = {}); + +} diff --git a/libs/ast/include/pslang/ast/statement.hpp b/libs/ast/include/pslang/ast/statement.hpp new file mode 100644 index 0000000..ad02dbc --- /dev/null +++ b/libs/ast/include/pslang/ast/statement.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace pslang::ast +{ + + struct variable_declaration + { + value_category category; + std::string name; + type::type_ptr type; + expression_ptr initializer; + }; + + struct assignment + { + expression_ptr lhs; + expression_ptr rhs; + }; + + using statement_impl = std::variant< + expression_ptr, + assignment, + variable_declaration, + if_block, + else_block, + else_if_block, + if_chain, + while_block + >; + + struct statement + : statement_impl + { + using statement_impl::statement_impl; + }; + +} diff --git a/libs/ast/include/pslang/ast/statement_fwd.hpp b/libs/ast/include/pslang/ast/statement_fwd.hpp new file mode 100644 index 0000000..d2f4856 --- /dev/null +++ b/libs/ast/include/pslang/ast/statement_fwd.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include +#include + +namespace pslang::ast +{ + + struct statement; + + using statement_ptr = std::unique_ptr; + + struct statement_list + { + std::vector statements; + }; + + using statement_list_ptr = std::unique_ptr; + +} diff --git a/libs/ast/include/pslang/ast/value_category.hpp b/libs/ast/include/pslang/ast/value_category.hpp new file mode 100644 index 0000000..466c46c --- /dev/null +++ b/libs/ast/include/pslang/ast/value_category.hpp @@ -0,0 +1,31 @@ +#pragma 
once + +namespace pslang::ast +{ + + enum class value_category + { + compile_time, + constant, + _mutable, + }; + + template + Ostream & operator << (Ostream & out, value_category category) + { + switch (category) + { + case value_category::compile_time: + out << "compile-time"; + break; + case value_category::constant: + out << "constant"; + break; + case value_category::_mutable: + out << "mutable"; + break; + } + return out; + } + +} diff --git a/libs/ast/source/print.cpp b/libs/ast/source/print.cpp new file mode 100644 index 0000000..392f878 --- /dev/null +++ b/libs/ast/source/print.cpp @@ -0,0 +1,252 @@ +#include +#include + +#include + +namespace pslang::ast +{ + + namespace + { + + print_options child(print_options options) + { + options.indent_level += 1; + return options; + } + + void put_indent(std::ostream & out, print_options const & options) + { + for (std::size_t i = 0; i < options.indent_level; ++i) + out << options.indent_string; + } + + void newline(std::ostream & out) + { + out << '\n'; + } + + } + + void print(std::ostream & out, bool_literal const & node, print_options const & options) + { + put_indent(out, options); + out << "bool literal { value = " << (node.value ? 
"true" : "false") << " }"; + newline(out); + } + + void print(std::ostream & out, i8_literal const & node, print_options const & options) + { + put_indent(out, options); + out << "i8 literal { value = " << static_cast<std::int32_t>(node.value) << " }"; // presumably widened so the 8-bit value prints as a number, not a character + newline(out); + } + + void print(std::ostream & out, u8_literal const & node, print_options const & options) + { + put_indent(out, options); + out << "u8 literal { value = " << static_cast<std::uint32_t>(node.value) << " }"; // presumably widened so the 8-bit value prints as a number, not a character + newline(out); + } + + void print(std::ostream & out, i16_literal const & node, print_options const & options) + { + put_indent(out, options); + out << "i16 literal { value = " << node.value << " }"; + newline(out); + } + + void print(std::ostream & out, u16_literal const & node, print_options const & options) + { + put_indent(out, options); + out << "u16 literal { value = " << node.value << " }"; + newline(out); + } + + void print(std::ostream & out, i32_literal const & node, print_options const & options) + { + put_indent(out, options); + out << "i32 literal { value = " << node.value << " }"; + newline(out); + } + + void print(std::ostream & out, u32_literal const & node, print_options const & options) + { + put_indent(out, options); + out << "u32 literal { value = " << node.value << " }"; + newline(out); + } + + void print(std::ostream & out, i64_literal const & node, print_options const & options) + { + put_indent(out, options); + out << "i64 literal { value = " << node.value << " }"; + newline(out); + } + + void print(std::ostream & out, u64_literal const & node, print_options const & options) + { + put_indent(out, options); + out << "u64 literal { value = " << node.value << " }"; + newline(out); + } + + void print(std::ostream & out, f32_literal const & node, print_options const & options) + { + put_indent(out, options); + // Print with 7 significant digits, then restore the caller's stream precision. + auto const saved_precision = out.precision(7); + out << "f32 literal { value = " << node.value << " }"; + out.precision(saved_precision); + newline(out); + } + + void print(std::ostream & out, f64_literal const & node, print_options const & options) + 
{ + put_indent(out, options); + // Print with 15 significant digits, then restore the caller's stream precision. + auto const saved_precision = out.precision(15); + out << "f64 literal { value = " << node.value << " }"; + out.precision(saved_precision); + newline(out); + } + + void print(std::ostream & out, literal const & node, print_options const & options) + { + std::visit([&](auto const & value){ print(out, value, options); }, node); + } + + void print(std::ostream & out, identifier const & node, print_options const & options) + { + put_indent(out, options); + out << "identifier { name = \"" << node.name << "\" }"; + newline(out); + } + + void print(std::ostream & out, unary_operation const & node, print_options const & options) + { + put_indent(out, options); + out << node.type; + newline(out); + print(out, node.arg1, child(options)); + } + + void print(std::ostream & out, binary_operation const & node, print_options const & options) + { + put_indent(out, options); + out << node.type; + newline(out); + print(out, node.arg1, child(options)); + print(out, node.arg2, child(options)); + } + + void print(std::ostream & out, cast_operation const & node, print_options const & options) + { + put_indent(out, options); + out << "cast as "; + type::print(out, *node.type); + newline(out); + print(out, node.expression, child(options)); + } + + void print(std::ostream & out, expression_ptr const & node, print_options const & options) + { + std::visit([&](auto const & value){ print(out, value, options); }, *node); + } + + void print(std::ostream & out, assignment const & node, print_options const & options) + { + put_indent(out, options); + out << "assignment"; + newline(out); + print(out, node.lhs, child(options)); + print(out, node.rhs, child(options)); + } + + void print(std::ostream & out, variable_declaration const & node, print_options const & options) + { + put_indent(out, options); + out << "variable declaration { category = " << node.category << ", name = \"" << node.name << "\""; + if (node.type) + { + out << ", type = "; + type::print(out, *node.type); + } + out << " }"; + newline(out); + print(out, 
node.initializer, child(options)); + } + + void print(std::ostream & out, if_block const & node, print_options const & options) + { + put_indent(out, options); + out << "if"; + newline(out); + print(out, node.condition, child(options)); + print(out, node.statements, child(options)); + } + + void print(std::ostream & out, else_block const & node, print_options const & options) + { + put_indent(out, options); + out << "else"; + newline(out); + print(out, node.statements, child(options)); + } + + void print(std::ostream & out, else_if_block const & node, print_options const & options) + { + put_indent(out, options); + out << "else if"; + newline(out); + print(out, node.condition, child(options)); + print(out, node.statements, child(options)); + } + + void print(std::ostream & out, if_chain const & node, print_options const & options) + { + put_indent(out, options); + out << "if chain"; + newline(out); + for (auto const & block : node.blocks) + { + put_indent(out, child(options)); + out << "condition"; + newline(out); + if (block.condition) + print(out, block.condition, child(child(options))); + else + { + put_indent(out, child(child(options))); + out << "(none)"; + newline(out); + } + + put_indent(out, child(options)); + out << "body"; + newline(out); + print(out, block.statements, child(child(options))); + } + } + + void print(std::ostream & out, while_block const & node, print_options const & options) + { + put_indent(out, options); + out << "while"; + newline(out); + print(out, node.condition, child(options)); + print(out, node.statements, child(options)); + } + + void print(std::ostream & out, statement_ptr const & node, print_options const & options) + { + std::visit([&](auto const & value){ print(out, value, options); }, *node); + } + + void print(std::ostream & out, statement_list_ptr const & node, print_options const & options) + { + for (auto const & statement : node->statements) + print(out, statement, options); + } + +} diff --git 
a/libs/interpreter/CMakeLists.txt b/libs/interpreter/CMakeLists.txt new file mode 100644 index 0000000..f9c9c7f --- /dev/null +++ b/libs/interpreter/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB_RECURSE PSLANG_INTERPRETER_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp") +file(GLOB_RECURSE PSLANG_INTERPRETER_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp") + +add_library(pslang-interpreter STATIC ${PSLANG_INTERPRETER_HEADERS} ${PSLANG_INTERPRETER_SOURCES}) +target_include_directories(pslang-interpreter PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +target_link_libraries(pslang-interpreter PUBLIC pslang-parser) diff --git a/libs/interpreter/include/pslang/interpreter/context.hpp b/libs/interpreter/include/pslang/interpreter/context.hpp new file mode 100644 index 0000000..bdb6a20 --- /dev/null +++ b/libs/interpreter/include/pslang/interpreter/context.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace pslang::interpreter +{ + + struct variable_data + { + ast::value_category category; + interpreter::value value; + }; + + struct scope + { + std::unordered_map variables; + }; + + struct context + { + bool trace = false; + std::vector scope_stack; + }; + + context empty_context(); + + void dump(std::ostream & out, context const & context); + +} diff --git a/libs/interpreter/include/pslang/interpreter/eval.hpp b/libs/interpreter/include/pslang/interpreter/eval.hpp new file mode 100644 index 0000000..be5b43a --- /dev/null +++ b/libs/interpreter/include/pslang/interpreter/eval.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include +#include +#include + +namespace pslang::interpreter +{ + + value eval(context & context, ast::expression_ptr const & expression); + +} diff --git a/libs/interpreter/include/pslang/interpreter/interpreter.hpp b/libs/interpreter/include/pslang/interpreter/interpreter.hpp new file mode 100644 index 0000000..88e7e18 --- /dev/null +++ b/libs/interpreter/include/pslang/interpreter/interpreter.hpp @@ 
-0,0 +1,11 @@ +#pragma once + +#include +#include + +namespace pslang::interpreter +{ + + void execute(context & context, ast::statement_list_ptr const & statements); + +} diff --git a/libs/interpreter/include/pslang/interpreter/value.hpp b/libs/interpreter/include/pslang/interpreter/value.hpp new file mode 100644 index 0000000..3369672 --- /dev/null +++ b/libs/interpreter/include/pslang/interpreter/value.hpp @@ -0,0 +1,67 @@ +#pragma once + +#include + +#include +#include +#include + +namespace pslang::interpreter +{ + + template + struct primitive_value_base + { + using native_type = T; + T value; + }; + + using bool_value = primitive_value_base; + + using i8_value = primitive_value_base; + using u8_value = primitive_value_base; + using i16_value = primitive_value_base; + using u16_value = primitive_value_base; + using i32_value = primitive_value_base; + using u32_value = primitive_value_base; + using i64_value = primitive_value_base; + using u64_value = primitive_value_base; + + using f32_value = primitive_value_base; + using f64_value = primitive_value_base; + + using primitive_value_impl = std::variant< + bool_value, + i8_value, + u8_value, + i16_value, + u16_value, + i32_value, + u32_value, + i64_value, + u64_value, + f32_value, + f64_value + >; + + struct primitive_value + : primitive_value_impl + { + using primitive_value_impl::primitive_value_impl; + }; + + using value_impl = std::variant< + primitive_value + >; + + struct value + : value_impl + { + using value_impl::value_impl; + }; + + type::type type_of(value const & value); + + void print(std::ostream & out, value const & value); + +} diff --git a/libs/interpreter/source/context.cpp b/libs/interpreter/source/context.cpp new file mode 100644 index 0000000..30af7cc --- /dev/null +++ b/libs/interpreter/source/context.cpp @@ -0,0 +1,30 @@ +#include +#include + +namespace pslang::interpreter +{ + + context empty_context() + { + context result; + result.scope_stack.emplace_back(); + return result; + } + + 
void dump(std::ostream & out, context const & context) + { + for (auto const & scope : context.scope_stack) + { + for (auto const & variable : scope.variables) + { + out << variable.first << " = "; + print(out, variable.second.value); + out << " ("; + type::print(out, type_of(variable.second.value)); + out << ")\n"; + } + } + std::cout << std::flush; + } + +} diff --git a/libs/interpreter/source/eval.cpp b/libs/interpreter/source/eval.cpp new file mode 100644 index 0000000..21a945f --- /dev/null +++ b/libs/interpreter/source/eval.cpp @@ -0,0 +1,332 @@ +#include +#include +#include +#include +#include + +#include + +namespace pslang::interpreter +{ + + namespace + { + + void print(std::ostream & out, ast::unary_operation_type type) + { + switch (type) + { + case ast::unary_operation_type::negation: + out << "-"; + return; + case ast::unary_operation_type::logical_not: + out << "!"; + return; + } + + out << "(unknown)"; + } + + void print(std::ostream & out, ast::binary_operation_type type) + { + switch (type) + { + case ast::binary_operation_type::addition: + out << "+"; + return; + case ast::binary_operation_type::subtraction: + out << "-"; + return; + case ast::binary_operation_type::multiplication: + out << "*"; + return; + case ast::binary_operation_type::division: + out << "/"; + return; + case ast::binary_operation_type::remainder: + out << "%"; + return; + case ast::binary_operation_type::logical_and: + out << "&"; + return; + case ast::binary_operation_type::logical_or: + out << "|"; + return; + case ast::binary_operation_type::logical_xor: + out << "^"; + return; + case ast::binary_operation_type::equals: + out << "=="; + return; + case ast::binary_operation_type::not_equals: + out << "!="; + return; + case ast::binary_operation_type::less: + out << "<"; + return; + case ast::binary_operation_type::greater: + out << ">"; + return; + case ast::binary_operation_type::less_equals: + out << "<="; + return; + case ast::binary_operation_type::greater_equals: + 
out << ">="; + return; + } + + out << "(unknown)"; + } + + value eval_impl(context & context, ast::expression_ptr const & expression); + + template + value eval_impl(context & context, ast::numeric_literal_base const & literal) + { + return primitive_value(primitive_value_base{literal.value});; + } + + value eval_impl(context & context, ast::literal const & literal) + { + return std::visit([&](auto const & expression){ return eval_impl(context, expression); }, literal); + } + + value eval_impl(context & context, ast::identifier const & identifier) + { + for (auto it = context.scope_stack.rbegin(); it != context.scope_stack.rend(); ++it) + { + if (auto jt = it->variables.find(identifier.name); jt != it->variables.end()) + return jt->second.value; + } + + throw std::runtime_error("Identifier \"" + identifier.name + "\" is not defined"); + } + + template + value unary_operation_impl(ast::unary_operation_type type, primitive_value_base const & arg1) + { + switch (type) + { + case ast::unary_operation_type::negation: + if constexpr ((std::is_integral_v || std::is_floating_point_v) && !std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(-arg1.value)}); + } + break; + case ast::unary_operation_type::logical_not: + if constexpr (std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(!arg1.value)}); + } + else if constexpr (std::is_integral_v) + { + return primitive_value(primitive_value_base{static_cast(~arg1.value)}); + } + break; + } + + std::ostringstream os; + os << "Cannot apply unary operator \""; + print(os, type); + os << "\" to a value of type "; + type::print(os, type_of(primitive_value(arg1))); + throw std::runtime_error(os.str()); + } + + value unary_operation_impl(ast::unary_operation_type type, primitive_value const & arg1) + { + return std::visit([&](auto const & value){ return unary_operation_impl(type, value); }, arg1); + } + + value eval_impl(context & context, ast::unary_operation const & unary_operation) 
+ { + auto arg1 = eval_impl(context, unary_operation.arg1); + return std::visit([&](auto const & value){ return unary_operation_impl(unary_operation.type, value); }, arg1); + } + + bool requires_same_argument_type(ast::binary_operation_type) + { + // TODO: shift operators should return false + return true; + } + + template + value binary_operation_impl_same_type(ast::binary_operation_type type, primitive_value_base const & arg1, value const & arg2_generic) + { + primitive_value_base const & arg2 = std::get>(std::get(arg2_generic)); + + switch (type) + { + case ast::binary_operation_type::addition: + if constexpr (!std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value + arg2.value)}); + } + break; + case ast::binary_operation_type::subtraction: + if constexpr (!std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value - arg2.value)}); + } + break; + case ast::binary_operation_type::multiplication: + if constexpr (!std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value * arg2.value)}); + } + break; + case ast::binary_operation_type::division: + if constexpr (!std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value / arg2.value)}); + } + break; + case ast::binary_operation_type::remainder: + if constexpr (!std::is_same_v && std::is_integral_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value % arg2.value)}); + } + break; + case ast::binary_operation_type::logical_and: + if constexpr (std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value && arg2.value)}); + } + else if constexpr (std::is_integral_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value & arg2.value)}); + } + break; + case ast::binary_operation_type::logical_or: + if constexpr (std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value || arg2.value)}); + } + else if 
constexpr (std::is_integral_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value | arg2.value)}); + } + break; + case ast::binary_operation_type::logical_xor: + if constexpr (std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value ^ arg2.value)}); + } + else if constexpr (std::is_integral_v) + { + return primitive_value(primitive_value_base{static_cast(arg1.value ^ arg2.value)}); + } + break; + case ast::binary_operation_type::equals: + return primitive_value(primitive_value_base{arg1.value == arg2.value}); + case ast::binary_operation_type::not_equals: + return primitive_value(primitive_value_base{arg1.value != arg2.value}); + case ast::binary_operation_type::less: + return primitive_value(primitive_value_base{arg1.value < arg2.value}); + case ast::binary_operation_type::greater: + return primitive_value(primitive_value_base{arg1.value > arg2.value}); + case ast::binary_operation_type::less_equals: + return primitive_value(primitive_value_base{arg1.value <= arg2.value}); + case ast::binary_operation_type::greater_equals: + return primitive_value(primitive_value_base{arg1.value >= arg2.value}); + } + + std::ostringstream os; + os << "Cannot apply binary operator \""; + print(os, type); + os << "\" to values of type "; + type::print(os, type_of(primitive_value(arg1))); + os << " and "; + type::print(os, type_of(primitive_value(arg2))); + throw std::runtime_error(os.str()); + } + + value binary_operation_impl_same_type(ast::binary_operation_type type, primitive_value const & arg1, value const & arg2) + { + return std::visit([&](auto const & value){ return binary_operation_impl_same_type(type, value, arg2); }, arg1); + } + + value eval_impl(context & context, ast::binary_operation const & binary_operation) + { + auto arg1 = eval_impl(context, binary_operation.arg1); + auto arg2 = eval_impl(context, binary_operation.arg2); + + if (requires_same_argument_type(binary_operation.type)) + { + auto type1 = 
type_of(arg1); + auto type2 = type_of(arg2); + + if (!type::equal(type1, type2)) + { + std::ostringstream os; + os << "Cannot apply binary operator \""; + print(os, binary_operation.type); + os << "\" to values of type "; + type::print(os, type1); + os << " and "; + type::print(os, type2); + throw std::runtime_error(os.str()); + } + + return std::visit([&](auto const & value){ return binary_operation_impl_same_type(binary_operation.type, value, arg2); }, arg1); + } + + throw std::runtime_error("eval(binary_operation) for different argument types not implemented"); + } + + template + value cast_impl(primitive_value_base const & value, type::primitive_type_base const & type) + { + if constexpr (std::is_same_v) + { + return primitive_value(value); + } + else if constexpr (!std::is_same_v && !std::is_same_v) + { + return primitive_value(primitive_value_base{static_cast(value.value)}); + } + + std::ostringstream os; + os << "Cannot cast value of type "; + type::print(os, type_of(primitive_value(value))); + os << " to type "; + type::print(os, type::primitive_type(type)); + throw std::runtime_error(os.str()); + } + + template + value cast_impl(primitive_value_base const & value, type::primitive_type const & type) + { + return std::visit([&](auto const & type){ return cast_impl(value, type); }, type); + } + + template + value cast_impl(primitive_value_base const & value, type::type const & type) + { + return std::visit([&](auto const & type){ return cast_impl(value, type); }, type); + } + + value cast_impl(primitive_value const & value, type::type const & type) + { + return std::visit([&](auto const & value){ return cast_impl(value, type); }, value); + } + + value eval_impl(context & context, ast::cast_operation const & cast_operation) + { + auto arg = eval(context, cast_operation.expression); + return std::visit([&](auto const & value){ return cast_impl(value, *cast_operation.type); }, arg); + } + + value eval_impl(context & context, ast::expression_ptr const & 
expression) + { + return std::visit([&](auto const & expression){ return eval_impl(context, expression); }, *expression); + } + + } + + value eval(context & context, ast::expression_ptr const & expression) + { + return eval_impl(context, expression); + } + +} diff --git a/libs/interpreter/source/interpreter.cpp b/libs/interpreter/source/interpreter.cpp new file mode 100644 index 0000000..6579a96 --- /dev/null +++ b/libs/interpreter/source/interpreter.cpp @@ -0,0 +1,158 @@ +#include +#include +#include +#include + +#include +#include + +namespace pslang::interpreter +{ + + namespace + { + + void execute_impl(context & context, ast::expression_ptr const & expression) + { + eval(context, expression); + } + + void execute_impl(context & context, ast::assignment const & assignment) + { + std::string name; + if (auto identifier = std::get_if(assignment.lhs.get())) + name = identifier->name; + else + throw std::runtime_error("Cannot assign a value to a non-identifier"); + + for (auto it = context.scope_stack.rbegin(); it != context.scope_stack.rend(); ++it) + { + if (auto jt = it->variables.find(name); jt != it->variables.end()) + { + if (jt->second.category != ast::value_category::_mutable) + throw std::runtime_error("Cannot assign a value to a non-mutable variable"); + + auto new_value = eval(context, assignment.rhs); + auto new_type = type_of(new_value); + auto existing_type = type_of(jt->second.value); + if (!type::equal(existing_type, new_type)) + { + std::ostringstream os; + os << "Cannot assign a value of type "; + type::print(os, new_type); + os << " to a variable of type "; + type::print(os, existing_type); + throw std::runtime_error(os.str()); + } + jt->second.value = std::move(new_value);; + return; + } + } + + throw std::runtime_error("Identifier \"" + name + "\" is not defined"); + } + + void execute_impl(context & context, ast::variable_declaration const & variable_declaration) + { + auto & scope = context.scope_stack.back(); + if 
(scope.variables.count(variable_declaration.name) > 0) + throw std::runtime_error("Error: variable \"" + variable_declaration.name + "\" is already declared"); + + auto value = eval(context, variable_declaration.initializer); + if (variable_declaration.type) + { + auto actual_type = type_of(value); + if (!type::equal(*variable_declaration.type, actual_type)) + { + std::ostringstream os; + os << "Cannot initialize a variable of type "; + type::print(os, *variable_declaration.type); + os << " with an expression of type "; + type::print(os, actual_type); + throw std::runtime_error(os.str()); + } + } + scope.variables[variable_declaration.name] = {.category = variable_declaration.category, .value = value}; + } + + void execute_impl(context & context, ast::if_block const &) + { + throw std::runtime_error("Internal interpreter error: if blocks cannot be present in the final AST"); + } + + void execute_impl(context & context, ast::else_block const &) + { + throw std::runtime_error("Internal interpreter error: else blocks cannot be present in the final AST"); + } + + void execute_impl(context & context, ast::else_if_block const &) + { + throw std::runtime_error("Internal interpreter error: else if blocks cannot be present in the final AST"); + } + + void execute_impl(context & context, ast::if_chain const & if_chain) + { + for (auto const & block : if_chain.blocks) + { + if (block.condition) + { + auto value = eval(context, block.condition); + auto actual_type = type_of(value); + if (!type::equal(actual_type, type::primitive_type{type::bool_type{}})) + { + std::ostringstream os; + os << "Expected type bool, got type "; + type::print(os, actual_type); + os << " in if block condition"; + throw std::runtime_error(os.str()); + } + + if (std::get(std::get(value)).value) + { + execute(context, block.statements); + break; + } + } + } + } + + void execute_impl(context & context, ast::while_block const & while_block) + { + while (true) + { + auto value = eval(context, 
while_block.condition); + auto actual_type = type_of(value); + if (!type::equal(actual_type, type::primitive_type{type::bool_type{}})) + { + std::ostringstream os; + os << "Expected type bool, got type "; + type::print(os, actual_type); + os << " in while block condition"; + throw std::runtime_error(os.str()); + } + + if (std::get(std::get(value)).value) + { + execute(context, while_block.statements); + } + else + break; + } + } + + void execute_impl(context & context, ast::statement_list_ptr const & statements) + { + for (auto const & statement : statements->statements) + { + std::visit([&](auto const & statement){ execute_impl(context, statement); }, *statement); + } + } + + } + + void execute(context & context, ast::statement_list_ptr const & statements) + { + execute_impl(context, statements); + } + +} diff --git a/libs/interpreter/source/value.cpp b/libs/interpreter/source/value.cpp new file mode 100644 index 0000000..c89570e --- /dev/null +++ b/libs/interpreter/source/value.cpp @@ -0,0 +1,78 @@ +#include + +#include + +namespace pslang::interpreter +{ + + namespace + { + + template + type::type type_of_impl(primitive_value_base const &) + { + return type::primitive_type(type::primitive_type_base{}); + } + + type::type type_of_impl(primitive_value const & value) + { + return std::visit([](auto const & value){ return type_of_impl(value); }, value); + } + + type::type type_of_impl(value const & value) + { + return std::visit([](auto const & value){ return type_of_impl(value); }, value); + } + + template + void print_impl(std::ostream & out, primitive_value_base const & value) + { + if constexpr (std::is_same_v) + { + out << (value.value ? 
"true" : "false"); + } + else if constexpr (std::is_integral_v && std::is_signed_v) + { + out << (std::int64_t)value.value; + } + else if constexpr (std::is_integral_v && std::is_unsigned_v) + { + out << (std::uint64_t)value.value; + } + else if constexpr (std::is_same_v) + { + out << std::setprecision(7) << value.value; + } + else if constexpr (std::is_same_v) + { + out << std::setprecision(15) << value.value; + } + else + { + out << "(unknown)"; + } + } + + void print_impl(std::ostream & out, primitive_value const & value) + { + std::visit([&](auto const & value){ return print_impl(out, value); }, value); + } + + void print_impl(std::ostream & out, value const & value) + { + std::visit([&](auto const & value){ return print_impl(out, value); }, value); + } + + } + + type::type type_of(value const & value) + { + return type_of_impl(value); + } + + void print(std::ostream & out, value const & value) + { + print_impl(out, value); + } + +} diff --git a/libs/parser/CMakeLists.txt b/libs/parser/CMakeLists.txt new file mode 100644 index 0000000..c331a2c --- /dev/null +++ b/libs/parser/CMakeLists.txt @@ -0,0 +1,47 @@ +find_package(FLEX REQUIRED) +find_package(BISON REQUIRED) + +set(PSLANG_LEXER_RULES_FILE "${CMAKE_CURRENT_SOURCE_DIR}/rules/pslang.l") +set(PSLANG_PARSER_RULES_FILE "${CMAKE_CURRENT_SOURCE_DIR}/rules/pslang.y") + +set(PSLANG_LEXER_HEADER_FILE "${CMAKE_CURRENT_BINARY_DIR}/generated/gen_lexer.hpp") +set(PSLANG_LEXER_SOURCE_FILE "${CMAKE_CURRENT_BINARY_DIR}/generated/gen_lexer.cpp") + +set(PSLANG_PARSER_HEADER_FILE "${CMAKE_CURRENT_BINARY_DIR}/generated/gen_parser.hpp") +set(PSLANG_PARSER_SOURCE_FILE "${CMAKE_CURRENT_BINARY_DIR}/generated/gen_parser.cpp") + +flex_target( + generate-pslang-lexer + ${PSLANG_LEXER_RULES_FILE} + ${PSLANG_LEXER_SOURCE_FILE} + DEFINES_FILE ${PSLANG_LEXER_HEADER_FILE} +) + +bison_target( + generate-pslang-parser + ${PSLANG_PARSER_RULES_FILE} + ${PSLANG_PARSER_SOURCE_FILE} + DEFINES_FILE ${PSLANG_PARSER_HEADER_FILE} + COMPILE_FLAGS 
-Wcounterexamples +) + +add_flex_bison_dependency(generate-pslang-lexer generate-pslang-parser) + +set(PSLANG_PARSER_RULE_FILES + ${PSLANG_LEXER_RULES_FILE} + ${PSLANG_PARSER_RULES_FILE} +) + +set(PSLANG_PARSER_GENERATED_FILES + ${PSLANG_LEXER_HEADER_FILE} + ${PSLANG_LEXER_SOURCE_FILE} + ${PSLANG_PARSER_HEADER_FILE} + ${PSLANG_PARSER_SOURCE_FILE} +) + +file(GLOB_RECURSE PSLANG_PARSER_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp") +file(GLOB_RECURSE PSLANG_PARSER_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp") + +add_library(pslang-parser STATIC ${PSLANG_PARSER_HEADERS} ${PSLANG_PARSER_SOURCES} ${PSLANG_PARSER_RULE_FILES} ${PSLANG_PARSER_GENERATED_FILES}) +target_include_directories(pslang-parser PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_BINARY_DIR}/generated") +target_link_libraries(pslang-parser pslang-ast) diff --git a/libs/parser/include/pslang/parser/context.hpp b/libs/parser/include/pslang/parser/context.hpp new file mode 100644 index 0000000..d195795 --- /dev/null +++ b/libs/parser/include/pslang/parser/context.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include + +namespace pslang::parser +{ + + namespace bison + { + + class location; + + } + + struct context + { + bison::location & location; + indented_statement_list & result; + }; + +} diff --git a/libs/parser/include/pslang/parser/indented_statement.hpp b/libs/parser/include/pslang/parser/indented_statement.hpp new file mode 100644 index 0000000..da8524b --- /dev/null +++ b/libs/parser/include/pslang/parser/indented_statement.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include + +#include + +namespace pslang::parser +{ + + struct indented_statement + { + std::size_t indentation; + ast::statement_ptr statement; + }; + + struct indented_statement_list + { + std::vector statements; + }; + + ast::statement_list_ptr finilize(indented_statement_list statements); + +} diff --git a/libs/parser/include/pslang/parser/parser.hpp b/libs/parser/include/pslang/parser/parser.hpp new file mode 100644 
index 0000000..9cb1206 --- /dev/null +++ b/libs/parser/include/pslang/parser/parser.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include + +#include + +namespace pslang::parser +{ + + ast::statement_list_ptr parse(std::filesystem::path const & file); + +} diff --git a/libs/parser/rules/pslang.l b/libs/parser/rules/pslang.l new file mode 100644 index 0000000..22a2541 --- /dev/null +++ b/libs/parser/rules/pslang.l @@ -0,0 +1,83 @@ +%option noyywrap nounput noinput + +%{ + +#include "gen_parser.hpp" +#include + +using bp = ::pslang::parser::bison::parser; + +#define YY_DECL bp::symbol_type yylex(::pslang::parser::context& ctx) +#define YY_USER_ACTION ctx.location.columns(yyleng); + +%} + +%% + +%{ + ctx.location.step(); +%} + +[ ]+ { ctx.location.step(); } + +const { return bp::make_const(ctx.location); } +let { return bp::make_let(ctx.location); } +mut { return bp::make_mut(ctx.location); } +if { return bp::make_if(ctx.location); } +else { return bp::make_else(ctx.location); } +while { return bp::make_while(ctx.location); } +as { return bp::make_as(ctx.location); } +true { return bp::make_true(ctx.location); } +false { return bp::make_false(ctx.location); } + +bool { return bp::make_bool(ctx.location); } +i8 { return bp::make_i8(ctx.location); } +u8 { return bp::make_u8(ctx.location); } +i16 { return bp::make_i16(ctx.location); } +u16 { return bp::make_u16(ctx.location); } +i32 { return bp::make_i32(ctx.location); } +u32 { return bp::make_u32(ctx.location); } +i64 { return bp::make_i64(ctx.location); } +u64 { return bp::make_u64(ctx.location); } +f32 { return bp::make_f32(ctx.location); } +f64 { return bp::make_f64(ctx.location); } + +[a-z]+ { return bp::make_name(yytext, ctx.location); } + +"\n" { ctx.location.lines(1); return bp::make_newline(ctx.location); } +"\t" { return bp::make_indent(ctx.location); } +"=" { return bp::make_assignment(ctx.location); } +":" { return bp::make_colon(ctx.location); } +"(" { return bp::make_lparen(ctx.location); } +")" { return 
bp::make_rparen(ctx.location); } +"+" { return bp::make_plus(ctx.location); } +"-" { return bp::make_minus(ctx.location); } +"*" { return bp::make_asterisk(ctx.location); } +"/" { return bp::make_slash(ctx.location); } +"%" { return bp::make_percent(ctx.location); } +"&" { return bp::make_ampersand(ctx.location); } +"|" { return bp::make_vertical_bar(ctx.location); } +"^" { return bp::make_circumflex(ctx.location); } +"!" { return bp::make_exclamation(ctx.location); } +"==" { return bp::make_equals(ctx.location); } +"!=" { return bp::make_not_equals(ctx.location); } +"<" { return bp::make_less(ctx.location); } +">" { return bp::make_greater(ctx.location); } +"<=" { return bp::make_less_equals(ctx.location); } +">=" { return bp::make_greater_equals(ctx.location); } + +[0-9]+b { return bp::make_lit_i8(yytext, ctx.location); } +[0-9]+ub { return bp::make_lit_u8(yytext, ctx.location); } +[0-9]+s { return bp::make_lit_i16(yytext, ctx.location); } +[0-9]+us { return bp::make_lit_u16(yytext, ctx.location); } +[0-9]+ { return bp::make_lit_i32(yytext, ctx.location); } +[0-9]+u { return bp::make_lit_u32(yytext, ctx.location); } +[0-9]+l { return bp::make_lit_i64(yytext, ctx.location); } +[0-9]+ul { return bp::make_lit_u64(yytext, ctx.location); } + +[0-9]+\.[0-9]+ { return bp::make_lit_f32(yytext, ctx.location); } +[0-9]+\.[0-9]+l { return bp::make_lit_f64(yytext, ctx.location); } + +<> { return bp::make_end(ctx.location); } + +. 
{ throw std::runtime_error(std::string("Unexpected character: ") + yytext); } diff --git a/libs/parser/rules/pslang.y b/libs/parser/rules/pslang.y new file mode 100644 index 0000000..daeb50f --- /dev/null +++ b/libs/parser/rules/pslang.y @@ -0,0 +1,285 @@ +%skeleton "lalr1.cc" +%require "3.8.1" +%header + +%language "C++" +%define api.namespace {pslang::parser::bison} +%define api.location.file none + +%define api.token.raw +%define api.token.constructor +%define api.value.type variant +%define api.value.automove + +%define parse.assert +%define parse.trace +%define parse.error detailed +%define parse.lac full + +%locations + +%{ + +#include + +void yyerror(char const * s) +{ + printf("error: %s\n", s); +} + +%} + +%code requires { + +#include +#include + +namespace pslang::parser { + +struct context; + +} + +} + +%code { + +#include + +#include +#include +#include + +#define YY_DECL ::pslang::parser::bison::parser::symbol_type yylex(::pslang::parser::context& ctx) +YY_DECL; + +template +::pslang::ast::literal parse_numeric_literal(std::string const & str) +{ + T value; + auto result = std::from_chars(str.data(), str.data() + str.size(), value); + if (result.ec != std::errc()) + throw std::system_error(std::make_error_code(result.ec)); + return ::pslang::ast::numeric_literal_base{value}; +} + +} + +%param { ::pslang::parser::context& ctx } + +%define api.token.prefix {tok_} + +%token newline "newline" +%token indent "indentation" +%token assignment "=" +%token colon ":" +%token lparen "(" +%token rparen ")" +%token plus "+" +%token minus "-" +%token asterisk "*" +%token slash "/" +%token percent "%" +%token ampersand "&" +%token vertical_bar "|" +%token circumflex "^" +%token exclamation "!" 
+%token equals "==" +%token not_equals "!=" +%token less "<" +%token greater ">" +%token less_equals "<=" +%token greater_equals ">=" + +%token lit_i8 +%token lit_u8 +%token lit_i16 +%token lit_u16 +%token lit_i32 +%token lit_u32 +%token lit_i64 +%token lit_u64 + +%token lit_f8 +%token lit_f16 +%token lit_f32 +%token lit_f64 + +%token name + +%token const +%token let +%token mut +%token if +%token else +%token while +%token as +%token true +%token false + +%token bool +%token i8 +%token u8 +%token i16 +%token u16 +%token i32 +%token u32 +%token i64 +%token u64 +%token f32 +%token f64 + +%token end 0 + +%type indented_statement_list +%type indentation +%type statement +%type variable_declaration +%type variable_keyword +%type type_expression +%type primitive_type +%type expression +%type bool_expression +%type compare_expression +%type as_expression +%type negate_expression +%type sum_expression +%type mult_expression +%type not_expression +%type base_expression +%type literal + +%% + +module +: indented_statement_list end { ctx.result = $1; } +; + +indented_statement_list +: indented_statement_list indentation statement newline { auto tmp = $1; tmp.statements.push_back({$2, std::make_unique($3)}); $$ = std::move(tmp); } +| indented_statement_list indentation newline { $$ = $1; } +| %empty { $$ = {}; } +; + +indentation +: indent indentation { $$ = $2 + 1ull; } +| %empty { $$ = 0ull; } +; + +statement +: expression { $$ = std::make_unique($1); } +| expression assignment expression { $$ = ast::assignment{ std::make_unique($1), std::make_unique($3) }; } +| variable_declaration { $$ = $1; } +| if expression colon { $$ = ast::if_block{std::make_unique($2), {}}; } +| else colon { $$ = ast::else_block{{}}; } +| else if expression colon { $$ = ast::else_if_block{std::make_unique($3), {}}; } +| while expression colon { $$ = ast::while_block{std::make_unique($2), {}}; } +; + +variable_declaration +: variable_keyword name assignment expression { $$ = 
ast::variable_declaration{$1, $2, nullptr, std::make_unique($4)}; } +| variable_keyword name colon type_expression assignment expression { $$ = ast::variable_declaration{$1, $2, std::make_unique($4), std::make_unique($6)}; } +; + +variable_keyword +: const { $$ = ast::value_category::compile_time; } +| let { $$ = ast::value_category::constant; } +| mut { $$ = ast::value_category::_mutable; } +; + +type_expression +: primitive_type { $$ = type::type($1); } +; + +primitive_type +: bool { $$ = type::bool_type{}; } +| i8 { $$ = type::i8_type{}; } +| u8 { $$ = type::u8_type{}; } +| i16 { $$ = type::i16_type{}; } +| u16 { $$ = type::u16_type{}; } +| i32 { $$ = type::i32_type{}; } +| u32 { $$ = type::u32_type{}; } +| i64 { $$ = type::i64_type{}; } +| u64 { $$ = type::u64_type{}; } +| f32 { $$ = type::f32_type{}; } +| f64 { $$ = type::f64_type{}; } +; + +expression +: bool_expression { $$ = $1; } +; + +bool_expression +: compare_expression { $$ = $1; } +| bool_expression ampersand compare_expression { $$ = ast::binary_operation{ast::binary_operation_type::logical_and, std::make_unique($1), std::make_unique($3) }; } +| bool_expression vertical_bar compare_expression { $$ = ast::binary_operation{ast::binary_operation_type::logical_or, std::make_unique($1), std::make_unique($3) }; } +| bool_expression circumflex compare_expression { $$ = ast::binary_operation{ast::binary_operation_type::logical_xor, std::make_unique($1), std::make_unique($3) }; } +; + +compare_expression +: as_expression { $$ = $1; } +| compare_expression equals as_expression { $$ = ast::binary_operation{ast::binary_operation_type::equals, std::make_unique($1), std::make_unique($3) }; } +| compare_expression not_equals as_expression { $$ = ast::binary_operation{ast::binary_operation_type::not_equals, std::make_unique($1), std::make_unique($3) }; } +| compare_expression less as_expression { $$ = ast::binary_operation{ast::binary_operation_type::less, std::make_unique($1), std::make_unique($3) }; } +| 
compare_expression greater as_expression { $$ = ast::binary_operation{ast::binary_operation_type::greater, std::make_unique($1), std::make_unique($3) }; } +| compare_expression less_equals as_expression { $$ = ast::binary_operation{ast::binary_operation_type::less_equals, std::make_unique($1), std::make_unique($3) }; } +| compare_expression greater_equals as_expression { $$ = ast::binary_operation{ast::binary_operation_type::greater_equals, std::make_unique($1), std::make_unique($3) }; } +; + +as_expression +: negate_expression { $$ = $1; } +| negate_expression as type_expression { $$ = ast::cast_operation{ std::make_unique($1), std::make_unique($3) }; } +; + +negate_expression +: sum_expression { $$ = $1; } +| minus sum_expression { $$ = ast::unary_operation{ast::unary_operation_type::negation, std::make_unique($2) }; } +; + +sum_expression +: mult_expression { $$ = $1; } +| sum_expression plus mult_expression { $$ = ast::binary_operation{ast::binary_operation_type::addition, std::make_unique($1), std::make_unique($3) }; } +| sum_expression minus mult_expression { $$ = ast::binary_operation{ast::binary_operation_type::subtraction, std::make_unique($1), std::make_unique($3) }; } +; + +mult_expression +: not_expression { $$ = $1; } +| mult_expression asterisk not_expression { $$ = ast::binary_operation{ast::binary_operation_type::multiplication, std::make_unique($1), std::make_unique($3) }; } +| mult_expression slash not_expression { $$ = ast::binary_operation{ast::binary_operation_type::division, std::make_unique($1), std::make_unique($3) }; } +| mult_expression percent not_expression { $$ = ast::binary_operation{ast::binary_operation_type::remainder, std::make_unique($1), std::make_unique($3) }; } +; + +not_expression +: base_expression +| exclamation base_expression { $$ = ast::unary_operation{ast::unary_operation_type::logical_not, std::make_unique($2) }; } +; + +base_expression +: literal +| name { $$ = ast::identifier{$1}; } +| lparen expression rparen { $$ = 
$2; } +; + +literal +: true { $$ = ast::literal(ast::bool_literal{true}); } +| false { $$ = ast::literal(ast::bool_literal{false}); } +| lit_i8 { $$ = parse_numeric_literal($1); } +| lit_u8 { $$ = parse_numeric_literal($1); } +| lit_i16 { $$ = parse_numeric_literal($1); } +| lit_u16 { $$ = parse_numeric_literal($1); } +| lit_i32 { $$ = parse_numeric_literal($1); } +| lit_u32 { $$ = parse_numeric_literal($1); } +| lit_i64 { $$ = parse_numeric_literal($1); } +| lit_u64 { $$ = parse_numeric_literal($1); } +| lit_f32 { $$ = parse_numeric_literal($1); } +| lit_f64 { $$ = parse_numeric_literal($1); } +; + +%% + +void pslang::parser::bison::parser::error(location_type const& location, std::string const& message) +{ + std::ostringstream os; + os << "Error parsing at " << location << ": " << message << "\n"; + throw std::runtime_error(os.str()); +} diff --git a/libs/parser/source/finilize.cpp b/libs/parser/source/finilize.cpp new file mode 100644 index 0000000..895f61f --- /dev/null +++ b/libs/parser/source/finilize.cpp @@ -0,0 +1,131 @@ +#include +#include + +#include +#include + +namespace pslang::parser +{ + + namespace + { + + ast::statement_list * get_statement_list(ast::expression_ptr &) + { + return nullptr; + } + + ast::statement_list * get_statement_list(ast::assignment &) + { + return nullptr; + } + + ast::statement_list * get_statement_list(ast::variable_declaration &) + { + return nullptr; + } + + ast::statement_list * get_statement_list(ast::if_block & node) + { + node.statements = std::make_unique(); + return node.statements.get(); + } + + ast::statement_list * get_statement_list(ast::else_block & node) + { + node.statements = std::make_unique(); + return node.statements.get(); + } + + ast::statement_list * get_statement_list(ast::else_if_block & node) + { + node.statements = std::make_unique(); + return node.statements.get(); + } + + // NB: if chain merging happens after retrieving statement list + ast::statement_list * get_statement_list(ast::if_chain & 
node) + { + return nullptr; + } + + ast::statement_list * get_statement_list(ast::while_block & node) + { + node.statements = std::make_unique(); + return node.statements.get(); + } + + ast::statement_list * get_statement_list(ast::statement & statement) + { + return std::visit([](auto & value){ return get_statement_list(value); }, statement); + } + + } + + ast::statement_list_ptr finilize(indented_statement_list statements) + { + ast::statement_list_ptr result = std::make_unique(); + + std::vector stack; + stack.push_back(result.get()); + std::size_t current_indent = 0; + + for (auto & statement : statements.statements) + { + if (statement.indentation > current_indent) + { + throw std::runtime_error("Unexpected indent"); + } + + while (statement.indentation < current_indent) + { + stack.pop_back(); + --current_indent; + } + + // Now statement.indentation == current_indent + + auto list = get_statement_list(*statement.statement); + + if (auto if_block = std::get_if(statement.statement.get())) + { + ast::if_chain chain; + chain.blocks.push_back({.condition = std::move(if_block->condition), .statements = std::move(if_block->statements)}); + stack.back()->statements.push_back(std::make_unique(std::move(chain))); + } + else if (auto else_block = std::get_if(statement.statement.get())) + { + if (stack.back()->statements.empty()) + throw std::runtime_error("Unexpected else block"); + auto chain = std::get_if(stack.back()->statements.back().get()); + if (!chain || chain->blocks.empty() || !chain->blocks.back().condition) + throw std::runtime_error("Unexpected else block"); + + chain->blocks.push_back({.condition = nullptr, .statements = std::move(else_block->statements)}); + } + else if (auto else_if_block = std::get_if(statement.statement.get())) + { + if (stack.back()->statements.empty()) + throw std::runtime_error("Unexpected else if block"); + auto chain = std::get_if(stack.back()->statements.back().get()); + if (!chain || chain->blocks.empty() || 
!chain->blocks.back().condition) + throw std::runtime_error("Unexpected else if block"); + + chain->blocks.push_back({.condition = std::move(else_if_block->condition), .statements = std::move(else_if_block->statements)}); + } + else + { + stack.back()->statements.push_back(std::move(statement.statement)); + } + + if (list) + { + stack.push_back(list); + ++current_indent; + } + } + + return result; + } + +} diff --git a/libs/parser/source/parser.cpp b/libs/parser/source/parser.cpp new file mode 100644 index 0000000..8e49f3c --- /dev/null +++ b/libs/parser/source/parser.cpp @@ -0,0 +1,31 @@ +#include +#include +#include +#include "gen_parser.hpp" +#include "gen_lexer.hpp" + +namespace pslang::parser +{ + + ast::statement_list_ptr parse(std::filesystem::path const & file) + { + auto filename = file.string(); + + yyin = fopen(filename.c_str(), "r"); + if (!yyin) + throw std::system_error(std::make_error_code(static_cast(errno))); + + bison::location location(&filename); + indented_statement_list result; + context ctx{location, result}; + + bison::parser parser(ctx); + + parser.parse(); + + fclose(yyin); + + return finilize(std::move(result)); + } + +} diff --git a/libs/type/CMakeLists.txt b/libs/type/CMakeLists.txt new file mode 100644 index 0000000..894b612 --- /dev/null +++ b/libs/type/CMakeLists.txt @@ -0,0 +1,5 @@ +file(GLOB_RECURSE PSLANG_TYPE_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp") +file(GLOB_RECURSE PSLANG_TYPE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp") + +add_library(pslang-type STATIC ${PSLANG_TYPE_HEADERS} ${PSLANG_TYPE_SOURCES}) +target_include_directories(pslang-type PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") diff --git a/libs/type/include/pslang/type/primitive.hpp b/libs/type/include/pslang/type/primitive.hpp new file mode 100644 index 0000000..6d51c09 --- /dev/null +++ b/libs/type/include/pslang/type/primitive.hpp @@ -0,0 +1,55 @@ +#pragma once + +#include +#include + +namespace pslang::type +{ + + template + struct 
primitive_type_base + { + using native_type = T; + }; + + template + bool operator == (primitive_type_base const &, primitive_type_base const &) + { + return true; + } + + using bool_type = primitive_type_base; + + using i8_type = primitive_type_base; + using u8_type = primitive_type_base; + using i16_type = primitive_type_base; + using u16_type = primitive_type_base; + using i32_type = primitive_type_base; + using u32_type = primitive_type_base; + using i64_type = primitive_type_base; + using u64_type = primitive_type_base; + + using f32_type = primitive_type_base; + using f64_type = primitive_type_base; + + using primitive_type_impl = std::variant< + bool_type, + i8_type, + u8_type, + i16_type, + u16_type, + i32_type, + u32_type, + i64_type, + u64_type, + f32_type, + f64_type + >; + + struct primitive_type + : primitive_type_impl + { + using primitive_type_impl::primitive_type_impl; + }; + +} diff --git a/libs/type/include/pslang/type/print.hpp b/libs/type/include/pslang/type/print.hpp new file mode 100644 index 0000000..3900956 --- /dev/null +++ b/libs/type/include/pslang/type/print.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include + +#include + +namespace pslang::type +{ + + void print(std::ostream & out, type const & type); + +} diff --git a/libs/type/include/pslang/type/type.hpp b/libs/type/include/pslang/type/type.hpp new file mode 100644 index 0000000..b1125e7 --- /dev/null +++ b/libs/type/include/pslang/type/type.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + +#include + +namespace pslang::type +{ + + using type_impl = std::variant< + primitive_type + >; + + struct type + : type_impl + { + using type_impl::type_impl; + }; + + bool equal(type const & t1, type const & t2); + +} diff --git a/libs/type/include/pslang/type/type_fwd.hpp b/libs/type/include/pslang/type/type_fwd.hpp new file mode 100644 index 0000000..26c647e --- /dev/null +++ b/libs/type/include/pslang/type/type_fwd.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include + +namespace pslang::type 
+{ + + struct type; + + using type_ptr = std::unique_ptr; + +} diff --git a/libs/type/source/print.cpp b/libs/type/source/print.cpp new file mode 100644 index 0000000..a5bd063 --- /dev/null +++ b/libs/type/source/print.cpp @@ -0,0 +1,82 @@ +#include +#include + +namespace pslang::type +{ + + namespace + { + + void print_impl(std::ostream & out, bool_type const &) + { + out << "bool"; + } + + void print_impl(std::ostream & out, i8_type const &) + { + out << "i8"; + } + + void print_impl(std::ostream & out, u8_type const &) + { + out << "u8"; + } + + void print_impl(std::ostream & out, i16_type const &) + { + out << "i16"; + } + + void print_impl(std::ostream & out, u16_type const &) + { + out << "u16"; + } + + void print_impl(std::ostream & out, i32_type const &) + { + out << "i32"; + } + + void print_impl(std::ostream & out, u32_type const &) + { + out << "u32"; + } + + void print_impl(std::ostream & out, i64_type const &) + { + out << "i64"; + } + + void print_impl(std::ostream & out, u64_type const &) + { + out << "u64"; + } + + void print_impl(std::ostream & out, f32_type const &) + { + out << "f32"; + } + + void print_impl(std::ostream & out, f64_type const &) + { + out << "f64"; + } + + void print_impl(std::ostream & out, primitive_type const & type) + { + std::visit([&](auto const & value){ print_impl(out, value); }, type); + } + + void print_impl(std::ostream & out, type const & type) + { + std::visit([&](auto const & value){ print_impl(out, value); }, type); + } + + } + + void print(std::ostream & out, type const & type) + { + print_impl(out, type); + } + +} diff --git a/libs/type/source/type.cpp b/libs/type/source/type.cpp new file mode 100644 index 0000000..b216bd1 --- /dev/null +++ b/libs/type/source/type.cpp @@ -0,0 +1,11 @@ +#include + +namespace pslang::type +{ + + bool equal(type const & t1, type const & t2) + { + return static_cast(t1) == static_cast(t2); + } + +} diff --git a/spec.txt b/spec.txt new file mode 100644 index 0000000..a3963a2 --- 
/dev/null +++ b/spec.txt @@ -0,0 +1,210 @@ +======== TYPES ======== + +Built-in types: + unit + bool + u8 + u16 + u32 + u64 + i8 + i16 + i32 + i64 + f8 (maybe? cpu-emulated at best) + f16 (maybe? cpu-emulated at best) + f32 + f64 + +N.B.: there is no dedicated 'char' type, strings operate on u8 (utf-8) or u32 (utf-32) + +Pointer types: + *T (pointer to const T) + *mut T (pointer to mutable T) + **T (pointer to const pointer to const T) + *mut *T (pointer to mutable pointer to const T) + **mut T (pointer to const pointer to mutable T) + *mut *mut T (pointer to mutable pointer to mutable T) + +Array types: + T[N] array of N elements of type T (N must be a compile-time value) + +======== LITERALS ======== + +Literals: + 56b -> i8 + 42ub -> u8 + 456s -> i16 + 456us -> u16 + 98765 -> i32 + 98765u -> u32 + 123l -> i64 + 123ul -> u64 + 3.14h -> f16 + 3.14f -> f32 + 3.14 -> f64 + 'a' -> u8 + '猫'u -> u32 + +TODO: string literals? fixed-size arrays? built-in spans? + "hello, world" -> utf-8 string + "здарова, братки"u -> utf-32 string + + +======== VARIABLES ======== + +Variable declaration: + const x = ... compile-time value, type inferred + const x: T = ... compile-time value of type T + let x = ... immutable value, type inferred + let x: T = ... immutable value of type T + mut x = ... mutable, ... + mut x: T = ... + +Array declaration: + let arr: i32[4] = [12, 15, 65, 42] + let arr: i32[] = [56, 23] // size inferred as 2 + let arr = [2, 5, 6] // size and type inferred as i32[3] + +Variables must always be initialized. (TODO: really? What about arrays?) +Const variables must be initialized with a const expression (any expression that doesn't include non-const values). 
+ +======== OPERATORS ======== + +Logical (only bool type): + !x + x & y + x | y + x ^ y + +Equality (all built-in types, only same type): + x == y + x != y + +Comparison (all built-in types, only same type): + x < y + x > y + x <= y + x >= y + +Bitwise (integer types, only same type): + !x + x & y + x | y + x ^ y + +Bitwise shift (any pair of integer types): + x >> y + x << y + +Arithmetic (only same integer/floating-point type): + -x + x + y + x * y + x / y // in case of integers, rounds down (floor) when y>0, even when x<0, consistently with % + x % y // integer only; mathematical, i.e. always in [0, y-1] when y>0 even when x<0; TODO: what if y<0? + +Pointer arithmetic (any pointer type + any integer type): + p + x + p - x + p - q // returns i64 + +Pointer arithmetic works element-wise (like C or C++), i.e. p + n advances by n * sizeof(T) when typeof(p) is *T + +Casting: + x as u32 // always explicit, no implicit casts allowed + +Any integer/floating-point types can be cast to each other. +Any pointer types can be cast to each other (TODO: alignment? UB or safe fallback?). + +Address: + &x // returns *T + &mut x // returns *mut T, fails if x is non-mut variable + +Assignment: + x = 15 // requires x to be a mut variable + *p = 15 // p must be a pointer to mut + +======== STRUCTS ======== + +Struct types: + struct rect: + width: u32 + height: u32 + +Creating a struct value: + let x = rect(10u, 20u) + let y = rect(width = 10u, height = 20u) + +Struct field access: + let r = rect(1u, 2u) + let x = r.width + let p = &r + let y = p.height // field access through pointer is the same + +Function types: + (T1, T2, T3) -> i32 + (T1, T2) -> unit // no return value + +Function declaration (required for e.g. 
loops in call graph): + func foo(x: i32, y: i32) -> i32 + func bar(x: f32) // same as -> unit + +Function definition: + func foo(x: i32, y: i32) -> i32: + return x * y + + func bar(x: f32): // deduced return type unit + print(x) + +Flow control: + if condition: + statements + else if condition: + statements + else: + statements + + while condition: + statements + + TODO: for loops? iterator/range interface? + +======== TYPE OF TYPES ======== + +Types are also considered to be values. The keyword `type` denotes the type of all types. +I.e. `typeof(16) == i32` and `typeof(i32) == type`. Incidentally, `typeof(type) == type` as well; there are no type kinds or the like. +`type` can be used in any place where a type is required (variable types, function arguments, function return value, etc.). +E.g. + func foo(x: type) -> type: + return x[4] // type of arrays of 4 elements of type x + + let y: type = u32 + if foo(y) == u32[4]: + do_smth() + +======== CONST EXPRESSIONS ======== + +// TODO +// Auto-upgrading values to compile-time when a function is called with const-only values? + +======== METAPROGRAMMING ======== + +// TODO +// Functions returning functions/structs +// Syntactic sugar for common cases +// Figure out: max(a,b) - how to deduce type parameters? +// func max(t: type): +// return func(x : t, y : t): +// if x > y: +// return x +// else: +// return y + +======== MODULES AND IMPORTS ======== + +// TODO + +======== STANDARD LIBRARY ======== + +// TODO: containers, memory management, strings?