From 0dc740d65695d335631d51ee0b5a8ea3477c85f2 Mon Sep 17 00:00:00 2001 From: lisyarus Date: Fri, 27 Mar 2026 14:53:36 +0300 Subject: [PATCH] IR & Aarch64 compiler structs support wip: struct constructors, assignment, field access --- libs/ir/include/pslang/ir/node.hpp | 8 + libs/ir/source/compiler.cpp | 48 +++++- libs/ir/source/print.cpp | 12 +- .../jit/arch/aarch64/instruction_builder.hpp | 26 +++ libs/jit/source/arch/aarch64/compiler_v2.cpp | 160 ++++++++++++------ .../arch/aarch64/instruction_builder.cpp | 35 ++++ 6 files changed, 236 insertions(+), 53 deletions(-) diff --git a/libs/ir/include/pslang/ir/node.hpp b/libs/ir/include/pslang/ir/node.hpp index 6b396c5..705fbd5 100644 --- a/libs/ir/include/pslang/ir/node.hpp +++ b/libs/ir/include/pslang/ir/node.hpp @@ -21,9 +21,14 @@ namespace pslang::ir ast::literal value; }; + struct alloc + {}; + struct copy { node_ref source; + // Field id's for compound access like a.b.c + std::vector path; }; struct load @@ -75,6 +80,8 @@ namespace pslang::ir { node_ref lhs; node_ref rhs; + // Field id's for compound assignment like a.b.c = 1 + std::vector path; }; struct jump @@ -118,6 +125,7 @@ namespace pslang::ir using instruction = std::variant< label, literal, + alloc, copy, load, store, diff --git a/libs/ir/source/compiler.cpp b/libs/ir/source/compiler.cpp index 9417af6..f23a454 100644 --- a/libs/ir/source/compiler.cpp +++ b/libs/ir/source/compiler.cpp @@ -352,7 +352,20 @@ namespace pslang::ir } else // if (node.type) { - throw std::runtime_error("IR compile visitor not implemented for type constructors"); + auto type = ast::get_type(*node.type); + if (auto struct_type = std::get_if(type.get())) + { + mcontext.nodes->emplace_back(alloc{}, node.inferred_type); + auto result = last(); + for (std::size_t i = 0; i < node.arguments.size(); ++i) + { + auto const & field = struct_type->node->fields[i]; + auto arg = apply(*node.arguments[i]); + mcontext.nodes->emplace_back(assignment{result, arg, {i}}, field.inferred_type); + } + 
return result; + } + throw std::runtime_error("Type constructors are not implemented for non-struct types"); } } @@ -368,6 +381,22 @@ namespace pslang::ir throw std::runtime_error("Unknown array access left-hand side"); } + node_ref apply(ast::field_access const & node) + { + auto object = apply(*node.object); + auto object_type = ast::get_type(*node.object); + auto struct_node = std::get_if(object_type.get())->node; + for (std::size_t i = 0; i < struct_node->fields.size(); ++i) + { + if (struct_node->fields[i].name == node.field_name) + { + mcontext.nodes->emplace_back(copy{object, {i}}, node.inferred_type); + return last(); + } + } + throw std::runtime_error("Unknown field name"); + } + // TODO: array, array_access, field_access // Statements @@ -399,6 +428,23 @@ namespace pslang::ir } } + if (auto field_access = std::get_if(node.lhs.get())) + { + auto object_type = ast::get_type(*field_access->object); + auto struct_node = std::get_if(object_type.get())->node; + auto object = apply(field_access->object); + for (std::size_t i = 0; i < struct_node->fields.size(); ++i) + { + auto const & field = struct_node->fields[i]; + if (field.name == field_access->field_name) + { + mcontext.nodes->emplace_back(assignment{object, rhs, {i}}, field.inferred_type); + return last(); + } + } + throw std::runtime_error("Unknown field name"); + } + if (auto unary_operation = std::get_if(node.lhs.get())) { if (unary_operation->type == ast::unary_operation_type::dereference) diff --git a/libs/ir/source/print.cpp b/libs/ir/source/print.cpp index 6de6774..ddf4c2f 100644 --- a/libs/ir/source/print.cpp +++ b/libs/ir/source/print.cpp @@ -199,9 +199,16 @@ namespace pslang::ir std::visit(print_literal_visitor{out}, instruction.value); } + void operator()(alloc const & instruction) + { + out << "alloc"; + } + void operator()(copy const & instruction) { out << "copy $" << get_index(instruction.source); + for (auto field_id : instruction.path) + out << "." 
<< field_id; } void operator()(load const & instruction) @@ -248,7 +255,10 @@ namespace pslang::ir void operator()(assignment const & instruction) { - out << "assign $" << get_index(instruction.lhs) << " $" << get_index(instruction.rhs); + out << "assign $" << get_index(instruction.lhs); + for (auto field_id : instruction.path) + out << "." << field_id; + out << " $" << get_index(instruction.rhs); } void operator()(jump const & instruction) diff --git a/libs/jit/include/pslang/jit/arch/aarch64/instruction_builder.hpp b/libs/jit/include/pslang/jit/arch/aarch64/instruction_builder.hpp index bab2792..0c8e308 100644 --- a/libs/jit/include/pslang/jit/arch/aarch64/instruction_builder.hpp +++ b/libs/jit/include/pslang/jit/arch/aarch64/instruction_builder.hpp @@ -12,6 +12,8 @@ namespace pslang::jit::aarch64 // NB: stack pointer is register 31 + void nop(); + // Move @val shifted by 16*@shift bits into register @reg, zeroing out other bits // @shift must be 0, 1, 2 or 3 void movz(std::uint8_t reg, std::uint16_t val, std::uint8_t shift = 0); @@ -24,6 +26,18 @@ namespace pslang::jit::aarch64 // register @reg_addr plus an unsigned 12-bit offset multiplied by 8. void str(std::uint8_t reg_src, std::uint8_t reg_addr, std::uint16_t offset); + // Store the lowest 32 bits of the value of the register @reg_src at the address specified by the value of + // register @reg_addr plus an unsigned 12-bit offset multiplied by 4. + void strw(std::uint8_t reg_src, std::uint8_t reg_addr, std::uint16_t offset); + + // Store the lowest 16 bits of the value of the register @reg_src at the address specified by the value of + // register @reg_addr plus an unsigned 12-bit offset multiplied by 2. + void strh(std::uint8_t reg_src, std::uint8_t reg_addr, std::uint16_t offset); + + // Store the lowest 8 bits of the value of the register @reg_src at the address specified by the value of + // register @reg_addr plus an unsigned 12-bit offset. 
+ void strb(std::uint8_t reg_src, std::uint8_t reg_addr, std::uint16_t offset); + // Store the value of register @reg_src into an address specified by the value of register @reg_addr // plus a signed 9-bit offset. Store the new address value in @reg_addr void str_pre(std::uint8_t reg_src, std::uint8_t reg_addr, std::int16_t offset); @@ -48,6 +62,18 @@ namespace pslang::jit::aarch64 // plus an unsigned 12-bit offset multiplied by 8 and store it in the register @reg_dst. void ldr(std::uint8_t reg_dst, std::uint8_t reg_addr, std::uint16_t offset); + // Load the 32-bit value at address specified by the value of register @reg_addr plus an + // unsigned 12-bit offset multiplied by 4 and store it in the lowest 32 bits of the register @reg_dst. + void ldrw(std::uint8_t reg_dst, std::uint8_t reg_addr, std::uint16_t offset); + + // Load the 16-bit value at address specified by the value of register @reg_addr plus an + // unsigned 12-bit offset multiplied by 2 and store it in the lowest 16 bits of the register @reg_dst. + void ldrh(std::uint8_t reg_dst, std::uint8_t reg_addr, std::uint16_t offset); + + // Load the 8-bit value at address specified by the value of register @reg_addr plus an + // unsigned 12-bit offset and store it in the lowest 8 bits of the register @reg_dst. + void ldrb(std::uint8_t reg_dst, std::uint8_t reg_addr, std::uint16_t offset); + // Load the value at address specified by the value of register @reg_addr // plus a signed 9-bit offset and store it in the register @reg_dst. 
// Store the new address value in @reg_addr diff --git a/libs/jit/source/arch/aarch64/compiler_v2.cpp b/libs/jit/source/arch/aarch64/compiler_v2.cpp index 1e7150c..661e457 100644 --- a/libs/jit/source/arch/aarch64/compiler_v2.cpp +++ b/libs/jit/source/arch/aarch64/compiler_v2.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -236,52 +237,46 @@ namespace pslang::jit::aarch64 store_fp(it, 0, fp_mode_for(*type)); } - void apply(ir::node_ref it, ir::copy const & node, types::type_ptr const &) + void apply(ir::node_ref it, ir::alloc const & node, types::type_ptr const &) { - // TODO: struct/array copy? - load(node.source, 0); - store(it, 0); + // Nothing to do: alloc just allocates a node of a struct type, + // but we already allocated stack space for it + } + + void apply(ir::node_ref it, ir::copy const & node, types::type_ptr const & type) + { + // TODO: array/array element copy? + auto size = ast::type_size(*type); + auto dst_offset = stack_size - stack_position.at(it); + + auto src_type = node.source->inferred_type; + auto src_offset = stack_size - stack_position.at(node.source); + for (auto field_id : node.path) + { + auto const & field = std::get(*src_type).node->fields[field_id]; + src_type = field.inferred_type; + src_offset += field.layout.offset; + } + + copy_memory(31, src_offset, 31, dst_offset, size, 0); } void apply(ir::node_ref it, ir::load const & node, types::type_ptr const & type) { - // TODO: struct/array load? + // TODO: array/array element load? 
load(node.ptr, 0); auto size = ast::type_size(*type); - - if (size == 1) - builder.ldurb(0, 0, 0); - else if (size == 2) - builder.ldurh(0, 0, 0); - else if (size == 4) - builder.ldurw(0, 0, 0); - else if (size == 8) - builder.ldur(0, 0, 0); - else - throw std::runtime_error(std::format("Unsupported load size: {}", size)); - - if (types::is_bool_type(*type) || types::is_integer_type(*type)) - extend(0, type); - - store(it, 0); + auto dst_offset = stack_size - stack_position.at(it); + copy_memory(0, 0, 31, dst_offset, size, 1); } void apply(ir::node_ref, ir::store const & node, types::type_ptr const & type) { - // TODO: struct/array store? + // TODO: array/array element store? load(node.ptr, 0); - load(node.value, 1); auto size = ast::type_size(*type); - if (size == 1) - builder.sturb(1, 0, 0); - else if (size == 2) - builder.sturh(1, 0, 0); - else if (size == 4) - builder.sturw(1, 0, 0); - else if (size == 8) - builder.stur(1, 0, 0); - else - throw std::runtime_error(std::format("Unsupported store size: {}", size)); + std::int32_t src_offset = stack_size - stack_position.at(node.value); + copy_memory(31, src_offset, 0, 0, size, 1); } void apply(ir::node_ref it, ir::unary_operation const & node, types::type_ptr const & type) @@ -293,7 +288,6 @@ namespace pslang::jit::aarch64 { load(node.arg1, 0); builder.sub_reg(31, 0, 0); - extend(0, type); store(it, 0); } else if (types::is_floating_point_type(*type)) @@ -307,8 +301,6 @@ namespace pslang::jit::aarch64 case ast::unary_operation_type::logical_not: load(node.arg1, 0); builder.or_not_reg(31, 0, 0); - if (types::is_integer_type(*type)) - extend(0, type); store(it, 0); break; case ast::unary_operation_type::address_of: @@ -317,10 +309,7 @@ namespace pslang::jit::aarch64 store(it, 0); break; case ast::unary_operation_type::dereference: - load(node.arg1, 0); - builder.ldr(0, 0 ,0); - store(it, 0); - break; + throw std::runtime_error("Dereference operator mush not be present in compiled IR"); } } @@ -349,8 +338,6 @@ 
namespace pslang::jit::aarch64 else { builder.add_reg(0, 1, 0); - if (!types::is_pointer_type(*type)) - extend(0, type); } break; case ast::binary_operation_type::subtraction: @@ -359,8 +346,6 @@ namespace pslang::jit::aarch64 else { builder.sub_reg(0, 1, 0); - if (!types::is_pointer_type(*type)) - extend(0, type); } break; case ast::binary_operation_type::multiplication: @@ -369,7 +354,6 @@ namespace pslang::jit::aarch64 else { builder.mul_reg(0, 1, 0); - extend(0, type); } break; case ast::binary_operation_type::division: @@ -377,14 +361,17 @@ namespace pslang::jit::aarch64 builder.fdiv(0, 1, fp_mode, 0); else { + extend(0, type); + extend(1, type); if (types::is_signed_integer_type(*type)) builder.sdiv_reg(0, 1, 0); else builder.udiv_reg(0, 1, 0); - extend(0, type); } break; case ast::binary_operation_type::remainder: + extend(0, type); + extend(1, type); if (types::is_signed_integer_type(*type)) { builder.sdiv_reg(0, 1, 2); @@ -419,6 +406,8 @@ namespace pslang::jit::aarch64 } else { + extend(0, node.arg1->inferred_type); + extend(1, node.arg2->inferred_type); builder.cmp_reg(0, 1); builder.csetm(0, 0b0000); } @@ -431,6 +420,8 @@ namespace pslang::jit::aarch64 } else { + extend(0, node.arg1->inferred_type); + extend(1, node.arg2->inferred_type); builder.cmp_reg(0, 1); builder.csetm(0, 0b0001); } @@ -443,6 +434,8 @@ namespace pslang::jit::aarch64 } else { + extend(0, node.arg1->inferred_type); + extend(1, node.arg2->inferred_type); builder.cmp_reg(1, 0); if (types::is_bool_type(*node.arg1->inferred_type) || types::is_unsigned_integer_type(*node.arg1->inferred_type)) builder.csetm(0, 0b1000); @@ -458,6 +451,8 @@ namespace pslang::jit::aarch64 } else { + extend(0, node.arg1->inferred_type); + extend(1, node.arg2->inferred_type); builder.cmp_reg(0, 1); if (types::is_bool_type(*node.arg1->inferred_type) || types::is_unsigned_integer_type(*node.arg1->inferred_type)) builder.csetm(0, 0b1000); @@ -473,6 +468,8 @@ namespace pslang::jit::aarch64 } else { + extend(0, 
node.arg1->inferred_type); + extend(1, node.arg2->inferred_type); builder.cmp_reg(0, 1); if (types::is_bool_type(*node.arg1->inferred_type) || types::is_unsigned_integer_type(*node.arg1->inferred_type)) builder.csetm(0, 0b1001); @@ -488,6 +485,8 @@ namespace pslang::jit::aarch64 } else { + extend(0, node.arg1->inferred_type); + extend(1, node.arg2->inferred_type); builder.cmp_reg(1, 0); if (types::is_bool_type(*node.arg1->inferred_type) || types::is_unsigned_integer_type(*node.arg1->inferred_type)) builder.csetm(0, 0b1001); @@ -600,11 +599,21 @@ namespace pslang::jit::aarch64 store(it, 0); } - void apply(ir::node_ref, ir::assignment const & node, types::type_ptr const &) + void apply(ir::node_ref, ir::assignment const & node, types::type_ptr const & type) { - // TODO: struct/array assignment? - load(node.rhs, 0); - store(node.lhs, 0); + // TODO: array/array element assignment? + std::size_t src_offset = stack_size - stack_position.at(node.rhs); + + auto dst_type = node.lhs->inferred_type; + std::size_t dst_offset = stack_size - stack_position.at(node.lhs); + for (auto field_id : node.path) + { + auto struct_node = std::get(*dst_type).node; + dst_type = struct_node->fields[field_id].inferred_type; + dst_offset = dst_offset + struct_node->fields[field_id].layout.offset; + } + + copy_memory(31, src_offset, 31, dst_offset, ast::type_size(*dst_type), 0); } void apply(ir::node_ref, ir::jump const & node, types::type_ptr const &) @@ -731,7 +740,12 @@ namespace pslang::jit::aarch64 { if (ir::is_value_instruction(it->instruction)) { - stack_size += 8; + auto size = ast::type_size(*it->inferred_type); + if (size == 0) continue; + // TODO: inefficient for small types, maybe only round up to type alignment? 
+ // Need to make sure all read/write arm64 instructions used can handle offsets that + // are not a multiple of 8 + stack_size += ((size + 7) / 8) * 8; stack_position[it] = stack_size; } } @@ -756,6 +770,8 @@ namespace pslang::jit::aarch64 for (; it != end; ++it) { + // Uncomment to debug per-node instruction generation: + builder.nop(); lcontext.nodes[it] = pcontext.code.size(); std::visit([&](auto const & instruction){ apply(it, instruction, it->inferred_type); }, it->instruction); } @@ -791,6 +807,48 @@ namespace pslang::jit::aarch64 { reg_extend_visitor{{}, builder, reg}.apply(*type); } + + void copy_memory(std::uint8_t reg_src_addr, std::size_t src_offset, std::uint8_t reg_dst_addr, std::size_t dst_offset, std::size_t size, std::uint8_t tmp_reg) + { + std::int32_t offset = 0; + + while (size > 0) + { + auto check_step = [&](std::size_t step) + { + return size >= step && (((src_offset + offset) % step) == 0) && (((dst_offset + offset) % step) == 0); + }; + + if (check_step(8)) + { + builder.ldr(tmp_reg, reg_src_addr, (src_offset + offset) / 8); + builder.str(tmp_reg, reg_dst_addr, (dst_offset + offset) / 8); + size -= 8; + offset += 8; + } + else if (check_step(4)) + { + builder.ldrw(tmp_reg, reg_src_addr, (src_offset + offset) / 4); + builder.strw(tmp_reg, reg_dst_addr, (dst_offset + offset) / 4); + size -= 4; + offset += 4; + } + else if (check_step(2)) + { + builder.ldrh(tmp_reg, reg_src_addr, (src_offset + offset) / 2); + builder.strh(tmp_reg, reg_dst_addr, (dst_offset + offset) / 2); + size -= 2; + offset += 2; + } + else + { + builder.ldrb(tmp_reg, reg_src_addr, src_offset + offset); + builder.strb(tmp_reg, reg_dst_addr, dst_offset + offset); + size -= 1; + offset += 1; + } + } + } }; } diff --git a/libs/jit/source/arch/aarch64/instruction_builder.cpp b/libs/jit/source/arch/aarch64/instruction_builder.cpp index b88fc54..29fc226 100644 --- a/libs/jit/source/arch/aarch64/instruction_builder.cpp +++ b/libs/jit/source/arch/aarch64/instruction_builder.cpp @@ 
-5,6 +5,11 @@ namespace pslang::jit::aarch64 static constexpr std::uint32_t REG_MASK = 0x1fu; + void instruction_builder::nop() + { + do_push(0xd503201fu); + } + void instruction_builder::movz(std::uint8_t reg, std::uint16_t val, std::uint8_t shift) { do_push(0xd2800000u | (reg & REG_MASK) | (val << 5) | ((shift & 0x3u) << 21)); @@ -20,6 +25,21 @@ namespace pslang::jit::aarch64 do_push(0xf9000000u | (reg_src & REG_MASK) | ((reg_addr & REG_MASK) << 5) | ((std::uint16_t(offset) & 0xfffu) << 10)); } + void instruction_builder::strw(std::uint8_t reg_src, std::uint8_t reg_addr, std::uint16_t offset) + { + do_push(0xb9000000u | (reg_src & REG_MASK) | ((reg_addr & REG_MASK) << 5) | ((std::uint16_t(offset) & 0xfffu) << 10)); + } + + void instruction_builder::strh(std::uint8_t reg_src, std::uint8_t reg_addr, std::uint16_t offset) + { + do_push(0x79000000u | (reg_src & REG_MASK) | ((reg_addr & REG_MASK) << 5) | ((std::uint16_t(offset) & 0xfffu) << 10)); + } + + void instruction_builder::strb(std::uint8_t reg_src, std::uint8_t reg_addr, std::uint16_t offset) + { + do_push(0x39000000u | (reg_src & REG_MASK) | ((reg_addr & REG_MASK) << 5) | ((std::uint16_t(offset) & 0xfffu) << 10)); + } + void instruction_builder::str_pre(std::uint8_t reg_src, std::uint8_t reg_addr, std::int16_t offset) { do_push(0xf8000c00u | (reg_src & REG_MASK) | ((reg_addr & REG_MASK) << 5) | ((std::uint16_t(offset) & 0x1ffu) << 12)); @@ -50,6 +70,21 @@ namespace pslang::jit::aarch64 do_push(0xf9400000u | (reg_dst & REG_MASK) | ((reg_addr & REG_MASK) << 5) | ((std::uint16_t(offset) & 0xfffu) << 10)); } + void instruction_builder::ldrw(std::uint8_t reg_dst, std::uint8_t reg_addr, std::uint16_t offset) + { + do_push(0xb9400000u | (reg_dst & REG_MASK) | ((reg_addr & REG_MASK) << 5) | ((std::uint16_t(offset) & 0xfffu) << 10)); + } + + void instruction_builder::ldrh(std::uint8_t reg_dst, std::uint8_t reg_addr, std::uint16_t offset) + { + do_push(0x79400000u | (reg_dst & REG_MASK) | ((reg_addr & REG_MASK) << 5) | 
((std::uint16_t(offset) & 0xfffu) << 10)); + } + + void instruction_builder::ldrb(std::uint8_t reg_dst, std::uint8_t reg_addr, std::uint16_t offset) + { + do_push(0x39400000u | (reg_dst & REG_MASK) | ((reg_addr & REG_MASK) << 5) | ((std::uint16_t(offset) & 0xfffu) << 10)); + } + void instruction_builder::ldr_pre(std::uint8_t reg_dst, std::uint8_t reg_addr, std::int16_t offset) { do_push(0xf8400c00u | (reg_dst & REG_MASK) | ((reg_addr & REG_MASK) << 5) | ((std::uint16_t(offset) & 0x1ffu) << 12));