pslang/libs/jit/source/arch/aarch64/compiler_v2.cpp

1139 lines
34 KiB
C++

#include <pslang/jit/arch/aarch64/compiler.hpp>
#include <pslang/jit/arch/aarch64/instruction_builder.hpp>
#include <pslang/ir/node.hpp>
#include <pslang/ir/compiler.hpp>
#include <pslang/ast/type.hpp>
#include <pslang/ast/struct.hpp>
#include <pslang/ast/function.hpp>
#include <pslang/types/type_visitor.hpp>
#include <sstream>
namespace pslang::jit::aarch64
{
namespace
{
// Homogeneous floating-point aggregate: up to 4 floating-point members
// of the same type (after struct flattening)
struct hfa_data
{
types::type_ptr element_type;
std::size_t count;
};
struct local_context
{
bool use_frame_pointer = true;
std::unordered_map<ast::struct_definition const *, std::optional<hfa_data>> struct_hfa;
std::unordered_map<std::string, std::int32_t> extern_symbols;
std::unordered_map<ir::node_ref, std::int32_t> nodes;
std::unordered_map<float, std::int32_t> f16_constants;
std::unordered_map<float, std::int32_t> f32_constants;
std::unordered_map<double, std::int32_t> f64_constants;
struct resolve_data
{
std::int32_t offset;
ir::node_ref target;
};
std::vector<resolve_data> branch_resolve;
std::vector<resolve_data> cbranch_resolve;
std::vector<resolve_data> adr_resolve;
};
std::uint8_t fp_mode_for(types::type const & type)
{
if (types::equal(type, types::primitive_type(types::f16_type{})))
return 1;
if (types::equal(type, types::primitive_type(types::f32_type{})))
return 2;
return 3;
}
std::int32_t fp_size(std::uint8_t mode)
{
return 1 << mode;
}
std::optional<hfa_data> get_hfa_data(local_context & lcontext, types::type_ptr const & type);
std::optional<hfa_data> compute_hfa_data(local_context & lcontext, ast::struct_definition const * node)
{
types::type_ptr type = nullptr;
std::size_t count = 0;
for (std::size_t i = 0; i < node->fields.size(); ++i)
{
auto const & field = node->fields[i];
// NB: recursion must be impossible due to prior checks in type checker
if (auto subdata = get_hfa_data(lcontext, field.inferred_type))
{
if (type && !types::equal(*type, *subdata->element_type))
return std::nullopt;
type = subdata->element_type;
count += subdata->count;
}
else
return std::nullopt;
}
if (count == 0)
return std::nullopt;
return hfa_data{type, count};
}
std::optional<hfa_data> get_hfa_data(local_context & lcontext, types::type_ptr const & type)
{
if (auto struct_type = std::get_if<types::struct_type>(type.get()))
{
if (auto it = lcontext.struct_hfa.find(struct_type->node); it != lcontext.struct_hfa.end())
return it->second;
auto result = compute_hfa_data(lcontext, struct_type->node);
lcontext.struct_hfa[struct_type->node] = result;
return result;
}
else if (auto array_type = std::get_if<types::array_type>(type.get()))
{
if (auto subdata = get_hfa_data(lcontext, array_type->element_type))
return hfa_data{subdata->element_type, subdata->count * array_type->size};
else
return std::nullopt;
}
else if (types::is_floating_point_type(*type))
return hfa_data{type, 1};
else
return std::nullopt;
}
struct populate_const_data_visitor
{
program_context & pcontext;
local_context & lcontext;
template <typename Node>
void apply(Node const & node, types::type_ptr const &)
{}
void apply(ir::literal const & node, types::type_ptr const &)
{
if (auto f16_literal = std::get_if<ast::f16_literal>(&node.value))
{
lcontext.f16_constants[f16_literal->value.repr] = pcontext.code.size();
push_bytes(f16_literal->value.repr);
}
else if (auto f32_literal = std::get_if<ast::f32_literal>(&node.value))
{
lcontext.f32_constants[f32_literal->value] = pcontext.code.size();
push_bytes(f32_literal->value);
}
else if (auto f64_literal = std::get_if<ast::f64_literal>(&node.value))
{
lcontext.f32_constants[f64_literal->value] = pcontext.code.size();
push_bytes(f64_literal->value);
}
}
void apply(ir::extern_symbol const & node, types::type_ptr const &)
{
std::int32_t offset = pcontext.code.size();
lcontext.extern_symbols[node.name] = offset;
pcontext.foreign_resolve.push_back({node.name, offset});
push_bytes<void *>(nullptr);
}
private:
template <typename T>
void push_bytes(T const & value)
{
auto begin = (std::uint8_t const *)(&value);
auto end = begin + sizeof(value);
pcontext.code.insert(pcontext.code.end(), begin, end);
}
};
// Set register @reg to -1 (all bits = 1)
void set_m1(instruction_builder & builder, std::uint8_t reg)
{
builder.or_not_reg(31, 31, reg);
}
struct literal_visitor
{
program_context & pcontext;
local_context & lcontext;
instruction_builder & builder;
void operator()(ast::bool_literal const & node)
{
if (node.value)
set_m1(builder, 0);
else
builder.movz(0, 0);
}
template <typename T>
requires(std::is_integral_v<T> && !std::is_same_v<T, bool>)
void operator()(ast::primitive_literal_base<T> const & node)
{
for (std::size_t i = 0; i < sizeof(T); i += 2)
{
if (i == 0)
{
builder.movz(0, std::uint64_t(node.value));
}
else
{
auto val = std::uint16_t(std::uint64_t(node.value) >> (i * 8));
if (val != 0)
builder.movk(0, val, i / 2);
}
}
if (sizeof(T) < 8)
{
if constexpr (std::is_signed_v<T>)
{
if (node.value < 0)
builder.sbfm(0, 0, sizeof(T) * 8);
}
}
}
void operator()(ast::f16_literal const & node)
{
auto offset = lcontext.f16_constants.at(node.value.repr);
std::int32_t current = pcontext.code.size();
builder.ldr_fp_pc(0, 0, (offset - current) / 4);
builder.fcvt(0, 0b10, 0, 0b01);
}
void operator()(ast::f32_literal const & node)
{
auto offset = lcontext.f32_constants.at(node.value);
std::int32_t current = pcontext.code.size();
builder.ldr_fp_pc(0, 0, (offset - current) / 4);
}
void operator()(ast::f64_literal const & node)
{
auto offset = lcontext.f64_constants.at(node.value);
std::int32_t current = pcontext.code.size();
builder.ldr_fp_pc(0, 1, (offset - current) / 4);
}
};
struct reg_extend_visitor
: types::const_visitor<reg_extend_visitor>
{
using const_visitor::apply;
instruction_builder & builder;
std::uint8_t reg;
void apply(types::bool_type const &)
{
builder.ubfm(reg, reg, 8);
}
void apply(types::f16_type const &)
{}
void apply(types::f32_type const &)
{}
void apply(types::f64_type const &)
{}
template <typename T>
void apply(types::primitive_type_base<T> const &)
{
if constexpr (sizeof(T) == 8)
{
return;
}
if constexpr (std::is_signed_v<T>)
{
builder.sbfm(reg, reg, sizeof(T) * 8);
}
if constexpr (std::is_unsigned_v<T>)
{
builder.ubfm(reg, reg, sizeof(T) * 8);
}
}
template <typename T>
void apply(T const &)
{
throw std::runtime_error(std::string("reg_extend_visitor is not implemented for ") + typeid(T).name());
}
};
struct compile_visitor
{
program_context & pcontext;
ir::module_context const & mcontext;
local_context & lcontext;
instruction_builder & builder;
std::vector<std::int32_t> argument_position;
std::unordered_map<ir::node_ref, std::int32_t> stack_position;
std::int32_t stack_size = 0;
bool return_value_is_large_struct = false;
void apply(ir::node_ref, ir::label const &, types::type_ptr const &)
{}
void apply(ir::node_ref it, ir::literal const & node, types::type_ptr const & type)
{
std::visit(literal_visitor{pcontext, lcontext, builder}, node.value);
if (types::is_integer_like_type(*type))
store(it, 0);
else if (types::is_floating_point_type(*type))
store_fp(it, 0, fp_mode_for(*type));
}
void apply(ir::node_ref it, ir::alloc const & node, types::type_ptr const &)
{
// Nothing to do: alloc just allocates a node of a struct type,
// but we already allocated stack space for it
}
void apply(ir::node_ref it, ir::copy const & node, types::type_ptr const & type)
{
// TODO: array/array element copy?
auto size = ast::type_size(*type);
auto dst_offset = stack_size - stack_position.at(it);
auto src_type = node.source->inferred_type;
auto src_offset = stack_size - stack_position.at(node.source);
for (auto field_id : node.path)
{
auto const & field = std::get<types::struct_type>(*src_type).node->fields[field_id];
src_type = field.inferred_type;
src_offset += field.layout.offset;
}
copy_memory(31, src_offset, 31, dst_offset, size, 0);
}
void apply(ir::node_ref it, ir::load const & node, types::type_ptr const & type)
{
// TODO: array/array element load?
load(node.ptr, 0);
auto size = ast::type_size(*type);
auto dst_offset = stack_size - stack_position.at(it);
copy_memory(0, 0, 31, dst_offset, size, 1);
}
void apply(ir::node_ref, ir::store const & node, types::type_ptr const & type)
{
// TODO: array/array element store?
load(node.ptr, 0);
auto size = ast::type_size(*type);
std::int32_t src_offset = stack_size - stack_position.at(node.value);
copy_memory(31, src_offset, 0, 0, size, 1);
}
void apply(ir::node_ref it, ir::unary_operation const & node, types::type_ptr const & type)
{
switch (node.type)
{
case ast::unary_operation_type::negation:
if (types::is_integer_type(*type))
{
load(node.arg1, 0);
builder.sub_reg(31, 0, 0);
store(it, 0);
}
else if (types::is_floating_point_type(*type))
{
auto mode = fp_mode_for(*type);
load_fp(node.arg1, 0, mode);
builder.fneg(0, mode, 0);
store_fp(it, 0, mode);
}
break;
case ast::unary_operation_type::logical_not:
load(node.arg1, 0);
builder.or_not_reg(31, 0, 0);
store(it, 0);
break;
case ast::unary_operation_type::address_of:
case ast::unary_operation_type::mutable_address_of:
builder.add_imm(31, 0, stack_size - stack_position.at(node.arg1));
store(it, 0);
break;
case ast::unary_operation_type::dereference:
throw std::runtime_error("Dereference operator mush not be present in compiled IR");
}
}
void apply(ir::node_ref it, ir::binary_operation const & node, types::type_ptr const & type)
{
auto arg1_type = node.arg1->inferred_type;
bool const is_fp = types::is_floating_point_type(*arg1_type);
bool const result_is_fp = types::is_floating_point_type(*type);
std::uint8_t const fp_mode = fp_mode_for(*arg1_type);
if (is_fp)
{
load_fp(node.arg1, 0, fp_mode);
load_fp(node.arg2, 1, fp_mode);
}
else
{
load(node.arg1, 0);
load(node.arg2, 1);
}
switch (node.type)
{
case ast::binary_operation_type::addition:
if (is_fp)
builder.fadd(0, 1, fp_mode, 0);
else
{
builder.add_reg(0, 1, 0);
}
break;
case ast::binary_operation_type::subtraction:
if (is_fp)
builder.fsub(0, 1, fp_mode, 0);
else
{
builder.sub_reg(0, 1, 0);
}
break;
case ast::binary_operation_type::multiplication:
if (is_fp)
builder.fmul(0, 1, fp_mode, 0);
else
{
builder.mul_reg(0, 1, 0);
}
break;
case ast::binary_operation_type::division:
if (is_fp)
builder.fdiv(0, 1, fp_mode, 0);
else
{
extend(0, type);
extend(1, type);
if (types::is_signed_integer_type(*type))
builder.sdiv_reg(0, 1, 0);
else
builder.udiv_reg(0, 1, 0);
}
break;
case ast::binary_operation_type::remainder:
extend(0, type);
extend(1, type);
if (types::is_signed_integer_type(*type))
{
builder.sdiv_reg(0, 1, 2);
builder.mul_reg(1, 2, 1);
builder.sub_reg(0, 1, 0);
}
else if (types::is_unsigned_integer_type(*type))
{
builder.udiv_reg(0, 1, 2);
builder.mul_reg(1, 2, 1);
builder.sub_reg(0, 1, 0);
}
break;
case ast::binary_operation_type::binary_and:
builder.and_reg(0, 1, 0);
break;
case ast::binary_operation_type::logical_and:
throw std::runtime_error("Short-circuiting operators must have been unwrapped in IR compiler");
case ast::binary_operation_type::binary_or:
builder.or_reg(0, 1, 0);
break;
case ast::binary_operation_type::logical_or:
throw std::runtime_error("Short-circuiting operators must have been unwrapped in IR compiler");
case ast::binary_operation_type::logical_xor:
builder.xor_reg(0, 1, 0);
break;
case ast::binary_operation_type::equals:
if (is_fp)
{
builder.fcmp(0, 1, fp_mode);
builder.csetm(0, 0b0000);
}
else
{
extend(0, node.arg1->inferred_type);
extend(1, node.arg2->inferred_type);
builder.cmp_reg(0, 1);
builder.csetm(0, 0b0000);
}
break;
case ast::binary_operation_type::not_equals:
if (is_fp)
{
builder.fcmp(0, 1, fp_mode);
builder.csetm(0, 0b0001);
}
else
{
extend(0, node.arg1->inferred_type);
extend(1, node.arg2->inferred_type);
builder.cmp_reg(0, 1);
builder.csetm(0, 0b0001);
}
break;
case ast::binary_operation_type::less:
if (is_fp)
{
builder.fcmp(0, 1, fp_mode);
builder.csetm(0, 0b0100);
}
else
{
extend(0, node.arg1->inferred_type);
extend(1, node.arg2->inferred_type);
builder.cmp_reg(1, 0);
if (types::is_bool_type(*node.arg1->inferred_type) || types::is_unsigned_integer_type(*node.arg1->inferred_type))
builder.csetm(0, 0b1000);
else
builder.csetm(0, 0b1100);
}
break;
case ast::binary_operation_type::greater:
if (is_fp)
{
builder.fcmp(1, 0, fp_mode);
builder.csetm(0, 0b0100);
}
else
{
extend(0, node.arg1->inferred_type);
extend(1, node.arg2->inferred_type);
builder.cmp_reg(0, 1);
if (types::is_bool_type(*node.arg1->inferred_type) || types::is_unsigned_integer_type(*node.arg1->inferred_type))
builder.csetm(0, 0b1000);
else
builder.csetm(0, 0b1100);
}
break;
case ast::binary_operation_type::less_equals:
if (is_fp)
{
builder.fcmp(0, 1, fp_mode);
builder.csetm(0, 0b1001);
}
else
{
extend(0, node.arg1->inferred_type);
extend(1, node.arg2->inferred_type);
builder.cmp_reg(0, 1);
if (types::is_bool_type(*node.arg1->inferred_type) || types::is_unsigned_integer_type(*node.arg1->inferred_type))
builder.csetm(0, 0b1001);
else
builder.csetm(0, 0b1101);
}
break;
case ast::binary_operation_type::greater_equals:
if (is_fp)
{
builder.fcmp(1, 0, fp_mode);
builder.csetm(0, 0b1001);
}
else
{
extend(0, node.arg1->inferred_type);
extend(1, node.arg2->inferred_type);
builder.cmp_reg(1, 0);
if (types::is_bool_type(*node.arg1->inferred_type) || types::is_unsigned_integer_type(*node.arg1->inferred_type))
builder.csetm(0, 0b1001);
else
builder.csetm(0, 0b1101);
}
break;
default:
{
std::ostringstream os;
os << "binary operation " << node.type << " is not implemented";
throw std::runtime_error(os.str());
}
}
if (result_is_fp)
store_fp(it, 0, fp_mode);
else
store(it, 0);
}
void apply(ir::node_ref it, ir::cast_operation const & node, types::type_ptr const &)
{
auto src_type = node.arg1->inferred_type;
auto dst_type = node.target_type;
if (types::equal(*src_type, *dst_type) || (types::is_pointer_type(*src_type) && types::is_pointer_type(*dst_type)))
{
load(node.arg1, 0);
store(it, 0);
return;
}
if (auto array_type = std::get_if<types::array_type>(src_type.get()))
{
if (auto pointer_type = std::get_if<types::pointer_type>(dst_type.get()))
{
if (types::equal(*array_type->element_type, *pointer_type->referenced_type))
{
std::int32_t offset = stack_size - stack_position.at(node.arg1);
builder.add_imm(31, 0, offset);
store(it, 0);
return;
}
}
}
if (types::is_numeric_type(*src_type) && types::is_numeric_type(*dst_type))
{
if (types::is_integer_type(*src_type))
{
load(node.arg1, 0);
if (types::is_integer_type(*dst_type))
{
extend(0, dst_type);
}
else if (types::is_floating_point_type(*dst_type))
{
auto dst_mode = fp_mode_for(*dst_type);
if (types::is_signed_integer_type(*src_type))
{
builder.fmov(0, 0, 3, 1);
builder.scvtf(0, 0, 3);
if (dst_mode != 3)
builder.fcvt(0, 3, 0, dst_mode);
}
else if (types::is_unsigned_integer_type(*src_type))
{
builder.fmov(0, 0, 3, 1);
builder.ucvtf(0, 0, 3);
if (dst_mode != 3)
builder.fcvt(0, 3, 0, dst_mode);
}
}
}
else if (types::is_floating_point_type(*src_type))
{
auto src_mode = fp_mode_for(*src_type);
if (types::is_integer_type(*dst_type))
{
if (types::is_signed_integer_type(*dst_type))
{
builder.fcvtns(0, 0, src_mode);
extend(0, dst_type);
}
else if (types::is_unsigned_integer_type(*dst_type))
{
builder.fcvtnu(0, 0, src_mode);
extend(0, dst_type);
}
}
else if (types::is_floating_point_type(*dst_type))
{
auto dst_mode = fp_mode_for(*dst_type);
builder.fcvt(0, src_mode, 0, dst_mode);
}
}
if (types::is_integer_type(*dst_type))
{
store(it, 0);
}
else if (types::is_floating_point_type(*dst_type))
{
store_fp(it, 0, fp_mode_for(*dst_type));
}
return;
}
throw std::runtime_error("Unknown types for cast instruction");
}
void apply(ir::node_ref it, ir::argument const & node, types::type_ptr const & type)
{
// Nothing to do: arguments already pushed on stack in function preamble
}
void apply(ir::node_ref it, ir::instruction_address const & node, types::type_ptr const &)
{
lcontext.adr_resolve.emplace_back(pcontext.code.size(), node.target);
builder.adr(0, 0);
store(it, 0);
}
void apply(ir::node_ref it, ir::extern_symbol const & node, types::type_ptr const &)
{
builder.ldr_pc(0, (lcontext.extern_symbols[node.name] - (std::int32_t)pcontext.code.size()) / 4);
store(it, 0);
}
void apply(ir::node_ref, ir::assignment const & node, types::type_ptr const & type)
{
// TODO: array/array element assignment?
std::size_t src_offset = stack_size - stack_position.at(node.rhs);
auto dst_type = node.lhs->inferred_type;
std::size_t dst_offset = stack_size - stack_position.at(node.lhs);
for (auto field_id : node.path)
{
if (auto struct_type = std::get_if<types::struct_type>(dst_type.get()))
{
auto struct_node = struct_type->node;
dst_type = struct_node->fields[field_id].inferred_type;
dst_offset += struct_node->fields[field_id].layout.offset;
}
else if (auto array_type = std::get_if<types::array_type>(dst_type.get()))
{
dst_type = array_type->element_type;
dst_offset += field_id * ast::type_size(*array_type->element_type);
}
else
throw std::runtime_error("Unknown object type for field assignment");
}
copy_memory(31, src_offset, 31, dst_offset, ast::type_size(*dst_type), 0);
}
void apply(ir::node_ref, ir::jump const & node, types::type_ptr const &)
{
lcontext.branch_resolve.emplace_back(pcontext.code.size(), node.target);
builder.b(0);
}
void apply(ir::node_ref, ir::jump_if_zero const & node, types::type_ptr const &)
{
load(node.condition, 0);
lcontext.cbranch_resolve.emplace_back(pcontext.code.size(), node.target);
builder.cbz(0, 0);
}
void apply(ir::node_ref, ir::jump_if_nonzero const & node, types::type_ptr const &)
{
load(node.condition, 0);
lcontext.cbranch_resolve.emplace_back(pcontext.code.size(), node.target);
builder.cbnz(0, 0);
}
template <typename Node, typename DoCall>
void apply_call(ir::node_ref it, Node const & node, types::type_ptr const & type, DoCall && do_call)
{
// TODO: handle the case when there weren't enough registers
std::uint8_t reg = 0;
std::uint8_t fp_reg = 0;
for (auto const & argument : node.arguments)
{
auto struct_type = std::get_if<types::struct_type>(argument->inferred_type.get());
auto array_type = std::get_if<types::array_type>(argument->inferred_type.get());
if (struct_type || array_type)
{
// NB: fixed-size arrays are handled in the same way
// as structs of N identical fields
auto size = ast::type_size(*argument->inferred_type);
if (auto hfa = get_hfa_data(lcontext, argument->inferred_type); hfa && hfa->count <= 4)
{
// HFA - passed in consecutive FP registers
std::int32_t base_offset = stack_size - stack_position.at(argument);
auto fp_mode = fp_mode_for(*hfa->element_type);
auto size = fp_size(fp_mode);
for (std::size_t i = 0; i < hfa->count; ++i)
builder.ldr_fp(fp_reg++, fp_mode, 31, (base_offset + i * size) / size);
}
else if (size <= 16)
{
// Small struct - passed in up to 2 GP registers
std::int32_t base_offset = stack_size - stack_position.at(argument);
std::int32_t offset = 0;
while (offset < size)
{
builder.ldr(reg++, 31, (base_offset + offset) / 8);
offset += 8;
}
}
else
{
// Large struct - passed by pointer
std::int32_t base_offset = stack_size - stack_position.at(argument);
builder.add_imm(31, reg++, base_offset);
}
}
else if (types::is_integer_like_type(*argument->inferred_type))
load(argument, reg++);
else if (types::is_floating_point_type(*argument->inferred_type))
load_fp(argument, fp_reg++, fp_mode_for(*argument->inferred_type));
else
throw std::runtime_error("Unsupported function argument type");
}
if (return_value_is_large_struct)
{
builder.sub_imm(31, 31, 16);
builder.str(8, 31, 0);
}
if (!lcontext.use_frame_pointer)
{
builder.sub_imm(31, 31, 16);
builder.str(30, 31, 0);
}
do_call();
if (!lcontext.use_frame_pointer)
{
builder.ldr(30, 31, 0);
builder.add_imm(31, 31, 16);
}
if (return_value_is_large_struct)
{
builder.ldr(8, 31, 0);
builder.add_imm(31, 31, 16);
}
// TODO: array return value?
auto size = ast::type_size(*type);
auto struct_type = std::get_if<types::struct_type>(type.get());
auto array_type = std::get_if<types::array_type>(type.get());
if (size == 0)
{}
else if (struct_type || array_type)
{
// NB: fixed-size arrays are handled in the same way
// as structs of N identical fields
auto base_offset = stack_size - stack_position.at(it);
if (auto hfa = get_hfa_data(lcontext, type); hfa && hfa->count <= 4)
{
auto fp_mode = fp_mode_for(*hfa->element_type);
auto size = fp_size(fp_mode);
// HFA - returned in consecutive FP registers
for (std::size_t i = 0; i < hfa->count; ++i)
builder.str_fp(i, fp_mode, 31, (base_offset + i * size) / size);
}
else if (size <= 16)
{
// Small struct - returned in x0-x1 registers
builder.str(0, 31, base_offset / 8);
if (size > 8)
builder.str(1, 31, (base_offset + 8) / 8);
}
else
{
// Large struct - returned by pointer in x8 register
copy_memory(8, 0, 31, base_offset, size, 0);
}
}
else if (types::is_integer_like_type(*type))
store(it, 0);
else if (types::is_floating_point_type(*type))
store_fp(it, 0, fp_mode_for(*type));
else
throw std::runtime_error("Unsupported return value type");
}
void apply(ir::node_ref it, ir::call const & node, types::type_ptr const & type)
{
apply_call(it, node, type, [&]{
lcontext.branch_resolve.emplace_back(pcontext.code.size(), node.target);
builder.bl(0);
});
}
void apply(ir::node_ref it, ir::call_pointer const & node, types::type_ptr const & type)
{
apply_call(it, node, type, [&]{
load(node.pointer, 9);
builder.bl_reg(9);
});
}
void apply(ir::node_ref, ir::return_value const & node, types::type_ptr const &)
{
// TODO: array return value?
if (node.value)
{
auto type = (*node.value)->inferred_type;
auto size = ast::type_size(*type);
auto struct_type = std::get_if<types::struct_type>(type.get());
auto array_type = std::get_if<types::array_type>(type.get());
if (size == 0)
{}
else if (struct_type || array_type)
{
auto base_offset = stack_size - stack_position.at(*node.value);
if (auto hfa = get_hfa_data(lcontext, type); hfa && hfa->count <= 4)
{
auto fp_mode = fp_mode_for(*hfa->element_type);
auto size = fp_size(fp_mode);
// HFA - returned in consecutive FP registers
for (std::size_t i = 0; i < hfa->count; ++i)
builder.ldr_fp(i, fp_mode, 31, (base_offset + i * size) / size);
}
else if (size <= 16)
{
// Small struct - returned in x0-x1 registers
builder.ldr(0, 31, base_offset / 8);
if (size > 8)
builder.ldr(1, 31, (base_offset + 8) / 8);
}
else
{
// Large struct - returned by pointer in x8 register
copy_memory(31, base_offset, 8, 0, size, 0);
}
}
else if (types::is_integer_like_type(*type))
load(*node.value, 0);
else if (types::is_floating_point_type(*type))
load_fp(*node.value, 0, fp_mode_for(*type));
else
throw std::runtime_error("Unsupported return value type");
}
if (lcontext.use_frame_pointer)
{
builder.ldr(29, 31, (stack_size - 16) / 8);
builder.ldr(30, 31, (stack_size - 8) / 8);
}
if (stack_size > 0)
builder.add_imm(31, 31, stack_size);
builder.ret();
}
void compile(ast::function_definition const * function_definition, ir::node_ref begin, ir::node_ref end)
{
auto result_type = function_definition->inferred_result_type;
auto struct_type = std::get_if<types::struct_type>(result_type.get());
auto array_type = std::get_if<types::array_type>(result_type.get());
if (struct_type || array_type)
if (!get_hfa_data(lcontext, result_type) && ast::type_size(*result_type) > 16)
return_value_is_large_struct = true;
stack_size = 0;
if (lcontext.use_frame_pointer)
stack_size += 16;
for (auto const & argument : function_definition->arguments)
{
auto size = ast::type_size(*argument.inferred_type);
stack_size += ((size + 7) / 8) * 8;
argument_position.push_back(stack_size);
}
for (auto it = begin; it != end; ++it)
{
if (auto argument = std::get_if<ir::argument>(&it->instruction))
{
stack_position[it] = argument_position[argument->index];
}
else if (ir::is_value_instruction(it->instruction))
{
auto size = ast::type_size(*it->inferred_type);
if (size == 0) continue;
// TODO: inefficient for small types, maybe only round up to type alignment?
// Need to make sure all read/write arm64 instructions used can handle offsets that
// are not a multiple of 8
stack_size += ((size + 7) / 8) * 8;
stack_position[it] = stack_size;
}
}
stack_size = ((stack_size + 15) / 16) * 16;
if (!std::holds_alternative<ir::label>(begin->instruction))
throw std::runtime_error("First IR node of a function must be a label");
auto it = begin;
lcontext.nodes[it] = pcontext.code.size();
if (stack_size > 0)
builder.sub_imm(31, 31, stack_size);
if (lcontext.use_frame_pointer)
{
builder.str(29, 31, (stack_size - 16) / 8);
builder.str(30, 31, (stack_size - 8) / 8);
builder.add_imm(31, 29, stack_size - 16);
}
// TODO: handle the case when there weren't enough registers
std::uint8_t reg = 0;
std::uint8_t fp_reg = 0;
for (std::size_t i = 0; i < function_definition->arguments.size(); ++i)
{
auto const & argument = function_definition->arguments[i];
auto size = ast::type_size(*argument.inferred_type);
auto struct_type = std::get_if<types::struct_type>(argument.inferred_type.get());
auto array_type = std::get_if<types::array_type>(argument.inferred_type.get());
if (size == 0) continue;
if (struct_type || array_type)
{
if (auto hfa = get_hfa_data(lcontext, argument.inferred_type); hfa && hfa->count <= 4)
{
// HFA - passed in consecutive FP registers
std::int32_t base_offset = stack_size - argument_position[i];
auto fp_mode = fp_mode_for(*hfa->element_type);
auto size = fp_size(fp_mode);
for (std::size_t i = 0; i < hfa->count; ++i)
builder.str_fp(fp_reg++, fp_mode, 31, (base_offset + i * size) / size);
}
else if (size <= 16)
{
// Small struct - passed in up to 2 GP registers
std::int32_t base_offset = stack_size - argument_position[i];
std::int32_t offset = 0;
while (offset < size)
{
builder.str(reg++, 31, (base_offset + offset) / 8);
offset += 8;
}
}
else
{
// Large struct - passed by pointer
std::int32_t dst_offset = stack_size - argument_position[i];
copy_memory(reg++, 0, 31, dst_offset, size, 9);
}
}
else if (types::is_integer_like_type(*argument.inferred_type))
builder.str(reg++, 31, (stack_size - argument_position[i]) / 8);
else if (types::is_floating_point_type(*argument.inferred_type))
{
auto fp_mode = fp_mode_for(*argument.inferred_type);
builder.str_fp(fp_reg++, fp_mode, 31, (stack_size - argument_position[i]) / fp_size(fp_mode));
}
else
throw std::runtime_error("Unknown argument type");
}
++it;
for (; it != end; ++it)
{
// Uncomment to debug per-node instruction generation:
builder.nop();
lcontext.nodes[it] = pcontext.code.size();
std::visit([&](auto const & instruction){ apply(it, instruction, it->inferred_type); }, it->instruction);
}
}
private:
void load(ir::node_ref it, std::uint8_t reg)
{
std::int32_t offset = stack_size - stack_position.at(it);
builder.ldr(reg, 31, offset / 8);
}
void load_fp(ir::node_ref it, std::uint8_t reg, std::uint8_t mode)
{
std::int32_t offset = stack_size - stack_position.at(it);
builder.ldr_fp(reg, mode, 31, offset / fp_size(mode));
}
void store(ir::node_ref it, std::uint8_t reg)
{
std::int32_t offset = stack_size - stack_position.at(it);
builder.str(reg, 31, offset / 8);
}
void store_fp(ir::node_ref it, std::uint8_t reg, std::uint8_t mode)
{
std::int32_t offset = stack_size - stack_position.at(it);
builder.str_fp(reg, mode, 31, offset / fp_size(mode));
}
// Sign- or zero-extend the register depending on the exact type
void extend(std::uint8_t reg, types::type_ptr const & type)
{
reg_extend_visitor{{}, builder, reg}.apply(*type);
}
void copy_memory(std::uint8_t reg_src_addr, std::size_t src_offset, std::uint8_t reg_dst_addr, std::size_t dst_offset, std::size_t size, std::uint8_t tmp_reg)
{
std::int32_t offset = 0;
while (size > 0)
{
auto check_step = [&](std::size_t step)
{
return size >= step && (((src_offset + offset) % step) == 0) && (((dst_offset + offset) % step) == 0);
};
if (check_step(8))
{
builder.ldr(tmp_reg, reg_src_addr, (src_offset + offset) / 8);
builder.str(tmp_reg, reg_dst_addr, (dst_offset + offset) / 8);
size -= 8;
offset += 8;
}
else if (check_step(4))
{
builder.ldrw(tmp_reg, reg_src_addr, (src_offset + offset) / 4);
builder.strw(tmp_reg, reg_dst_addr, (dst_offset + offset) / 4);
size -= 4;
offset += 4;
}
else if (check_step(2))
{
builder.ldrh(tmp_reg, reg_src_addr, (src_offset + offset) / 2);
builder.strh(tmp_reg, reg_dst_addr, (dst_offset + offset) / 2);
size -= 2;
offset += 2;
}
else
{
builder.ldrb(tmp_reg, reg_src_addr, src_offset + offset);
builder.strb(tmp_reg, reg_dst_addr, dst_offset + offset);
size -= 1;
offset += 1;
}
}
}
};
}
void compile(program_context & pcontext, ir::module_context const & mcontext)
{
local_context lcontext;
instruction_builder builder{pcontext.code};
{
populate_const_data_visitor visitor{pcontext, lcontext};
for (auto it = mcontext.nodes->begin(); it != mcontext.nodes->end(); ++it)
std::visit([&](auto const & instruction){ visitor.apply(instruction, it->inferred_type); }, it->instruction);
}
for (auto const & symbol : mcontext.symbols)
{
pcontext.symbols[symbol.first] = pcontext.code.size();
compile_visitor visitor{pcontext, mcontext, lcontext, builder};
visitor.compile(symbol.first, symbol.second.begin, symbol.second.end);
}
pcontext.entry_point = lcontext.nodes.at(mcontext.entry_point);
for (auto const & resolve : lcontext.branch_resolve)
builder.b_inject(pcontext.code.data() + resolve.offset, (lcontext.nodes.at(resolve.target) - resolve.offset) / 4);
for (auto const & resolve : lcontext.cbranch_resolve)
builder.cb_inject(pcontext.code.data() + resolve.offset, (lcontext.nodes.at(resolve.target) - resolve.offset) / 4);
for (auto const & resolve : lcontext.adr_resolve)
builder.adr_inject(pcontext.code.data() + resolve.offset, (lcontext.nodes.at(resolve.target) - resolve.offset) / 4);
}
}