From 23121291488e4088358965cd758a0e5dc29f6e64 Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 4 Jan 2026 20:19:24 +0100 Subject: [PATCH] first codegen prototype --- include/codegen/allocator.hpp | 66 ------- include/codegen/codegen.hpp | 21 +- include/codegen/fasm_stack.hpp | 180 ------------------ include/codegen/slot.hpp | 48 ----- include/codegen/targets/fasm_x86_64_linux.hpp | 70 +++++++ include/parser/lexer.hpp | 10 +- src/main.cpp | 28 +-- 7 files changed, 92 insertions(+), 331 deletions(-) delete mode 100644 include/codegen/allocator.hpp delete mode 100644 include/codegen/fasm_stack.hpp delete mode 100644 include/codegen/slot.hpp create mode 100644 include/codegen/targets/fasm_x86_64_linux.hpp diff --git a/include/codegen/allocator.hpp b/include/codegen/allocator.hpp deleted file mode 100644 index 82c9404..0000000 --- a/include/codegen/allocator.hpp +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once -#include "prelude/string.hpp" -#include "codegen/slot.hpp" - -#define AVAILABLE_REGISTERS 4 -#define AVAILABLE_STACK_SIZE 4096 - -template -class Allocator -{ -public: - virtual ~Allocator() {}; - -public: - virtual const Slot &Allocate(SlotAddr addr) = 0; - virtual const Slot &Resolve(SlotAddr addr) = 0; -}; - -class SlotAllocator : public Allocator -{ -public: - SlotAllocator() = default; - ~SlotAllocator() = default; - -public: - const IR::IRSlot &Allocate(const StringView &addr) override - { - if (m_regs < AVAILABLE_REGISTERS) - { - m_slots.Push(IR::IRSlot(addr, m_regs++, IR::IRSlot::Type::REGISTRY)); - return m_slots.data[m_slots.size - 1]; - } - if (m_offset_counter + 8 <= AVAILABLE_STACK_SIZE) - { - m_offset_counter += 8; - m_slots.Push(IR::IRSlot(addr, m_offset_counter, IR::IRSlot::Type::STACK)); - return m_slots.data[m_slots.size - 1]; - } - // TODO: proper error handling (stack overflow etc.) - assert(0 && "failed to allocate local"); - } - - const IR::IRSlot &Resolve(const StringView &addr) override - { - for (size_t i = 0; i < m_slots.size; ++i) - { - if (m_slots.data[i].GetAddr() == addr) - { - return m_slots.data[i]; - } - } - - assert(0 && "could not resolve stack offset for specified address"); - } - -public: - View slots() const { return m_slots.view(); } - -public: - unsigned int GetStackSize() const { return m_offset_counter; } - -public: - int m_regs = 0; - unsigned int m_offset_counter = 0; - Builder m_slots; -}; \ No newline at end of file diff --git a/include/codegen/codegen.hpp b/include/codegen/codegen.hpp index 64b14a1..11fa21a 100644 --- a/include/codegen/codegen.hpp +++ b/include/codegen/codegen.hpp @@ -1,17 +1,20 @@ - -// TODO: store all of values, allocated registers, stack offsets in single allocator -// to be able to find out which kind a specific temp register represent - #pragma once -#include -#include +#include "ir/op.hpp" #include "prelude/string.hpp" -#include "ir/ir.hpp" class CodeGenerator { public: - virtual ~CodeGenerator() {}; + CodeGenerator() = default; + virtual ~CodeGenerator() {} - virtual void Generate(const char* filename, View ops) = 0; +public: + virtual bool Generate(const IR::OpView* ops) = 0; + StringView GetOutput() { return output().view(); } + +protected: + StringBuilder& output() { return m_output; } + +private: + StringBuilder m_output; }; diff --git a/include/codegen/fasm_stack.hpp b/include/codegen/fasm_stack.hpp deleted file mode 100644 index fbda045..0000000 --- a/include/codegen/fasm_stack.hpp +++ /dev/null @@ -1,180 +0,0 @@ -#pragma once - -#include "codegen/codegen.hpp" -#include "ir/slot.hpp" -#include "ir/value.hpp" - -class StackFasmX86_64Generator : public CodeGenerator -{ -public: - ~StackFasmX86_64Generator() override = default; - -private: - StringView GetTempAddr(IR::ValueHandle *reg) - { - return std::move((StringBuilder() << reg->Format()).view()); - } - - StringView GetSlotAddr(const IR::IRSlot &slot) - { - switch (slot.GetType()) - { - case IR::IRSlot::Type::REGISTRY: - { - StringBuilder sb; - sb.AppendFormat("r%d", slot.GetSlot() + 12); // for r10, r11, r12 etc. - return sb.view(); - } - break; - case IR::IRSlot::Type::STACK: - { - StringBuilder sb; - sb.AppendFormat("[rbp-%d]", slot.GetSlot()); // for r10, r11, r12 etc. - return sb.view(); - } - break; - default: - assert(0 && "TODO: either unreachable or handle properly"); - } - } - -private: - void GenerateOp(const IR::Op *op) - { - switch (op->GetType()) - { - case IR::OpType::EXTERN: - { - auto extrn = reinterpret_cast(op); - auto symbol = extrn->symbol(); - appendf("extrn '%s' as __%s\n", symbol.c_str(), symbol.c_str()); - appendf("%s = PLT __%s\n", symbol.c_str(), symbol.c_str()); - } - break; - case IR::OpType::FN: - { - m_allocator = new SlotAllocator(); - - auto fn = reinterpret_cast(op); - auto name = fn->name(); - appendf("public %s\n", name.c_str()); - appendf("%s:\n", name.c_str()); - appendf("push rbp\n"); - appendf("mov rbp, rsp\n"); - - StringBuilder fnOutput; - StringBuilder *backup = m_output; - m_output = &fnOutput; - - if (fn->params().size > 0) - { - // TODO: support multiple parameters - auto param_slot = m_allocator->Allocate(fn->params().data[0]); - appendf("mov %s, rdi\n", GetSlotAddr(param_slot).c_str()); - } - for (auto &fOp : fn->body().ops()) - { - GenerateOp(fOp); - } - - int stackSize = m_allocator->GetStackSize(); - m_output = backup; - appendf("sub rsp, %d\n", stackSize); - *m_output << fnOutput.c_str(); - - appendf("leave\nret\n"); - - m_allocator = nullptr; - } - break; - case IR::OpType::CALL: - { - auto call = reinterpret_cast(op); - // TODO: support several arguments - if (call->args().size == 1) - { - auto slot = m_allocator->Resolve(GetTempAddr(call->args().data[0])); - appendf("mov rdi, %s\n", GetSlotAddr(slot).c_str()); - } - appendf("call %s\n", call->callee().c_str()); - auto result_slot = m_allocator->Allocate(GetTempAddr(call->result())); - appendf("mov %s, rax\n", GetSlotAddr(result_slot).c_str()); - } - break; - // case IR::OpType::LOAD_CONST: - // { - // auto lc = reinterpret_cast(op); - // auto addr = GetTempAddr(lc->result()); - // auto slot = m_allocator->Allocate(addr); - // appendf("mov %s, %ld\n", GetSlotAddr(slot).c_str(), lc->value()); - // } - // break; - case IR::OpType::STORE: - { - auto s = reinterpret_cast(op); - auto slot = m_allocator->Allocate(s->addr()); - auto value_slot = m_allocator->Resolve(GetTempAddr(s->src())); - appendf("mov rax, %s\n", GetSlotAddr(value_slot).c_str()); - appendf("mov %s, rax\n", GetSlotAddr(slot).c_str()); - } - break; - case IR::OpType::LOAD: - { - auto l = reinterpret_cast(op); - auto value_slot = m_allocator->Allocate(GetTempAddr(l->result())); - auto variable_slot = m_allocator->Resolve(l->addr()); - appendf("mov rax, %s\n", GetSlotAddr(variable_slot).c_str()); - appendf("mov %s, rax\n", GetSlotAddr(value_slot).c_str()); - } - break; - case IR::OpType::ADD: - { - auto expr = reinterpret_cast(op); - auto lhs_slot = m_allocator->Resolve(GetTempAddr(expr->lhs())); - appendf("mov rax, %s\n", GetSlotAddr(lhs_slot).c_str()); - auto rhs_slot = m_allocator->Resolve(GetTempAddr(expr->rhs())); - appendf("add rax, %s\n", GetSlotAddr(rhs_slot).c_str()); - auto result_slot = m_allocator->Allocate(GetTempAddr(expr->result())); - appendf("mov %s, rax\n", GetSlotAddr(result_slot).c_str()); - } - break; - default: - appendf("; NOT HANDLED\n; %s\n", op->Format(0).c_str()); - break; - } - } - - void appendf(const char *fmt, ...) - { - assert(m_output != nullptr && "nowhere to write"); - va_list args; - va_start(args, fmt); - m_output->VAppendFormat(fmt, args); - va_end(args); - } - -public: - StringView GetOutput() { return m_output->view(); } - -private: - StringBuilder *m_output = nullptr; - -public: - void Generate(const char *filename, View ops) override - { - m_output = new StringBuilder(); - appendf("; fasm x86_64 linux generated assembly using pl\n"); - appendf("format ELF64\n"); - appendf("section '.text' executable\n"); - - for (auto &op : ops) - { - GenerateOp(op); - } - } - -public: - // StackAllocator* m_stack = nullptr; - // TODO: handle sub-blocks - SlotAllocator *m_allocator = nullptr; -}; diff --git a/include/codegen/slot.hpp b/include/codegen/slot.hpp deleted file mode 100644 index 8d6bd70..0000000 --- a/include/codegen/slot.hpp +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once -#include "prelude/string.hpp" - -namespace IR -{ - -struct IRSlot -{ - enum class Type - { - UNKNOWN = 0, - STACK, - REGISTRY, - }; -public: - IRSlot(StringView addr, unsigned int slot, Type slotType) : m_addr(addr), m_slot(slot), m_slotType(slotType) {} - IRSlot() = default; -public: - Type GetType() const { return m_slotType; } - const StringView& GetAddr() const { return m_addr; } - unsigned int GetSlot() const { return m_slot; } -public: - StringView Format() const { - StringBuilder sb; - switch(GetType()) - { - case Type::REGISTRY: - sb << "r" << GetSlot(); - break; - case Type::STACK: - sb << "s[" << GetSlot() << "]"; - break; - default: - sb << "(UNKNOWN_SLOT_TYPE)"; - break; - } - return sb.view(); - } -private: - StringView m_addr; - unsigned int m_slot; - Type m_slotType; -}; - -using IRSlotBuilder = Builder; -using IRSlotView = View; - -} // namespace IR diff --git a/include/codegen/targets/fasm_x86_64_linux.hpp b/include/codegen/targets/fasm_x86_64_linux.hpp new file mode 100644 index 0000000..7df7402 --- /dev/null +++ b/include/codegen/targets/fasm_x86_64_linux.hpp @@ -0,0 +1,70 @@ +#pragma once +#include "codegen/codegen.hpp" +#include "ir/op.hpp" +#include "ir/ops.hpp" +#include "prelude/linkedlist.hpp" + +class FasmX86_64Generator : public CodeGenerator +{ +public: + FasmX86_64Generator() = default; + +public: + bool Generate(const IR::OpView* ops) override + { + output().Extend("format ELF64\n"); + output().Extend("section '.text' executable\n"); + + for (size_t i = 0; i < ops->size; ++i) + { + GenerateStatement(ops->data[i]); + } + + return true; + } + +private: + void GenerateExtern(IR::ExternOp* extrn) + { + // TODO: instead of __symbol().c_str(), extrn->symbol().c_str()); + output().AppendFormat("%s = PLT __%s\n", extrn->symbol().c_str(), extrn->symbol().c_str()); + } + + void GenerateFunction(IR::FnOp* fn) + { + output().AppendFormat("public %s\n", fn->name().c_str()); + output().AppendFormat("%s:\n", fn->name().c_str()); + output().Extend(" push rbp\n"); + output().Extend(" mov rbp, rsp\n"); + + for (auto cur = fn->body().ops().Begin(); cur != nullptr; cur = cur->next) + { + GenerateStatement(cur->value); + } + + output().Extend(" leave\n"); + output().Extend(" ret\n"); + } + + void GenerateCall(IR::CallOp* call) + { + output().AppendFormat(" call %s\n", call->callee().c_str()); + } + + void GenerateStatement(IR::Op* op) + { + switch(op->GetType()) + { + case IR::OpType::EXTERN: + return GenerateExtern(reinterpret_cast(op)); + case IR::OpType::FN: + return GenerateFunction(reinterpret_cast(op)); + case IR::OpType::CALL: + return GenerateCall(reinterpret_cast(op)); + // TODO: + default: output().AppendFormat(" ; %d not implemented\n", op->GetType()); + } + } +}; diff --git a/include/parser/lexer.hpp b/include/parser/lexer.hpp index 376f5ed..bfc654e 100644 --- a/include/parser/lexer.hpp +++ b/include/parser/lexer.hpp @@ -47,13 +47,13 @@ inline bool operator!=(char c, TokenType t) struct Token { - TokenType token; - long int_number; + TokenType token = TokenType::Unknown; + long int_number = 0; // null-terminated StringView string; - long line_number; - long offset_start; - long offset_end; + long line_number = 0; + long offset_start = 0; + long offset_end = 0; public: Token(TokenType t) : token(t) {} Token(TokenType t, long lnumber, long soffset, long eoffset) diff --git a/src/main.cpp b/src/main.cpp index cf882e9..dfcd7ae 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2,6 +2,7 @@ #include #include #include +#include "codegen/targets/fasm_x86_64_linux.hpp" #include "ir/op.hpp" #include "ir/optimize.hpp" #include "parser/lexer.hpp" @@ -72,35 +73,16 @@ int main(int argc, char **argv) } } - StringBuilder sb; - for (size_t i = 0; i < ops.size; ++i) - { - sb.AppendFormat("%s\n", ops.data[i]->Format(0).c_str()); - } + FasmX86_64Generator gen; - printf("%s\n", sb.c_str()); + gen.Generate(&ops); - auto output = File::Open("example.ll", File::Mode::WRITE); - if (!output.Write(sb.view())) + auto output = File::Open("example.asm", File::Mode::WRITE); + if (!output.Write(gen.GetOutput())) { fprintf(stderr, "ERROR: Failed to write IR to a file"); } std::println("OK"); - // StackFasmX86_64Generator gen; - - // gen.Generate(filename, ops); - - // StringView output = gen.GetOutput(); - - // FILE *file = fopen("out.asm", "w"); - - // fwrite(output.c_str(), output.size, sizeof(char), file); - - // fclose(file); - - // system("fasm out.asm"); - // system("gcc -o out out.o"); - return 0; }