From 16cc06b7885f69f7eb991a9547f4c0412f3cd1c8 Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 30 Nov 2025 22:04:35 +0100 Subject: [PATCH] feat: beta compiling to fasm (codegen) + hello world example let's goooo --- .vscode/settings.json | 75 ++++++++- CMakeLists.txt | 1 + example.rx | 14 +- hello.asm | 35 +++++ include/ast.hpp | 163 ++++++++++---------- include/codegen.hpp | 270 +++++++++++++++++++++++++++++++++ include/ir.hpp | 342 ++++++++++++++++++++++++++++++++++++++++++ include/lexer.hpp | 29 ++-- include/string.hpp | 278 +++++++++++++++++++++++++++++++--- src/ir.cpp | 9 ++ src/main.cpp | 21 ++- test | Bin 15360 -> 0 bytes test.asm | 39 ++++- test.o | Bin 608 -> 0 bytes 14 files changed, 1141 insertions(+), 135 deletions(-) create mode 100644 hello.asm create mode 100644 include/codegen.hpp create mode 100644 include/ir.hpp create mode 100644 src/ir.cpp delete mode 100755 test delete mode 100644 test.o diff --git a/.vscode/settings.json b/.vscode/settings.json index 47dba71..a0e55de 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,79 @@ { "files.associations": { "print": "cpp", - "cctype": "cpp" + "cctype": "cpp", + "new": "cpp", + "format": "cpp", + "any": "cpp", + "array": "cpp", + "atomic": "cpp", + "bit": "cpp", + "bitset": "cpp", + "charconv": "cpp", + "chrono": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "codecvt": "cpp", + "compare": "cpp", + "concepts": "cpp", + "condition_variable": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "forward_list": "cpp", + "list": "cpp", + "map": "cpp", + "set": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "ratio": "cpp", + "source_location": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "fstream": "cpp", + "initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "mutex": "cpp", + "numbers": "cpp", + "ostream": "cpp", + "queue": "cpp", + "ranges": "cpp", + "semaphore": "cpp", + "span": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "stdfloat": "cpp", + "stop_token": "cpp", + "streambuf": "cpp", + "text_encoding": "cpp", + "thread": "cpp", + "cinttypes": "cpp", + "typeinfo": "cpp", + "variant": "cpp" } } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d0e055..431de19 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,7 @@ project(pl VERSION 0.0.1 LANGUAGES C CXX) set(CMAKE_CXX_STANDARD 23) set(SOURCES + src/ir.cpp src/main.cpp ) diff --git a/example.rx b/example.rx index 6b13e79..b062790 100644 --- a/example.rx +++ b/example.rx @@ -1,7 +1,15 @@ extern putchar fn main() { - local a = 34 - local b = 35 - putchar(a + b) + local h = 72 + local e = 69 + local l = 76 + local o = 79 + local nl = 10 + putchar(h) + putchar(e) + putchar(l) + putchar(l) + putchar(o) + putchar(nl) } \ No newline at end of file diff --git a/hello.asm b/hello.asm new file mode 100644 index 0000000..b3088ae --- /dev/null +++ b/hello.asm @@ -0,0 +1,35 @@ +format ELF64 + +section '.text' executable + +extrn 'putchar' as __putchar +putchar = PLT __putchar + +public main + +main: + ; allocate space for locals (a and b: 4 bytes each → 8 bytes total) + push rbp + mov rbp, rsp + sub rsp, 8 + + ; local a = 34 → stored at [rbp - 4] + mov dword [rbp - 4], 34 + + ; local b = 35 → stored at [rbp - 8] + mov dword [rbp - 8], 35 + + ; compute a + b + mov eax, [rbp - 4] + add eax, [rbp - 8] + + ; call putchar(a + b) + ; SysV: first integer arg → EDI + mov edi, eax + call putchar + + ; return 0 from main + mov eax, 0 + + leave + ret \ No newline at end of file diff --git a/include/ast.hpp b/include/ast.hpp index 588748a..b5e6b00 100644 --- a/include/ast.hpp +++ b/include/ast.hpp @@ -19,16 +19,15 @@ enum class NodeType COUNT_NODES, }; +#define NODE_TYPE(x) \ + NodeType GetType() const override { return NodeType::x; } + class Node { public: virtual NodeType GetType() const = 0; virtual ~Node() {} -}; - -#define NODE_TYPE(x) \ - NodeType GetType() const override { return NodeType::x; } - +}; class ExpressionNode : public Node { @@ -79,79 +78,15 @@ class ExternNode : public Node { public: // TODO: support multiple extern symbols - ExternNode(char* symbol) + ExternNode(StringView symbol) : m_symbol(symbol) {} - ~ExternNode() override { - delete m_symbol; - } + ~ExternNode() override = default; NODE_TYPE(Extern) -private: - char* m_symbol; -}; - -class FnDeclNode : public Node -{ public: - // TODO: support parameters - FnDeclNode(char* name, Node* body) - : m_name(name), m_body(body) {} - ~FnDeclNode() override { - delete m_name; - delete m_body; - } - - NODE_TYPE(FnDecl) + const StringView& symbol() const { return m_symbol; } private: - char* m_name; - Node* m_body; -}; - -class FnCallNode : public Node -{ -public: - // TODO: support multiple arguments - FnCallNode(char* name, Node* arg) - : m_name(name), m_arg(arg) {} - ~FnCallNode() override { - delete m_name; - delete m_arg; - } - - NODE_TYPE(FnCall) -private: - char* m_name; - Node* m_arg; -}; - -class VariableNode : public Node -{ -public: - VariableNode(char* name) - : m_name(name) {} - ~VariableNode() override { - delete m_name; - } - - NODE_TYPE(Variable) -private: - char* m_name; -}; - -class VarDeclNode : public Node -{ -public: - VarDeclNode(char* name, Node* value) - : m_name(name), m_value(value) {} - ~VarDeclNode() override { - delete m_name; - delete m_value; - } - - NODE_TYPE(VarDecl) -private: - char* m_name; - Node* m_value; + StringView m_symbol; }; class CompoundNode : public Node @@ -187,6 +122,77 @@ private: std::vector m_nodes; }; +class FnDeclNode : public Node +{ +public: + // TODO: support parameters + FnDeclNode(const StringView& name, CompoundNode* body) + : m_name(name), m_body(body) {} + ~FnDeclNode() override { + delete m_body; + } + + NODE_TYPE(FnDecl) +public: + const StringView& name() const { return m_name; } + const CompoundNode* body() const { return m_body; } +private: + StringView m_name; + CompoundNode* m_body; +}; + +class FnCallNode : public Node +{ +public: + // TODO: support multiple arguments + FnCallNode(const StringView& name, Node* arg) + : m_name(name), m_arg(arg) {} + ~FnCallNode() override { + delete m_arg; + } + + NODE_TYPE(FnCall) +public: + const StringView& name() const { return m_name; } + // TODO: support multiple args + const Node* arg() const { return m_arg; } +private: + StringView m_name; + Node* m_arg; +}; + +class VariableNode : public Node +{ +public: + VariableNode(const StringView& name) + : m_name(name) {} + ~VariableNode() override = default; + + NODE_TYPE(Variable) +public: + const StringView& name() const { return m_name; } +private: + StringView m_name; +}; + +class VarDeclNode : public Node +{ +public: + VarDeclNode(const StringView& name, Node* value) + : m_name(name), m_value(value) {} + ~VarDeclNode() override { + delete m_value; + } + + NODE_TYPE(VarDecl) +public: + const StringView& name() const { return m_name; } + const Node* value() const { return m_value; } +private: + StringView m_name; + Node* m_value; +}; + class ProgramNode : public Node { public: @@ -203,6 +209,9 @@ public: { m_externs.push_back(extrn); } +public: + const std::vector externs() const { return m_externs; } + const std::vector funcs() const { return m_funcs; } private: std::vector m_funcs; std::vector m_externs; @@ -225,7 +234,7 @@ public: { // Function Declaration m_lexer->NextExpect(TokenType::Id); - char *name = strdup(m_lexer->token().string); + StringView name = m_lexer->token().string; m_lexer->NextExpect('('); // TODO: parse parameters m_lexer->NextExpect(')'); @@ -239,7 +248,7 @@ public: return new FnDeclNode(name, compound); } - FnCallNode* ParseFnCall(char* name) + FnCallNode* ParseFnCall(const StringView& name) { // m_lexer->NextExpect(TokenType::Id); // char* name = strdup(m_lexer->token().string); @@ -264,7 +273,7 @@ public: case TokenType::Id: // variable name or function call { m_lexer->NextExpect(TokenType::Id); - char *name = strdup(m_lexer->token().string); + auto name = m_lexer->token().string; token = m_lexer->seek_token(); if (token->token == '(') { @@ -341,7 +350,7 @@ public: { m_lexer->NextExpect(TokenType::Local); m_lexer->NextExpect(TokenType::Id); - char *name = strdup(m_lexer->token().string); + auto name = m_lexer->token().string; m_lexer->NextExpect('='); Node* value = ParseExpression(); return new VarDeclNode(name, value); diff --git a/include/codegen.hpp b/include/codegen.hpp new file mode 100644 index 0000000..ed96b1b --- /dev/null +++ b/include/codegen.hpp @@ -0,0 +1,270 @@ +#pragma once +#include +#include "string.hpp" +#include "ir.hpp" + +template +class Allocator +{ +public: + virtual ~Allocator() {}; +public: + virtual const Slot& Allocate(const StringView& addr) = 0; + virtual const Slot& Resolve(const StringView& addr) = 0; +}; + +struct StackSlot +{ + size_t offset; + StringView addr; +}; + +class StackAllocator : public Allocator +{ +public: + StackAllocator() = default; + ~StackAllocator() override = default; +public: + const StackSlot& Allocate(const StringView& addr) + { + m_offset_counter += 4; + m_slots.Push(StackSlot { m_offset_counter, addr }); + return m_slots.data[m_slots.size - 1]; + } + + const StackSlot& Resolve(const StringView& addr) + { + for (size_t i = 0; i < m_slots.size; ++i) + { + if (strcmp(m_slots.data[i].addr.c_str(), addr.c_str()) == 0) + { + return m_slots.data[i]; + } + } + + assert(0 && "could not resolve stack offset for specified address"); + } +private: + size_t m_offset_counter = 0; + Builder m_slots; +}; + +struct ConstSlot +{ + long value; + StringView addr; +}; + +class ConstAllocator : public Allocator +{ +public: + ConstAllocator() = default; + ~ConstAllocator() override = default; +public: + const ConstSlot& Allocate(const StringView& addr) + { + m_slots.Push(ConstSlot { 0, addr }); + return m_slots.data[m_slots.size - 1]; + } + + const ConstSlot& StoreValue(const StringView& addr, long value) + { + for (size_t i = 0; i < m_slots.size; ++i) + { + if (strcmp(m_slots.data[i].addr.c_str(), addr.c_str()) == 0) + { + m_slots.data[i].value = value; + return m_slots.data[i]; + } + } + + assert(0 && "could not resolve const under specified address"); + } + + const ConstSlot& Resolve(const StringView& addr) + { + for (size_t i = 0; i < m_slots.size; ++i) + { + if (strcmp(m_slots.data[i].addr.c_str(), addr.c_str()) == 0) + { + return m_slots.data[i]; + } + } + + assert(0 && "could not resolve const under specified address"); + } +private: + Builder m_slots; +}; + +struct RegisterSlot +{ + const StringView& reg; + StringView addr; +}; + +class RegisterAllocator : public Allocator +{ +public: + RegisterAllocator() + { + m_regs.Push(std::move(StringView("eax"))); + m_regs.Push(std::move(StringView("ecx"))); + } + ~RegisterAllocator() override = default; +public: + const RegisterSlot& Allocate(const StringView& addr) + { + assert(m_slots.size < m_regs.size && "no space available for allocating to register"); + m_slots.Push(RegisterSlot { m_regs.data[m_slots.size], addr }); + return m_slots.data[m_slots.size - 1]; + } + + const RegisterSlot& Resolve(const StringView& addr) + { + for (size_t i = 0; i < m_slots.size; ++i) + { + if (strcmp(m_slots.data[i].addr.c_str(), addr.c_str()) == 0) + { + return m_slots.data[i]; + } + } + + assert(0 && "could not resolve const under specified address"); + } + + void Clear() + { + m_slots.size = 0; + } +private: + Builder m_slots; + Builder m_regs; +}; + +class CodeGenerator +{ +public: + virtual ~CodeGenerator() {}; + + virtual void Generate(const char* filename, View ops) = 0; +}; + +class StackFasmX86_64Generator : public CodeGenerator +{ +public: + ~StackFasmX86_64Generator() override = default; +private: + int GetStackSize(const IR::OpView ops) + { + int stackSize = 0; + for (auto &op : ops) + { + if (op->GetType() == IR::OpType::STORE) stackSize += 4; + } + return stackSize; + } + + StringView GetTempAddr(IR::Reg reg) + { + return std::move((StringBuilder() << 't' << reg).view()); + } +private: + void GenerateOp(const IR::Op *op) + { + switch (op->GetType()) + { + case IR::OpType::EXTERN: + { + auto extrn = reinterpret_cast(op); + auto symbol = extrn->symbol(); + printf("extrn '%s' as __%s\n", symbol.c_str(), symbol.c_str()); + printf("%s = PLT __%s\n", symbol.c_str(), symbol.c_str()); + } + break; + case IR::OpType::FN: + { + auto fn = reinterpret_cast(op); + auto name = fn->name(); + printf("public %s\n", name.c_str()); + printf("%s:\n", name.c_str()); + printf("push rbp\n"); + printf("mov rbp, rsp\n"); + int stackSize = GetStackSize(fn->ops()); + printf("sub rsp, %d\n", stackSize); + for(auto &fOp : fn->ops()) + { + GenerateOp(fOp); + } + printf("leave\nret\n"); + } + break; + case IR::OpType::CALL: + { + auto call = reinterpret_cast(op); + // TODO: support several arguments + if (call->args().size == 1) + { + auto reg_slot = m_registers.Resolve(GetTempAddr(call->args().data[0])); + printf("mov edi, %s\n", reg_slot.reg.c_str()); + } + printf("call %s\n", call->callee().c_str()); + m_registers.Clear(); + } + break; + case IR::OpType::LOAD_CONST: + { + auto lc = reinterpret_cast(op); + auto addr = GetTempAddr(lc->result()); + m_consts.Allocate(addr); + m_consts.StoreValue(addr, lc->value()); + } + break; + case IR::OpType::STORE: + { + auto s = reinterpret_cast(op); + printf("; DEBUG: resolving stack slot at %s\n", s->addr().c_str()); + auto slot = m_stack.Allocate(s->addr()); + auto value = m_consts.Resolve(GetTempAddr(s->src())); + printf("mov dword [rbp-%d], %ld\n", slot.offset, value.value); + } + break; + case IR::OpType::LOAD: + { + auto l = reinterpret_cast(op); + auto reg_slot = m_registers.Allocate(GetTempAddr(l->result())); + auto stack_slot = m_stack.Resolve(l->addr()); + printf("mov %s, [rbp-%d]\n", reg_slot.reg.c_str(), stack_slot.offset); + } + break; + case IR::OpType::ADD: + { + auto expr = reinterpret_cast(op); + auto lhs_slot = m_registers.Resolve(GetTempAddr(expr->lhs())); + auto rhs_slot = m_registers.Resolve(GetTempAddr(expr->rhs())); + printf("add %s, %s\n", lhs_slot.reg.c_str(), rhs_slot.reg.c_str()); + m_registers.Clear(); + m_registers.Allocate(GetTempAddr(expr->result())); + } + break; + default: printf("; NOT HANDLED\n; %s\n", op->Format(0).c_str()); break; + } + } +public: + void Generate(const char* filename, View ops) override + { + printf("; fasm x86_64 linux generated assembly using pl\n"); + printf("format ELF64\n"); + printf("section '.text' executable\n"); + + for (auto& op : ops) + { + GenerateOp(op); + } + } +public: + // TODO: handle sub-blocks + StackAllocator m_stack; + ConstAllocator m_consts; + RegisterAllocator m_registers; +}; \ No newline at end of file diff --git a/include/ir.hpp b/include/ir.hpp new file mode 100644 index 0000000..150bad0 --- /dev/null +++ b/include/ir.hpp @@ -0,0 +1,342 @@ +#pragma once +#include "string.hpp" +#include "ast.hpp" + +namespace IR +{ + +enum class OpType +{ + EXTERN = 0, + FN, + LOAD_CONST, + LOAD, + STORE, + ADD, + CALL, + COUNT_OPS, +}; + +#define OP_TYPE(x) \ + OpType GetType() const override { return OpType::x; } + +using Reg = int; + +using RegBuilder = Builder; +using RegView = View; + +class Op +{ +public: + virtual OpType GetType() const = 0; + virtual ~Op() {} + + virtual StringView Format(int indent) const = 0; +}; + +using OpView = View; +using OpBuilder = Builder; + +class Valued +{ +public: + Valued(Reg dest) + : m_dest(dest) {} + ~Valued() = default; +public: + Reg result() const { return m_dest; } +private: + Reg m_dest; +}; + +class ExternOp : public Op +{ +public: + ExternOp(StringView symbol) + : m_symbol(symbol) {} + ~ExternOp() {} + + OP_TYPE(EXTERN) +public: + StringView Format(int indent) const override + { + StringBuilder sb; + sb.AppendIndent(indent); + sb << "EXTRN " << m_symbol.c_str(); + return sb.view(); + } +public: + const StringView& symbol() const { return m_symbol; } +private: + StringView m_symbol; +}; + +class FnOp : public Op +{ +public: + FnOp(StringView name, const CompoundNode* body); + ~FnOp() {} + + OP_TYPE(FN) +public: + StringView Format(int indent) const override + { + StringBuilder sb; + sb.AppendIndent(indent); + sb << "LABEL " << m_name.c_str() << ':' << '\n'; + for (size_t i = 0; i < m_ops.size; ++i) + { + sb << m_ops.data[i]->Format(indent + 2) << '\n'; + } + return sb.view(); + } +public: + const StringView& name() const { return m_name; } + const OpView& ops() const { return m_ops; } +private: + StringView m_name; + OpView m_ops; +}; + +class LoadConstOp : public Op, public Valued +{ +public: + LoadConstOp(Reg dest, long value) + : Valued(dest), m_value(value) {} + ~LoadConstOp() {} + + OP_TYPE(LOAD_CONST) +public: + StringView Format(int indent) const override + { + StringBuilder sb; + sb.AppendIndent(indent); + sb << 't' << result() << " = LOAD_CONST " << m_value; + return sb.view(); + } +public: + long value() const { return m_value; } +private: + long m_value; +}; + +class LoadOp : public Op, public Valued +{ +public: + LoadOp(Reg dest, StringView addr) + : Valued(dest), m_addr(addr) {} + ~LoadOp() {} + + OP_TYPE(LOAD) +public: + StringView Format(int indent) const override + { + StringBuilder sb; + sb.AppendIndent(indent); + sb << 't' << result() << " = LOAD \"" << m_addr.c_str() << "\""; + return sb.view(); + } +public: + const StringView& addr() const { return m_addr; } +private: + StringView m_addr; +}; + +class StoreOp : public Op +{ +public: + StoreOp(StringView addr, Reg src) + : m_addr(addr), m_src(src) {} + ~StoreOp() {} + + OP_TYPE(STORE) +public: + StringView Format(int indent) const override + { + StringBuilder sb; + sb.AppendIndent(indent); + sb << "STORE \"" << m_addr.c_str() << "\", t" << m_src; + return sb.view(); + } +public: + const StringView& addr() const { return m_addr; } + Reg src() const { return m_src; } +private: + StringView m_addr; + Reg m_src; +}; + +class AddOp : public Op, public Valued +{ +public: + AddOp(Reg dest, Reg lhs, Reg rhs) + : Valued(dest), m_lhs(lhs), m_rhs(rhs) {} + ~AddOp() {} + + OP_TYPE(ADD) +public: + StringView Format(int indent) const override + { + StringBuilder sb; + sb.AppendIndent(indent); + sb << 't' << result() << " = ADD t" << m_lhs << ", t" << m_rhs; + return sb.view(); + } +public: + Reg lhs() const { return m_lhs; } + Reg rhs() const { return m_rhs; } +private: + Reg m_lhs; + Reg m_rhs; +}; + +class CallOp : public Op, public Valued +{ +public: + CallOp(Reg dest, StringView callee, RegView args) + : Valued(dest), m_callee(callee), m_args(args) {} + ~CallOp() {} + + OP_TYPE(CALL) +public: + StringView Format(int indent) const override + { + StringBuilder sb; + for (size_t i = 0; i < m_args.size; ++i) + { + sb.AppendIndent(indent); + sb << "PARAM t" << m_args.data[i] << '\n'; + } + sb.AppendIndent(indent); + sb << 't' << result() << " = CALL " << m_callee.c_str(); + return sb.view(); + } +public: + const StringView& callee() const { return m_callee; } + const RegView& args() const { return m_args; } +private: + StringView m_callee; + RegView m_args; +}; + +class IRBuilder +{ +public: + IRBuilder(const Node* root) + : m_root(root) {} +public: + // TODO: support other literals + Reg ParseIntLiteral(const IntLiteralNode* literal) + { + auto dst = AllocateRegister(); + m_ops.Push(new LoadConstOp(dst, literal->integer())); + return dst; + } + + Reg ParseVariable(const VariableNode* var) + { + auto dst = AllocateRegister(); + m_ops.Push(new LoadOp(dst, var->name())); + return dst; + } + + Reg ParseFnCall(const FnCallNode* fn) + { + // TODO: support multiple args + auto arg = ParseExpression(fn->arg()); + auto argRegs = RegBuilder(); + argRegs.Push(arg); + auto dst = AllocateRegister(); + m_ops.Push(new CallOp(dst, fn->name(), RegView(argRegs.data, argRegs.size))); + return dst; + } + + Reg ParseFactor(const Node* factor) + { + switch(factor->GetType()) + { + case NodeType::IntLiteral: return ParseIntLiteral(reinterpret_cast(factor)); + case NodeType::Variable: return ParseVariable(reinterpret_cast(factor)); + case NodeType::FnCall: return ParseFnCall(reinterpret_cast(factor)); + default: assert(0 && "some factor may not be handled"); break; + } + + assert(0 && "unreachable"); + return -1; + } + + Reg ParseExpression(const Node* expression) + { + if (expression->GetType() == NodeType::Expression) + { + auto expr = reinterpret_cast(expression); + auto lhs = ParseExpression(expr->left()); + auto rhs = ParseExpression(expr->right()); + auto dst = AllocateRegister(); + + assert(4 == static_cast(ExpressionNode::Operator::COUNT_OPERATORS) && "some operators may not be handled"); + switch (expr->op()) + { + case ExpressionNode::Operator::Plus: m_ops.Push(new AddOp(dst, lhs, rhs)); break; + default: assert(0 && "TODO: implement other operations"); break; + } + + return dst; + } + + return ParseFactor(expression); + } + + void ParseVarDecl(const VarDeclNode* varDecl) + { + auto value = ParseExpression(varDecl->value()); + m_ops.Push(new StoreOp(varDecl->name(), value)); + } + + void ParseBlock(const CompoundNode* compound) + { + for (auto &statement : *compound) + { + switch(statement->GetType()) + { + case NodeType::VarDecl: ParseVarDecl(reinterpret_cast(statement)); continue; + default: ParseExpression(statement); continue; + } + } + } + + OpView Build() + { + assert(m_root->GetType() == NodeType::Program && "root should be a program"); + auto program = reinterpret_cast(m_root); + + // Externs + for (auto &extrn : program->externs()) + { + m_ops.Push(new ExternOp(extrn->symbol())); + } + + // Functions + for (auto &fn : program->funcs()) + { + m_ops.Push(new FnOp(fn->name(), fn->body())); + } + + return OpView(m_ops.data, m_ops.size); + } +public: + // TODO: think about safety (copying m_ops.data before giving) + OpView ops() const { return OpView(m_ops.data, m_ops.size); } +private: + Reg AllocateRegister() + { + return m_reg_counter++; + } +private: + OpBuilder m_ops; + const Node* m_root = nullptr; + + Reg m_reg_counter = 0; +}; + +} // namespace IR diff --git a/include/lexer.hpp b/include/lexer.hpp index 5c618c5..1448968 100644 --- a/include/lexer.hpp +++ b/include/lexer.hpp @@ -51,7 +51,7 @@ struct Token TokenType token; long int_number; // null-terminated - char* string; + StringView string; long line_number; long offset_start; long offset_end; @@ -90,15 +90,10 @@ public: : m_filename(filename), m_code(code) {} Lexer(const Lexer&) = delete; - Lexer(Lexer&& other) - { - m_code = other.m_code; - other.m_code = StringView(); - } public: bool NextToken() { - if (m_pos >= m_code.size) + if (m_pos >= m_code.size || m_code.data[m_pos] == '\0') { m_token = Token(TokenType::Eof); return false; @@ -119,27 +114,27 @@ public: { StringBuilder s; long offset_start = m_pos - m_last_newline; - s.PushChar(c); + s.Push(c); // id while (std::isalpha(m_code.data[m_pos]) != 0) { - s.PushChar(m_code.data[m_pos++]); + s.Push(m_code.data[m_pos++]); } - s.PushChar('\0'); + s.Push('\0'); m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline); - m_token.string = s.data; + m_token.string = s.view(); - if (strcmp("extern", m_token.string) == 0) + if (strcmp("extern", m_token.string.c_str()) == 0) { m_token.token = TokenType::Extern; } - if (strcmp("fn", m_token.string) == 0) + if (strcmp("fn", m_token.string.c_str()) == 0) { m_token.token = TokenType::Fn; } - if (strcmp("local", m_token.string) == 0) + if (strcmp("local", m_token.string.c_str()) == 0) { m_token.token = TokenType::Local; } @@ -152,13 +147,13 @@ public: StringBuilder s; long offset_start = m_pos - m_last_newline; bool hex = c == '0' && m_code.data[m_pos] == 'x'; - s.PushChar(c); + s.Push(c); // integer (could be hex) while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0)) { - s.PushChar(m_code.data[m_pos++]); + s.Push(m_code.data[m_pos++]); } - s.PushChar('\0'); + s.Push('\0'); m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline); m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10); m_token.string = s.data; diff --git a/include/string.hpp b/include/string.hpp index 7cc7ed6..738f709 100644 --- a/include/string.hpp +++ b/include/string.hpp @@ -1,47 +1,275 @@ #pragma once #include +#include +#include +#include +#include +#include -struct StringView +template +struct View { size_t size; - const char* data; + T* data; // owns its memory + public: - StringView() + View() : size(0), data(nullptr) {} + + // Deep copy constructor + View(const T* src, size_t count) : size(count) { - data = nullptr; + if (count == 0) { data = nullptr; return; } + + data = static_cast(::operator new[](count * sizeof(T))); + for (size_t i = 0; i < count; ++i) + new (&data[i]) T(src[i]); // copy-construct + } + + // Copy constructor + View(const View& other) : View(other.data, other.size) {} + + // Move constructor + View(View&& other) noexcept : size(other.size), data(other.data) + { + other.size = 0; + other.data = nullptr; + } + + View &operator=(const View &other) + { + if (this != &other) + { + // free old memory + for (size_t i = 0; i < size; ++i) + data[i].~T(); + ::operator delete[](data); + + // deep copy + size = other.size; + data = static_cast(::operator new[](size * sizeof(T))); + for (size_t i = 0; i < size; ++i) + new (&data[i]) T(other.data[i]); + } + return *this; + } + + // Destructor + ~View() + { + for (size_t i = 0; i < size; ++i) + data[i].~T(); + ::operator delete[](data); + } + + const T* begin() const { return data; } + const T* end() const { return data + size; } +}; + + +// using StringView = View; + +class StringView final : public View +{ +public: + StringView() : View() {} + + // Construct from C-string — deep copy, including null terminator + StringView(const char* s) + : View(s, strlen(s) + 1) {} + + // Construct from View — ensure null termination + StringView(const View& v) + : View(v) + { + if (size == 0 || data[size - 1] != '\0') + { + // Reallocate and append null terminator + size_t newSize = size + 1; + char* newData = static_cast(::operator new[](newSize * sizeof(char))); + + for (size_t i = 0; i < size; ++i) + newData[i] = data[i]; + + newData[newSize - 1] = '\0'; + + // destroy old + // for (size_t i = 0; i < size; ++i) + // data[i].~char(); + ::operator delete[](data); + + data = newData; + size = newSize; + } + } + + const char* c_str() const { return data; } +}; + +template +struct Builder +{ + size_t size = 0; + size_t capacity = 8; + T* data; + +public: + Builder() + { + data = static_cast(::operator new[](capacity * sizeof(T))); + } + + ~Builder() + { + clear(); + ::operator delete[](data); + } + +protected: + void grow(size_t newSize) + { + if (newSize <= capacity) return; + + size_t newCap = capacity; + while (newCap < newSize) + newCap += newCap / 2; + + T* newData = static_cast(::operator new[](newCap * sizeof(T))); + + // Move-construct into the new buffer + for (size_t i = 0; i < size; ++i) + new (&newData[i]) T(std::move(data[i])); + + // Destroy old items + for (size_t i = 0; i < size; ++i) + data[i].~T(); + + ::operator delete[](data); + + data = newData; + capacity = newCap; + } + +public: + void Push(const T& value) + { + grow(size + 1); + new (&data[size]) T(value); + size++; + } + + void Push(T&& value) + { + grow(size + 1); + new (&data[size]) T(std::move(value)); + size++; + } + + // Clear Builder storage but keep capacity + void clear() + { + for (size_t i = 0; i < size; ++i) + data[i].~T(); size = 0; } - StringView(const char* data, size_t size) + // ALWAYS produce a deep-copied View + View view() const { - this->data = data; - this->size = size; + return View(data, size); } }; -struct StringBuilder +// using StringBuilder = Builder; + +class StringBuilder final : public Builder { - size_t size; - size_t capacity; - char* data; public: - StringBuilder() + StringBuilder() : Builder() {} + + // Ensure there is room for `n` more characters (not counting terminator) + void ensure_extra(size_t n) { - size = 0; - capacity = 10; - data = (char*)malloc(capacity * sizeof(char)); + // grow size + n (but not including terminator) + this->grow(this->size + n); } -private: - void ensureSize(size_t newSize) + + // Append raw C string (WITHOUT copying terminator) + void Extend(const char* str) { - if (newSize <= capacity) return; - capacity = capacity + (capacity / 2); - data = (char*)realloc(data, capacity * sizeof(char)); + if (!str) return; + + size_t len = strlen(str); + ensure_extra(len); + + for (size_t i = 0; i < len; ++i) + this->Push(str[i]); } -public: - void PushChar(char c) + + // Append a single char + void Append(char c) { - ensureSize(size + 1); - data[size++] = c; + this->Push(c); } -}; \ No newline at end of file + + void AppendIndent(int indent) { + grow(size + indent); + memset(data + size, ' ', indent); + size += indent; + } + + // Return a null-terminated string pointer owned by builder + const char* c_str() + { + // ensure space for terminator + this->grow(this->size + 1); + + // add terminator (overwrite or place it at end) + if (this->size == 0 || this->data[this->size - 1] != '\0') + { + // If already ended with \0, fine + this->Push('\0'); + } + return this->data; + } + + // Produce a deep-copied StringView + StringView view() + { + return StringView(this->c_str()); + } + + // streaming operators + StringBuilder& operator<<(const char* s) + { + Extend(s); + return *this; + } + + StringBuilder& operator<<(const StringView& sv) + { + Extend(sv.c_str()); + return *this; + } + + StringBuilder& operator<<(char c) + { + Append(c); + return *this; + } + + StringBuilder& operator<<(int v) + { + char buf[32]; + snprintf(buf, sizeof(buf), "%d", v); + Extend(buf); + return *this; + } + + StringBuilder& operator<<(long v) + { + char buf[32]; + snprintf(buf, sizeof(buf), "%ld", v); + Extend(buf); + return *this; + } +}; diff --git a/src/ir.cpp b/src/ir.cpp new file mode 100644 index 0000000..6e7ba8a --- /dev/null +++ b/src/ir.cpp @@ -0,0 +1,9 @@ +#include "ir.hpp" + +IR::FnOp::FnOp(StringView name, const CompoundNode* body) + : m_name(name) +{ + IRBuilder ir(body); // Now IRBuilder is complete → OK + ir.ParseBlock(body); + m_ops = ir.ops(); +} \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index af320c7..309529a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,6 +3,8 @@ #include #include "lexer.hpp" #include "ast.hpp" +#include "ir.hpp" +#include "codegen.hpp" void dump_tokens(const char* filename, Lexer* lexer) { @@ -19,9 +21,6 @@ void dump_tokens(const char* filename, Lexer* lexer) int main(int argc, char** argv) { - for (int i = 0; i < argc; ++i) { - std::println("arg#{}: {}", i, argv[i]); - } char* filename; if (argc > 1) { filename = (++argv)[0]; @@ -40,11 +39,25 @@ int main(int argc, char** argv) f.close(); - Lexer lexer(filename, StringView(content.c_str(), content.size())); + Lexer lexer(filename, StringView(content.c_str())); AstParser parser(&lexer); auto program = parser.Parse(); + IR::IRBuilder irBuilder(program); + + auto ops = irBuilder.Build(); + + // printf("\n"); + // for (size_t i = 0; i < ops.size; ++i) + // { + // printf("%s\n", ops.data[i]->Format(0).c_str()); + // } + + StackFasmX86_64Generator gen; + + gen.Generate(filename, ops); + return 0; } diff --git a/test b/test deleted file mode 100755 index c35eebf8936f981858b6d562ffd694df12431550..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15360 zcmeHOU2Ggz6~4Q6$d=YWwHoRsX)~%))K>DuKd}`kZr1-}a1xWa5~ym2wRdc9W$&8Z zad9G{S`7%SDo_MMv@ca3kl+ES4+vG_fsI0&wBo@P5;EceQ?+VZ{vx%YmP#4UxpThp z_+keT5(w$u$oISF-1FUg=FFYBvzaqbj7*GXGZ}@_pdM5#ETXCqOVxpt9Tl0Z;=FPCkZPP!+9${>eec5w;qSb z!g$pDmkQgr?O=J+F8a@q-r2!^ZWsOIyXX&){_Y*dQA^9=%zUA$e4mbX#;=8iYUnQ( zN@dc_tQ7p&Qn|2DIu|JJ=40j0b2MpoyS>UEpE}_egK98Ws)a#y>cr4O zr5sEZrWXPlYi_YpmeKv#bVoy_vUE-|`X8U#49WKsVmN+)S&n_$hzhIB??_u z9j7)IT;Eu)owvsN4Y_`y-;@Dc^Kn1)kn1uauUT1Q+VDzBFPEy;uc!1_^W3qK((C)4 zwx;_^O26&8$j1hI3I5%+B|}@V5wH=k5wH=k5wH=k5wH>XKZwA)EqDFRJO78A_tc(? zN0jouS`V|)x_AEPxl7vnsP|P;NB8`K>Xt)!YOy@eeQ$3@QN;YNKaQ`x_Nce^x_ADK zYm-wWUG=V4ybJxmBb#X7!({&6tlM(vD|$TAKTjI(!k%XtKJk9IkEVW*+cfn}(Pqn` zRW`gNNcLaU_PviW+;`nuyXIZ|({b^W$(4?;cYZPlMIlHHfLK74R7(;zt#Q@ z>Q>7=4|(VN-*Ktwt-TpGc~A8>lRnzFL8_}WbTBXPX(UDFkxN_tk{@|ByCw5@Z_kk} z{JVU4ga*9&#`P%jE?@pAw>NaVPEBUj1gvp(gXL{5guat(sd$o7+F#a`TsRtLpgv2k!4VtX~K6yDBE|yC!}Q z%7u0OXN!%1jew1Sjew1Sjew1Sjew1Sjew1Sjle&T0P+rzXNX)r&Sm6sOAJY3W0S~V zkbK2EL`FU%=WlXBp5p#kRLE;YzAgIyay_cB$qSOCh}^>!`C*5g!*%KB7p7dEjzQfl z$iGLq?293(k^6X6@)(gL348orFRa6 z-7crQv%9yetE(H9J0r&-LubvLFFwz$s?pibyHnfWu5dol+e3{u2q_%fMPDSnld?ne zn_Azla(QTCTqS6wv)rDKWz=7zMq9R7;d+4n0yWs)9OqfmJ1|P&52<}?wp}5f==urk zRZBv`|98AZ`c`#^a`Lc@@!JG#sww#%L;qh3>0YHjwm1F%73}4DCQ+B)P|g0B_HU8i z(H7e){v<^IN6mJ%I!OAqEM29>x>5e{=T8ot7$2gPYg#q_i06-<9yl@Ld&8%dKQ?h{ zaA3keH9C4`WXhi!7@QdKkpZ4ZTDQ*SHnYK#UP}7DVWM|XEQAHcY2zhwfrU8h`yW!k z{bH@+&lk!?P7fbHMOMX9*&Mdb%&s{H9%O}LVePATR7$jM>|$7$CJd`F&ZAYXgn>I(UUrwNm8GB>o>gw5nqDr^mjI=ra!X}OZ(mZ{aK2ER zS8nlanFft9tj0Z$2i01sQr;%`)K?7_3QUmJ(n821l4EWdtWd2Z=T<8^j&3mT&sGbI zfj?g)E+lblU~s%6EX>7xzfi3f$YJKAP16(>OEcs{g+`{z)zig8Q+Sm-Q(0UL$`qCV zu<-uG_jK$7_>KhV7~|u94>e~4rTI&8qXr&^p&+hTs0XM)mqarEZr?G%o^8`*oP@v9 z9s&PfkokewUu8Q<_F;^1h8nzQg1;_&;19%*w?Cd6$B+H(VbU=LeC*Re?8{`Li|PLi zX?YI>ANwA#S$OatcEBa-=RFlN_Elh~@J;_kt)8F;pA+!0uLJ91i22RqUlabI*x@+= zh&VBypCwE_-=8@q@_4azUzGbJo}b7>7jyl5gS2VBBfJEK2axXM_WO*%&x--jF-{ox z81FfQk9`k_=R??={(oQi$zJVP(;vV4V zVlk<4{-Ah?8tLnQx9`|sZ_fWq>fkjlmtFE55WX8T=YtIVEph2-AmQV=dZUAsRA3iN zTQc;&rw(3wm>~r`3*WToxzeRc<`?s%3w-nX1wV~gdkL8u v$Kbzo@;c_V1AlN|;olqZotjDUJfjWMqH=Mg}_u1P><4z@UI6=l~XWU|?lnVemM9+!e@u0TKeTxe)9FAfdw? zAQA{zfS4JG|NMsl5s)Adpo>F9F-b9?5Hl8+$pFQLu!t)H#RV{pVPMcJuFNe-Oajs+ zMPRxpH78N8B(_Y|s D