feat: implement proper IR with value handles with dynamic and transferable types

This commit is contained in:
2026-01-01 15:50:26 +01:00
parent 3b8dfc4dae
commit 6f4ab269e2
8 changed files with 621 additions and 375 deletions

View File

@@ -18,3 +18,11 @@
- Linear scan allocator with ~6 fake regs - Linear scan allocator with ~6 fake regs
- Spill to stack slots - Spill to stack slots
- Add “CALL clobbers regs” rule - Add “CALL clobbers regs” rule
registers a called function is free to clobber (caller-saved):
EAX, ECX, EDX
return registers:
int => EAX
float => ST0

View File

@@ -2,17 +2,17 @@ extern putchar
fn hello() { fn hello() {
local h = 72 local h = 72
local e = 69
local l = 76
local o = 79
putchar(h) putchar(h)
local e = h - 3
putchar(e) putchar(e)
local l = h + 4
putchar(l) putchar(l)
putchar(l) putchar(l)
local o = 100 - 21
putchar(o) putchar(o)
} }
fn main() { fn main() {
hello() hello()
putchar(9 + 1) putchar(3 * 3 + 1)
} }

View File

@@ -10,9 +10,9 @@ public:
~StackFasmX86_64Generator() override = default; ~StackFasmX86_64Generator() override = default;
private: private:
StringView GetTempAddr(IR::Value reg) StringView GetTempAddr(IR::ValueHandle *reg)
{ {
return std::move((StringBuilder() << reg.Format()).view()); return std::move((StringBuilder() << reg->Format()).view());
} }
StringView GetSlotAddr(const IR::IRSlot &slot) StringView GetSlotAddr(const IR::IRSlot &slot)
@@ -101,18 +101,17 @@ private:
appendf("mov %s, rax\n", GetSlotAddr(result_slot).c_str()); appendf("mov %s, rax\n", GetSlotAddr(result_slot).c_str());
} }
break; break;
case IR::OpType::LOAD_CONST: // case IR::OpType::LOAD_CONST:
{ // {
auto lc = reinterpret_cast<const IR::LoadConstOp *>(op); // auto lc = reinterpret_cast<const IR::LoadConstOp *>(op);
auto addr = GetTempAddr(lc->result()); // auto addr = GetTempAddr(lc->result());
auto slot = m_allocator->Allocate(addr); // auto slot = m_allocator->Allocate(addr);
appendf("mov %s, %ld\n", GetSlotAddr(slot).c_str(), lc->value()); // appendf("mov %s, %ld\n", GetSlotAddr(slot).c_str(), lc->value());
} // }
break; // break;
case IR::OpType::STORE: case IR::OpType::STORE:
{ {
auto s = reinterpret_cast<const IR::StoreOp *>(op); auto s = reinterpret_cast<const IR::StoreOp *>(op);
appendf("; DEBUG: resolving stack slot at %s\n", s->addr().c_str());
auto slot = m_allocator->Allocate(s->addr()); auto slot = m_allocator->Allocate(s->addr());
auto value_slot = m_allocator->Resolve(GetTempAddr(s->src())); auto value_slot = m_allocator->Resolve(GetTempAddr(s->src()));
appendf("mov rax, %s\n", GetSlotAddr(value_slot).c_str()); appendf("mov rax, %s\n", GetSlotAddr(value_slot).c_str());

View File

@@ -15,28 +15,38 @@ class IRBuilder
public: public:
IRBuilder(const Node *root) IRBuilder(const Node *root)
: m_root(root), m_ops(new OpBuilder()) {} : m_root(root), m_ops(new OpBuilder()) {}
public: public:
// TODO: support other literals // TODO: support other literals
Value ParseIntLiteral(const IntLiteralNode* literal) ValueHandle *ParseIntLiteral(const IntLiteralNode *literal)
{ {
auto dst = AllocateRegister(); auto dst = AllocateUnnamed<ConstantInt>(literal->integer());
m_ops->Push(new LoadConstOp(dst, literal->integer()));
return dst; return dst;
} }
Value ParseVariable(const VariableNode* var) void ParseVarDecl(const VarDeclNode *varDecl)
{ {
// auto dst = AllocateRegister(); auto value = ParseExpression(varDecl->value());
// TODO: gather type information from var decl signature, aka local <int> v = 0;
auto dst = AllocateNamed<Pointer>();
m_ops->Push(new AllocateOp(dst, value->GetType()));
m_ops->Push(new StoreOp(value, reinterpret_cast<Pointer *>(dst)));
m_locals.insert(std::make_pair(varDecl->name(), reinterpret_cast<Pointer *>(dst)));
}
ValueHandle *ParseVariable(const VariableNode *var)
{
// auto dst = AllocateValue();
// m_ops->Push(new LoadOp(dst, var->name())); // m_ops->Push(new LoadOp(dst, var->name()));
if (m_locals.find(var->name()) == m_locals.end()) if (m_locals.find(var->name()) == m_locals.end())
{ {
// TODO: throw proper error // TODO: throw proper error
assert(0 && "ERROR: variable does not exist"); assert(0 && "ERROR: variable does not exist");
} }
return m_locals[var->name()]; return reinterpret_cast<ValueHandle *>(m_locals[var->name()]);
} }
Value ParseFnCall(const FnCallNode* fn) ValueHandle *ParseFnCall(const FnCallNode *fn)
{ {
// TODO: support multiple args // TODO: support multiple args
auto argRegs = ValueBuilder(); auto argRegs = ValueBuilder();
@@ -45,39 +55,58 @@ public:
auto arg = ParseExpression(fn->arg()); auto arg = ParseExpression(fn->arg());
argRegs.Push(arg); argRegs.Push(arg);
} }
auto dst = AllocateRegister(); // TODO: gather return type of the function
auto dst = AllocateUnnamed<Void>();
m_ops->Push(new CallOp(dst, fn->name(), argRegs.view())); m_ops->Push(new CallOp(dst, fn->name(), argRegs.view()));
return dst; return dst;
} }
Value ParseFactor(const Node* factor) ValueHandle *ParseFactor(const Node *factor)
{ {
switch (factor->GetType()) switch (factor->GetType())
{ {
case NodeType::IntLiteral: return ParseIntLiteral(reinterpret_cast<const IntLiteralNode*>(factor)); case NodeType::IntLiteral:
case NodeType::Variable: return ParseVariable(reinterpret_cast<const VariableNode*>(factor)); return ParseIntLiteral(reinterpret_cast<const IntLiteralNode *>(factor));
case NodeType::FnCall: return ParseFnCall(reinterpret_cast<const FnCallNode*>(factor)); case NodeType::Variable:
default: assert(0 && "some factor may not be handled"); break; return ParseVariable(reinterpret_cast<const VariableNode *>(factor));
case NodeType::FnCall:
return ParseFnCall(reinterpret_cast<const FnCallNode *>(factor));
default:
assert(0 && "some factor may not be handled");
break;
} }
assert(0 && "unreachable"); assert(0 && "unreachable");
return Value(); return reinterpret_cast<ValueHandle *>(new Void(0));
} }
Value ParseExpression(const Node* expression) ValueHandle *ParseExpression(const Node *expression)
{ {
if (expression->GetType() == NodeType::Expression) if (expression->GetType() == NodeType::Expression)
{ {
auto expr = reinterpret_cast<const ExpressionNode *>(expression); auto expr = reinterpret_cast<const ExpressionNode *>(expression);
auto lhs = ParseExpression(expr->left()); auto lhs = ParseExpression(expr->left());
auto rhs = ParseExpression(expr->right()); auto rhs = ParseExpression(expr->right());
auto dst = AllocateRegister(); auto dst = AllocateNamed<Instruction>(lhs->GetType());
assert(4 == static_cast<int>(ExpressionNode::Operator::COUNT_OPERATORS) && "some operators may not be handled"); assert(4 == static_cast<int>(ExpressionNode::Operator::COUNT_OPERATORS) && "some operators may not be handled");
switch (expr->op()) switch (expr->op())
{ {
case ExpressionNode::Operator::Plus: m_ops->Push(new AddOp(dst, lhs, rhs)); break; case ExpressionNode::Operator::Plus:
default: assert(0 && "TODO: implement other operations"); break; m_ops->Push(new MathOp(dst, lhs, rhs, OpType::ADD));
break;
case ExpressionNode::Operator::Multiply:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::MUL));
break;
case ExpressionNode::Operator::Minus:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::SUB));
break;
case ExpressionNode::Operator::Divide:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::DIV));
break;
default:
assert(0 && "unreachable");
break;
} }
return dst; return dst;
@@ -86,13 +115,6 @@ public:
return ParseFactor(expression); return ParseFactor(expression);
} }
void ParseVarDecl(const VarDeclNode* varDecl)
{
auto value = ParseExpression(varDecl->value());
// m_ops->Push(new StoreOp(varDecl->name(), value));
m_locals.insert(std::make_pair(varDecl->name(), value));
}
Block ParseBlock(const CompoundNode *compound) Block ParseBlock(const CompoundNode *compound)
{ {
StartBlock(); StartBlock();
@@ -100,8 +122,12 @@ public:
{ {
switch (statement->GetType()) switch (statement->GetType())
{ {
case NodeType::VarDecl: ParseVarDecl(reinterpret_cast<VarDeclNode*>(statement)); continue; case NodeType::VarDecl:
default: ParseExpression(statement); continue; ParseVarDecl(reinterpret_cast<VarDeclNode *>(statement));
continue;
default:
ParseExpression(statement);
continue;
} }
} }
auto ops = EndBlock(); auto ops = EndBlock();
@@ -130,9 +156,11 @@ public:
return OpView(m_ops->data, m_ops->size); return OpView(m_ops->data, m_ops->size);
} }
public: public:
// TODO: think about safety (copying m_ops->data before giving) // TODO: think about safety (copying m_ops->data before giving)
OpView ops() const { return OpView(m_ops->data, m_ops->size); } OpView ops() const { return OpView(m_ops->data, m_ops->size); }
private: private:
void StartBlock() void StartBlock()
{ {
@@ -148,18 +176,27 @@ private:
m_containers.size--; m_containers.size--;
return current; return current;
} }
private: private:
Value AllocateRegister() template <typename V, typename... Args>
ValueHandle *AllocateNamed(Args &&...args)
{ {
return Value(m_value_counter++); return new V(++m_value_counter, std::forward<Args>(args)...);
} }
// Creates a value handle of concrete type V that carries no numeric id:
// ValueHandle::kNoId is passed as the first constructor argument and the
// remaining arguments are forwarded to V's constructor unchanged.
// The handle is heap-allocated and returned as a raw pointer; nothing in
// this function releases it.
template <typename V, typename... Args>
ValueHandle *AllocateUnnamed(Args &&...args)
{
    V *handle = new V(ValueHandle::kNoId, std::forward<Args>(args)...);
    return handle;
}
private: private:
const Node *m_root = nullptr; const Node *m_root = nullptr;
OpBuilder *m_ops = nullptr; OpBuilder *m_ops = nullptr;
unsigned int m_value_counter = 0; unsigned int m_value_counter = 0;
unsigned int m_block_counter = 0; unsigned int m_block_counter = 0;
std::unordered_map<StringView, Value> m_locals; std::unordered_map<StringView, Pointer *> m_locals;
Builder<OpBuilder *> m_containers; Builder<OpBuilder *> m_containers;
}; };

View File

@@ -9,10 +9,13 @@ enum class OpType
{ {
EXTERN = 0, EXTERN = 0,
FN, FN,
LOAD_CONST, ALLOCATE,
LOAD, LOAD,
STORE, STORE,
ADD, ADD,
SUB,
MUL,
DIV,
CALL, CALL,
COUNT_OPS, COUNT_OPS,
}; };
@@ -36,15 +39,18 @@ using OpBuilder = Builder<Op*>;
class OpValued : public Op class OpValued : public Op
{ {
public: public:
OpValued(Value dest) OpValued(ValueHandle *dest)
: m_dest(dest) {} : m_dest(dest) {}
~OpValued() = default; ~OpValued() = default;
public: public:
Value result() const { return m_dest; } ValueHandle *result() const { return m_dest; }
protected: protected:
bool Valued() const override { return true; } bool Valued() const override { return true; }
private: private:
Value m_dest; ValueHandle *m_dest;
}; };
} // namespace IR } // namespace IR

View File

@@ -24,8 +24,10 @@ public:
sb << "EXTRN " << m_symbol.c_str(); sb << "EXTRN " << m_symbol.c_str();
return sb.view(); return sb.view();
} }
public: public:
const StringView &symbol() const { return m_symbol; } const StringView &symbol() const { return m_symbol; }
private: private:
StringView m_symbol; StringView m_symbol;
}; };
@@ -48,43 +50,71 @@ public:
sb << m_body.Format(indent); sb << m_body.Format(indent);
return sb.view(); return sb.view();
} }
public: public:
const StringView &name() const { return m_name; } const StringView &name() const { return m_name; }
const Block &body() const { return m_body; } const Block &body() const { return m_body; }
const View<StringView> &params() const { return m_params; } const View<StringView> &params() const { return m_params; }
private: private:
StringView m_name; StringView m_name;
ValueView m_slots;
View<StringView> m_params; View<StringView> m_params;
Block m_body; Block m_body;
}; };
class LoadConstOp : public OpValued // Allocate slot on the stack for variable
// with the size of destination value,
// aka (dest.GetSize() will be used)
class AllocateOp : public OpValued
{ {
public: public:
LoadConstOp(Value dest, long value) AllocateOp(ValueHandle *dest, const ValueHandle::Type *typ)
: OpValued(dest), m_value(value) {} : OpValued(dest), m_typ(new ValueHandle::Type(typ)) {}
~LoadConstOp() {} ~AllocateOp() {}
OP_TYPE(ALLOCATE)
OP_TYPE(LOAD_CONST)
public: public:
StringView Format(int indent) const override StringView Format(int indent) const override
{ {
StringBuilder sb; StringBuilder sb;
sb.AppendIndent(indent); sb.AppendIndent(indent);
sb << result().Format() << " = LOAD_CONST " << m_value; sb << result()->Format() << " = ALLOCATE " << m_typ->Format();
return sb.view(); return sb.view();
} }
public:
long value() const { return m_value; }
private: private:
long m_value; ValueHandle::Type *m_typ;
}; };
// class LoadConstOp : public OpValued
// {
// public:
// LoadConstOp(ValueHandle *dest, long value)
// : OpValued(dest), m_value(value) {}
// ~LoadConstOp() {}
// OP_TYPE(LOAD_CONST)
// public:
// StringView Format(int indent) const override
// {
// StringBuilder sb;
// sb.AppendIndent(indent);
// sb << result()->Format() << " = LOAD_CONST " << m_value;
// return sb.view();
// }
// public:
// long value() const { return m_value; }
// private:
// long m_value;
// };
class LoadOp : public OpValued class LoadOp : public OpValued
{ {
public: public:
LoadOp(Value dest, StringView addr) LoadOp(ValueHandle *dest, StringView addr)
: OpValued(dest), m_addr(addr) {} : OpValued(dest), m_addr(addr) {}
~LoadOp() {} ~LoadOp() {}
@@ -94,11 +124,13 @@ public:
{ {
StringBuilder sb; StringBuilder sb;
sb.AppendIndent(indent); sb.AppendIndent(indent);
sb << result().Format() << " = LOAD \"" << m_addr.c_str() << "\""; sb << result()->Format() << " = LOAD \"" << m_addr.c_str() << "\"";
return sb.view(); return sb.view();
} }
public: public:
const StringView &addr() const { return m_addr; } const StringView &addr() const { return m_addr; }
private: private:
StringView m_addr; StringView m_addr;
}; };
@@ -106,8 +138,8 @@ private:
class StoreOp : public Op class StoreOp : public Op
{ {
public: public:
StoreOp(StringView addr, Value src) StoreOp(ValueHandle *src, Pointer *dst)
: m_addr(addr), m_src(src) {} : m_dst(dst), m_src(src) {}
~StoreOp() {} ~StoreOp() {}
OP_TYPE(STORE) OP_TYPE(STORE)
@@ -116,45 +148,69 @@ public:
{ {
StringBuilder sb; StringBuilder sb;
sb.AppendIndent(indent); sb.AppendIndent(indent);
sb << "STORE \"" << m_addr.c_str() << "\", " << m_src.Format(); sb << "STORE " << src()->Format() << ", " << dst()->Format();
return sb.view(); return sb.view();
} }
public: public:
const StringView& addr() const { return m_addr; } const Pointer *dst() const { return m_dst; }
Value src() const { return m_src; } const ValueHandle *src() const { return m_src; }
private: private:
StringView m_addr; Pointer *m_dst;
Value m_src; ValueHandle *m_src;
}; };
class AddOp : public OpValued class MathOp : public OpValued
{ {
public: public:
AddOp(Value dest, Value lhs, Value rhs) MathOp(ValueHandle *dest, ValueHandle *lhs, ValueHandle *rhs, OpType typ)
: OpValued(dest), m_lhs(lhs), m_rhs(rhs) {} : OpValued(dest), m_lhs(lhs), m_rhs(rhs), m_typ(typ) {}
~AddOp() {} ~MathOp() {}
OpType GetType() const override { return m_typ; }
private:
// Returns the mnemonic printed for this arithmetic operation
// ("ADD", "MUL", "SUB" or "DIV"), selected by m_typ.
//
// Any other OpType is a programming error: it trips the assert in debug
// builds and yields an empty StringView otherwise, so the function never
// falls off its end without returning a value.
StringView FormatOperation() const
{
    switch (m_typ)
    {
    case OpType::ADD:
        return StringView("ADD");
    case OpType::MUL:
        return StringView("MUL");
    case OpType::SUB:
        return StringView("SUB");
    case OpType::DIV:
        return StringView("DIV");
    default:
        // Not an arithmetic op; handled after the switch.
        break;
    }
    assert(false && "unreachable");
    return StringView();
}
OP_TYPE(ADD)
public: public:
StringView Format(int indent) const override StringView Format(int indent) const override
{ {
StringBuilder sb; StringBuilder sb;
sb.AppendIndent(indent); sb.AppendIndent(indent);
sb << result().Format() << " = ADD " << m_lhs.Format() << ", " << m_rhs.Format(); sb << result()->Format() << " = " << FormatOperation() << " " << m_lhs->Format() << ", " << m_rhs->Format();
return sb.view(); return sb.view();
} }
public: public:
Value lhs() const { return m_lhs; } ValueHandle *lhs() const { return m_lhs; }
Value rhs() const { return m_rhs; } ValueHandle *rhs() const { return m_rhs; }
private: private:
Value m_lhs; ValueHandle *m_lhs;
Value m_rhs; ValueHandle *m_rhs;
OpType m_typ;
}; };
class CallOp : public OpValued class CallOp : public OpValued
{ {
public: public:
CallOp(Value dest, StringView callee, ValueView args) CallOp(ValueHandle *dest, StringView callee, ValueView args)
: OpValued(dest), m_callee(callee), m_args(args) {} : OpValued(dest), m_callee(callee), m_args(args) {}
~CallOp() {} ~CallOp() {}
@@ -166,15 +222,17 @@ public:
for (size_t i = 0; i < m_args.size; ++i) for (size_t i = 0; i < m_args.size; ++i)
{ {
sb.AppendIndent(indent); sb.AppendIndent(indent);
sb << "PARAM " << m_args.data[i].Format() << '\n'; sb << "PARAM " << m_args.data[i]->Format() << '\n';
} }
sb.AppendIndent(indent); sb.AppendIndent(indent);
sb << result().Format() << " = CALL " << m_callee.c_str(); sb << result()->Format() << " = CALL " << m_callee.c_str();
return sb.view(); return sb.view();
} }
public: public:
const StringView &callee() const { return m_callee; } const StringView &callee() const { return m_callee; }
const ValueView &args() const { return m_args; } const ValueView &args() const { return m_args; }
private: private:
StringView m_callee; StringView m_callee;
ValueView m_args; ValueView m_args;

View File

@@ -4,23 +4,161 @@
namespace IR namespace IR
{ {
class Value class ValueHandle
{ {
public: public:
Value(unsigned int id) : m_id(id) {} static constexpr uint32_t kNoId = 0;
Value() : m_id(0) {}
struct Type
{
enum class Kind
{
Void,
Int,
Ptr
};
Kind kind;
uint32_t bits;
public:
Type(Kind kind_) : kind(kind_), bits(32) {}
Type(Kind kind_, uint32_t bits_) : kind(kind_), bits(bits_) {}
Type(const Type *typ) : kind(typ->kind), bits(32) {}
public: public:
StringView Format() const StringView Format() const
{ {
auto sb = StringBuilder(); switch (kind)
sb.AppendFormat("%%%d", m_id); {
case Kind::Int:
{
StringBuilder sb;
sb.AppendFormat("i%d", bits);
return sb.view(); return sb.view();
} }
private: case Kind::Void:
unsigned int m_id; {
return StringView("void");
}
case Kind::Ptr:
{
return StringView("ptr");
}
}
assert(false && "unreachable");
return StringView();
}
}; };
using ValueView = View<Value>; public:
using ValueBuilder = Builder<Value>; ValueHandle(uint32_t id = kNoId) : m_id(id) {}
bool HasId() const { return m_id != kNoId; }
uint32_t GetId() const { return m_id; }
public:
virtual StringView Format() const
{
auto sb = StringBuilder();
if (HasId())
sb.AppendFormat("%%%d", m_id);
else
sb.Extend("<?>");
return sb.view();
}
public:
virtual const Type *GetType() const = 0;
private:
uint32_t m_id;
};
// Value handle for an integer literal.
// Holds the literal's value together with an Int-kind type descriptor.
class ConstantInt : public ValueHandle
{
public:
    // id:    numeric id forwarded to ValueHandle.
    // value: the literal's integer value.
    ConstantInt(unsigned int id, long value)
        : ValueHandle(id), m_value(value) {}

    // The returned pointer refers to a member of this object.
    const Type *GetType() const override { return &m_type; }

public:
    // Renders "<type> <value>".
    StringView Format() const override
    {
        auto sb = StringBuilder();
        // m_value is a long, so it needs %ld rather than %d.
        // NOTE(review): assumes AppendFormat takes printf-style specifiers,
        // as its other uses in this file suggest -- confirm.
        sb.AppendFormat("%s %ld", m_type.Format().c_str(), m_value);
        return sb.view();
    }

public:
    long GetValue() const { return m_value; }

private:
    long m_value;
    // Stored by value: the previous heap-allocated Type was never freed.
    Type m_type{Type::Kind::Int};
};
// Value handle whose type descriptor has kind Ptr.
class Pointer : public ValueHandle
{
public:
    // id: numeric id forwarded to ValueHandle.
    Pointer(unsigned int id)
        : ValueHandle(id) {}

    // The returned pointer refers to a member of this object.
    const Type *GetType() const override { return &m_type; }

public:
    // Renders "<type> %<id>". The id is printed unconditionally, even when
    // it equals ValueHandle::kNoId.
    StringView Format() const override
    {
        auto sb = StringBuilder();
        // GetId() returns an unsigned 32-bit value, so print it with %u;
        // %d would show large ids as negative numbers.
        // NOTE(review): assumes AppendFormat takes printf-style specifiers,
        // as its other uses in this file suggest -- confirm.
        sb.AppendFormat("%s %%%u", m_type.Format().c_str(),
                        static_cast<unsigned int>(GetId()));
        return sb.view();
    }

private:
    // Stored by value: the previous heap-allocated Type was never freed.
    Type m_type{Type::Kind::Ptr};
};
// TODO: Remove void value and use void type only
// Value handle whose type descriptor has kind Void.
class Void : public ValueHandle
{
public:
    // id: numeric id forwarded to ValueHandle.
    Void(unsigned int id)
        : ValueHandle(id) {}

    // The returned pointer refers to a member of this object.
    const Type *GetType() const override { return &m_type; }

public:
    // Renders only the type; the id is never printed.
    StringView Format() const override
    {
        return m_type.Format();
    }

private:
    // Stored by value: the previous heap-allocated Type was never freed.
    Type m_type{Type::Kind::Void};
};
// Value handle that carries a type supplied by its creator. It does not
// override Format(), so it prints using ValueHandle::Format().
class Instruction : public ValueHandle
{
public:
    // id:  numeric id forwarded to ValueHandle.
    // typ: type to give this value; must not be null. It is copied, so the
    //      caller's Type object does not need to outlive this handle.
    Instruction(unsigned int id, const Type *typ)
        // Member-wise copy keeps both kind and bits. The previous
        // `new Type(typ)` reset bits to 32 and was never freed.
        : ValueHandle(id), m_type(*typ)
    {
    }

    // The returned pointer refers to a member of this object.
    const Type *GetType() const override { return &m_type; }

private:
    Type m_type;
};
using ValueView = View<ValueHandle *>;
using ValueBuilder = Builder<ValueHandle *>;
} // namespace IR } // namespace IR

View File

@@ -7,7 +7,7 @@
#include "parser/ast.hpp" #include "parser/ast.hpp"
#include "ir/ir.hpp" #include "ir/ir.hpp"
#include "codegen/fasm_stack.hpp" // #include "codegen/fasm_stack.hpp"
void dump_tokens(const char *filename, Lexer *lexer) void dump_tokens(const char *filename, Lexer *lexer)
{ {
@@ -62,20 +62,20 @@ int main(int argc, char **argv)
printf("%s\n", ops.data[i]->Format(0).c_str()); printf("%s\n", ops.data[i]->Format(0).c_str());
} }
StackFasmX86_64Generator gen; // StackFasmX86_64Generator gen;
gen.Generate(filename, ops); // gen.Generate(filename, ops);
StringView output = gen.GetOutput(); // StringView output = gen.GetOutput();
FILE *file = fopen("out.asm", "w"); // FILE *file = fopen("out.asm", "w");
fwrite(output.c_str(), output.size - 1, sizeof(char), file); // fwrite(output.c_str(), output.size - 1, sizeof(char), file);
fclose(file); // fclose(file);
system("fasm out.asm"); // system("fasm out.asm");
system("gcc -o out out.o"); // system("gcc -o out out.o");
return 0; return 0;
} }