feat: implement proper IR with value handles with dynamic and transferable types

This commit is contained in:
2026-01-01 15:50:26 +01:00
parent 3b8dfc4dae
commit 6f4ab269e2
8 changed files with 621 additions and 375 deletions

View File

@@ -18,3 +18,11 @@
- Linear scan allocator with ~6 fake regs
- Spill to stack slots
- Add “CALL clobbers regs” rule
function's reserved registers:
EAX, ECX, EDX
return registers:
int => EAX
float => ST0

View File

@@ -2,17 +2,17 @@ extern putchar
fn hello() {
local h = 72
local e = 69
local l = 76
local o = 79
putchar(h)
local e = h - 3
putchar(e)
local l = h + 4
putchar(l)
putchar(l)
local o = 100 - 21
putchar(o)
}
fn main() {
hello()
putchar(9 + 1)
putchar(3 * 3 + 1)
}

View File

@@ -10,9 +10,9 @@ public:
~StackFasmX86_64Generator() override = default;
private:
// Produces the text of an IR value by streaming its Format() result through a
// temporary StringBuilder. Callers in this file pass the returned string to
// m_allocator->Allocate(...) / m_allocator->Resolve(...) to identify a slot.
//
// Diff view: each pair of adjacent lines below is one statement in two versions,
// the first taking IR::Value by value (reg.Format()), the second taking an
// IR::ValueHandle pointer (reg->Format()).
// NOTE(review): reg is dereferenced without a null check in the pointer version.
// NOTE(review): std::move around the result of view() looks redundant if view()
// returns by value, as it appears to elsewhere in this file - confirm.
StringView GetTempAddr(IR::Value reg)
StringView GetTempAddr(IR::ValueHandle *reg)
{
return std::move((StringBuilder() << reg.Format()).view());
return std::move((StringBuilder() << reg->Format()).view());
}
StringView GetSlotAddr(const IR::IRSlot &slot)
@@ -101,18 +101,17 @@ private:
appendf("mov %s, rax\n", GetSlotAddr(result_slot).c_str());
}
break;
case IR::OpType::LOAD_CONST:
{
auto lc = reinterpret_cast<const IR::LoadConstOp *>(op);
auto addr = GetTempAddr(lc->result());
auto slot = m_allocator->Allocate(addr);
appendf("mov %s, %ld\n", GetSlotAddr(slot).c_str(), lc->value());
}
break;
// case IR::OpType::LOAD_CONST:
// {
// auto lc = reinterpret_cast<const IR::LoadConstOp *>(op);
// auto addr = GetTempAddr(lc->result());
// auto slot = m_allocator->Allocate(addr);
// appendf("mov %s, %ld\n", GetSlotAddr(slot).c_str(), lc->value());
// }
// break;
case IR::OpType::STORE:
{
auto s = reinterpret_cast<const IR::StoreOp *>(op);
appendf("; DEBUG: resolving stack slot at %s\n", s->addr().c_str());
auto slot = m_allocator->Allocate(s->addr());
auto value_slot = m_allocator->Resolve(GetTempAddr(s->src()));
appendf("mov rax, %s\n", GetSlotAddr(value_slot).c_str());

View File

@@ -15,28 +15,38 @@ class IRBuilder
public:
IRBuilder(const Node *root)
: m_root(root), m_ops(new OpBuilder()) {}
public:
// TODO: support other literals
Value ParseIntLiteral(const IntLiteralNode* literal)
ValueHandle *ParseIntLiteral(const IntLiteralNode *literal)
{
auto dst = AllocateRegister();
m_ops->Push(new LoadConstOp(dst, literal->integer()));
auto dst = AllocateUnnamed<ConstantInt>(literal->integer());
return dst;
}
Value ParseVariable(const VariableNode* var)
void ParseVarDecl(const VarDeclNode *varDecl)
{
// auto dst = AllocateRegister();
auto value = ParseExpression(varDecl->value());
// TODO: gather type information from var decl signature, aka local <int> v = 0;
auto dst = AllocateNamed<Pointer>();
m_ops->Push(new AllocateOp(dst, value->GetType()));
m_ops->Push(new StoreOp(value, reinterpret_cast<Pointer *>(dst)));
m_locals.insert(std::make_pair(varDecl->name(), reinterpret_cast<Pointer *>(dst)));
}
// Resolves a variable reference to the handle stored for its name in m_locals.
//
// var: variable node; var->name() is the lookup key.
// Returns the m_locals entry for that name.
//
// An unknown name trips the assert below. When asserts are compiled out
// (NDEBUG), execution continues and m_locals[...] default-inserts an entry for
// the unknown name, so the caller receives that default-constructed value.
// No op is pushed here; the LoadOp emission is commented out.
//
// Diff view: the two consecutive return lines are the same statement in two
// versions (map of Value vs. map of Pointer *); only the first one reached runs.
ValueHandle *ParseVariable(const VariableNode *var)
{
// auto dst = AllocateValue();
// m_ops->Push(new LoadOp(dst, var->name()));
if (m_locals.find(var->name()) == m_locals.end())
{
// TODO: throw proper error
assert(0 && "ERROR: variable does not exist");
}
return m_locals[var->name()];
// NOTE(review): Pointer derives from ValueHandle, so this upcast would also
// happen implicitly; the reinterpret_cast is not required.
return reinterpret_cast<ValueHandle *>(m_locals[var->name()]);
}
Value ParseFnCall(const FnCallNode* fn)
ValueHandle *ParseFnCall(const FnCallNode *fn)
{
// TODO: support multiple args
auto argRegs = ValueBuilder();
@@ -45,39 +55,58 @@ public:
auto arg = ParseExpression(fn->arg());
argRegs.Push(arg);
}
auto dst = AllocateRegister();
// TODO: gather return type of the function
auto dst = AllocateUnnamed<Void>();
m_ops->Push(new CallOp(dst, fn->name(), argRegs.view()));
return dst;
}
Value ParseFactor(const Node* factor)
ValueHandle *ParseFactor(const Node *factor)
{
switch (factor->GetType())
{
case NodeType::IntLiteral: return ParseIntLiteral(reinterpret_cast<const IntLiteralNode*>(factor));
case NodeType::Variable: return ParseVariable(reinterpret_cast<const VariableNode*>(factor));
case NodeType::FnCall: return ParseFnCall(reinterpret_cast<const FnCallNode*>(factor));
default: assert(0 && "some factor may not be handled"); break;
case NodeType::IntLiteral:
return ParseIntLiteral(reinterpret_cast<const IntLiteralNode *>(factor));
case NodeType::Variable:
return ParseVariable(reinterpret_cast<const VariableNode *>(factor));
case NodeType::FnCall:
return ParseFnCall(reinterpret_cast<const FnCallNode *>(factor));
default:
assert(0 && "some factor may not be handled");
break;
}
assert(0 && "unreachable");
return Value();
return reinterpret_cast<ValueHandle *>(new Void(0));
}
Value ParseExpression(const Node* expression)
ValueHandle *ParseExpression(const Node *expression)
{
if (expression->GetType() == NodeType::Expression)
{
auto expr = reinterpret_cast<const ExpressionNode *>(expression);
auto lhs = ParseExpression(expr->left());
auto rhs = ParseExpression(expr->right());
auto dst = AllocateRegister();
auto dst = AllocateNamed<Instruction>(lhs->GetType());
assert(4 == static_cast<int>(ExpressionNode::Operator::COUNT_OPERATORS) && "some operators may not be handled");
switch (expr->op())
{
case ExpressionNode::Operator::Plus: m_ops->Push(new AddOp(dst, lhs, rhs)); break;
default: assert(0 && "TODO: implement other operations"); break;
case ExpressionNode::Operator::Plus:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::ADD));
break;
case ExpressionNode::Operator::Multiply:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::MUL));
break;
case ExpressionNode::Operator::Minus:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::SUB));
break;
case ExpressionNode::Operator::Divide:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::DIV));
break;
default:
assert(0 && "unreachable");
break;
}
return dst;
@@ -86,13 +115,6 @@ public:
return ParseFactor(expression);
}
void ParseVarDecl(const VarDeclNode* varDecl)
{
auto value = ParseExpression(varDecl->value());
// m_ops->Push(new StoreOp(varDecl->name(), value));
m_locals.insert(std::make_pair(varDecl->name(), value));
}
Block ParseBlock(const CompoundNode *compound)
{
StartBlock();
@@ -100,8 +122,12 @@ public:
{
switch (statement->GetType())
{
case NodeType::VarDecl: ParseVarDecl(reinterpret_cast<VarDeclNode*>(statement)); continue;
default: ParseExpression(statement); continue;
case NodeType::VarDecl:
ParseVarDecl(reinterpret_cast<VarDeclNode *>(statement));
continue;
default:
ParseExpression(statement);
continue;
}
}
auto ops = EndBlock();
@@ -130,9 +156,11 @@ public:
return OpView(m_ops->data, m_ops->size);
}
public:
// TODO: think about safety (copying m_ops->data before giving)
OpView ops() const { return OpView(m_ops->data, m_ops->size); }
private:
void StartBlock()
{
@@ -148,18 +176,27 @@ private:
m_containers.size--;
return current;
}
private:
Value AllocateRegister()
template <typename V, typename... Args>
ValueHandle *AllocateNamed(Args &&...args)
{
return Value(m_value_counter++);
return new V(++m_value_counter, std::forward<Args>(args)...);
}
// Creates a value of concrete type V that carries no id (ValueHandle::kNoId).
// Unlike AllocateNamed, m_value_counter is left untouched.
//
// V:    concrete value class; constructed as V(kNoId, args...).
// args: forwarded unchanged to V's constructor after the id.
// Returns the new object as a ValueHandle *. It is allocated with `new` and
// nothing in this function releases it.
template <typename V, typename... Args>
ValueHandle *AllocateUnnamed(Args &&...args)
{
    ValueHandle *handle = new V(ValueHandle::kNoId, std::forward<Args>(args)...);
    return handle;
}
private:
const Node *m_root = nullptr;
OpBuilder *m_ops = nullptr;
unsigned int m_value_counter = 0;
unsigned int m_block_counter = 0;
std::unordered_map<StringView, Value> m_locals;
std::unordered_map<StringView, Pointer *> m_locals;
Builder<OpBuilder *> m_containers;
};

View File

@@ -9,10 +9,13 @@ enum class OpType
{
EXTERN = 0,
FN,
LOAD_CONST,
ALLOCATE,
LOAD,
STORE,
ADD,
SUB,
MUL,
DIV,
CALL,
COUNT_OPS,
};
@@ -36,15 +39,18 @@ using OpBuilder = Builder<Op*>;
// Base class for ops that produce a result: it stores the destination value
// and overrides Valued() to return true.
//
// Diff view: the constructor, result() and m_dest each appear twice below, once
// with the by-value IR `Value` type and once with a `ValueHandle *`.
class OpValued : public Op
{
public:
// dest: the value this op writes its result to.
OpValued(Value dest)
OpValued(ValueHandle *dest)
: m_dest(dest) {}
// Defaulted: in the pointer version the handle is not deleted here.
~OpValued() = default;
public:
// Destination value given at construction.
Value result() const { return m_dest; }
ValueHandle *result() const { return m_dest; }
protected:
// Always true for this class.
bool Valued() const override { return true; }
private:
Value m_dest;
ValueHandle *m_dest;
};
} // namespace IR

View File

@@ -24,8 +24,10 @@ public:
sb << "EXTRN " << m_symbol.c_str();
return sb.view();
}
public:
const StringView &symbol() const { return m_symbol; }
private:
StringView m_symbol;
};
@@ -48,43 +50,71 @@ public:
sb << m_body.Format(indent);
return sb.view();
}
public:
const StringView &name() const { return m_name; }
const Block &body() const { return m_body; }
const View<StringView> &params() const { return m_params; }
private:
StringView m_name;
ValueView m_slots;
View<StringView> m_params;
Block m_body;
};
class LoadConstOp : public OpValued
// Allocate slot on the stack for variable
// with the size of destination value,
// aka (dest.GetSize() will be used)
class AllocateOp : public OpValued
{
public:
LoadConstOp(Value dest, long value)
: OpValued(dest), m_value(value) {}
~LoadConstOp() {}
AllocateOp(ValueHandle *dest, const ValueHandle::Type *typ)
: OpValued(dest), m_typ(new ValueHandle::Type(typ)) {}
~AllocateOp() {}
OP_TYPE(ALLOCATE)
OP_TYPE(LOAD_CONST)
public:
StringView Format(int indent) const override
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << result().Format() << " = LOAD_CONST " << m_value;
sb << result()->Format() << " = ALLOCATE " << m_typ->Format();
return sb.view();
}
public:
long value() const { return m_value; }
private:
long m_value;
ValueHandle::Type *m_typ;
};
// class LoadConstOp : public OpValued
// {
// public:
// LoadConstOp(ValueHandle *dest, long value)
// : OpValued(dest), m_value(value) {}
// ~LoadConstOp() {}
// OP_TYPE(LOAD_CONST)
// public:
// StringView Format(int indent) const override
// {
// StringBuilder sb;
// sb.AppendIndent(indent);
// sb << result()->Format() << " = LOAD_CONST " << m_value;
// return sb.view();
// }
// public:
// long value() const { return m_value; }
// private:
// long m_value;
// };
class LoadOp : public OpValued
{
public:
LoadOp(Value dest, StringView addr)
LoadOp(ValueHandle *dest, StringView addr)
: OpValued(dest), m_addr(addr) {}
~LoadOp() {}
@@ -94,11 +124,13 @@ public:
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << result().Format() << " = LOAD \"" << m_addr.c_str() << "\"";
sb << result()->Format() << " = LOAD \"" << m_addr.c_str() << "\"";
return sb.view();
}
public:
const StringView &addr() const { return m_addr; }
private:
StringView m_addr;
};
@@ -106,8 +138,8 @@ private:
class StoreOp : public Op
{
public:
StoreOp(StringView addr, Value src)
: m_addr(addr), m_src(src) {}
StoreOp(ValueHandle *src, Pointer *dst)
: m_dst(dst), m_src(src) {}
~StoreOp() {}
OP_TYPE(STORE)
@@ -116,45 +148,69 @@ public:
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << "STORE \"" << m_addr.c_str() << "\", " << m_src.Format();
sb << "STORE " << src()->Format() << ", " << dst()->Format();
return sb.view();
}
public:
const StringView& addr() const { return m_addr; }
Value src() const { return m_src; }
const Pointer *dst() const { return m_dst; }
const ValueHandle *src() const { return m_src; }
private:
StringView m_addr;
Value m_src;
Pointer *m_dst;
ValueHandle *m_src;
};
class AddOp : public OpValued
class MathOp : public OpValued
{
public:
AddOp(Value dest, Value lhs, Value rhs)
: OpValued(dest), m_lhs(lhs), m_rhs(rhs) {}
~AddOp() {}
MathOp(ValueHandle *dest, ValueHandle *lhs, ValueHandle *rhs, OpType typ)
: OpValued(dest), m_lhs(lhs), m_rhs(rhs), m_typ(typ) {}
~MathOp() {}
OpType GetType() const override { return m_typ; }
private:
// Maps this op's arithmetic kind (m_typ) to the mnemonic printed by Format().
//
// Returns "ADD", "SUB", "MUL" or "DIV". Any other OpType is a programming
// error: it asserts in debug builds and yields an empty StringView otherwise,
// so the function never runs off its end without returning a value.
StringView FormatOperation() const
{
    switch (m_typ)
    {
    case OpType::ADD:
        return StringView("ADD");
    case OpType::SUB:
        return StringView("SUB");
    case OpType::MUL:
        return StringView("MUL");
    case OpType::DIV:
        return StringView("DIV");
    default:
        // Non-arithmetic op kinds are not valid for a MathOp.
        break;
    }
    assert(false && "unreachable");
    return StringView();
}
OP_TYPE(ADD)
public:
StringView Format(int indent) const override
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << result().Format() << " = ADD " << m_lhs.Format() << ", " << m_rhs.Format();
sb << result()->Format() << " = " << FormatOperation() << " " << m_lhs->Format() << ", " << m_rhs->Format();
return sb.view();
}
public:
Value lhs() const { return m_lhs; }
Value rhs() const { return m_rhs; }
ValueHandle *lhs() const { return m_lhs; }
ValueHandle *rhs() const { return m_rhs; }
private:
Value m_lhs;
Value m_rhs;
ValueHandle *m_lhs;
ValueHandle *m_rhs;
OpType m_typ;
};
class CallOp : public OpValued
{
public:
CallOp(Value dest, StringView callee, ValueView args)
CallOp(ValueHandle *dest, StringView callee, ValueView args)
: OpValued(dest), m_callee(callee), m_args(args) {}
~CallOp() {}
@@ -166,15 +222,17 @@ public:
for (size_t i = 0; i < m_args.size; ++i)
{
sb.AppendIndent(indent);
sb << "PARAM " << m_args.data[i].Format() << '\n';
sb << "PARAM " << m_args.data[i]->Format() << '\n';
}
sb.AppendIndent(indent);
sb << result().Format() << " = CALL " << m_callee.c_str();
sb << result()->Format() << " = CALL " << m_callee.c_str();
return sb.view();
}
public:
const StringView &callee() const { return m_callee; }
const ValueView &args() const { return m_args; }
private:
StringView m_callee;
ValueView m_args;

View File

@@ -4,23 +4,161 @@
namespace IR
{
class Value
class ValueHandle
{
public:
Value(unsigned int id) : m_id(id) {}
Value() : m_id(0) {}
static constexpr uint32_t kNoId = 0;
struct Type
{
enum class Kind
{
Void,
Int,
Ptr
};
Kind kind;
uint32_t bits;
public:
Type(Kind kind_) : kind(kind_), bits(32) {}
Type(Kind kind_, uint32_t bits_) : kind(kind_), bits(bits_) {}
Type(const Type *typ) : kind(typ->kind), bits(32) {}
public:
StringView Format() const
{
auto sb = StringBuilder();
sb.AppendFormat("%%%d", m_id);
switch (kind)
{
case Kind::Int:
{
StringBuilder sb;
sb.AppendFormat("i%d", bits);
return sb.view();
}
private:
unsigned int m_id;
case Kind::Void:
{
return StringView("void");
}
case Kind::Ptr:
{
return StringView("ptr");
}
}
assert(false && "unreachable");
return StringView();
}
};
using ValueView = View<Value>;
using ValueBuilder = Builder<Value>;
public:
ValueHandle(uint32_t id = kNoId) : m_id(id) {}
bool HasId() const { return m_id != kNoId; }
uint32_t GetId() const { return m_id; }
public:
virtual StringView Format() const
{
auto sb = StringBuilder();
if (HasId())
sb.AppendFormat("%%%d", m_id);
else
sb.Extend("<?>");
return sb.view();
}
public:
virtual const Type *GetType() const = 0;
private:
uint32_t m_id;
};
class ConstantInt : public ValueHandle
{
public:
ConstantInt(unsigned int id, long value)
: ValueHandle(id), m_value(value)
{
m_type = new Type{Type::Kind::Int};
}
const Type *GetType() const override { return m_type; }
public:
virtual StringView Format() const override
{
auto sb = StringBuilder();
sb.AppendFormat("%s %d", m_type->Format().c_str(), m_value);
return sb.view();
}
public:
long GetValue() const { return m_value; }
private:
long m_value;
Type *m_type;
};
class Pointer : public ValueHandle
{
public:
Pointer(unsigned int id)
: ValueHandle(id)
{
m_type = new Type{Type::Kind::Ptr};
}
const Type *GetType() const override { return m_type; }
public:
virtual StringView Format() const override
{
auto sb = StringBuilder();
sb.AppendFormat("%s %%%d", m_type->Format().c_str(), GetId());
return sb.view();
}
private:
Type *m_type;
};
// TODO: Remove void value and use void type only
class Void : public ValueHandle
{
public:
Void(unsigned int id)
: ValueHandle(id)
{
m_type = new Type{Type::Kind::Void};
}
const Type *GetType() const override { return m_type; }
public:
virtual StringView Format() const override
{
return m_type->Format();
}
private:
Type *m_type;
};
class Instruction : public ValueHandle
{
public:
Instruction(unsigned int id, const Type *typ)
: ValueHandle(id), m_type(new Type(typ)) {}
const Type *GetType() const override { return m_type; }
private:
Type *m_type;
};
using ValueView = View<ValueHandle *>;
using ValueBuilder = Builder<ValueHandle *>;
} // namespace IR

View File

@@ -7,7 +7,7 @@
#include "parser/ast.hpp"
#include "ir/ir.hpp"
#include "codegen/fasm_stack.hpp"
// #include "codegen/fasm_stack.hpp"
void dump_tokens(const char *filename, Lexer *lexer)
{
@@ -62,20 +62,20 @@ int main(int argc, char **argv)
printf("%s\n", ops.data[i]->Format(0).c_str());
}
StackFasmX86_64Generator gen;
// StackFasmX86_64Generator gen;
gen.Generate(filename, ops);
// gen.Generate(filename, ops);
StringView output = gen.GetOutput();
// StringView output = gen.GetOutput();
FILE *file = fopen("out.asm", "w");
// FILE *file = fopen("out.asm", "w");
fwrite(output.c_str(), output.size - 1, sizeof(char), file);
// fwrite(output.c_str(), output.size - 1, sizeof(char), file);
fclose(file);
// fclose(file);
system("fasm out.asm");
system("gcc -o out out.o");
// system("fasm out.asm");
// system("gcc -o out out.o");
return 0;
}