feat: implement proper IR with value handles with dynamic and transferable types

This commit is contained in:
2026-01-01 15:50:26 +01:00
parent 3b8dfc4dae
commit 6f4ab269e2
8 changed files with 621 additions and 375 deletions

View File

@@ -10,158 +10,195 @@
namespace IR
{
class IRBuilder
{
public:
IRBuilder(const Node* root)
: m_root(root), m_ops(new OpBuilder()) {}
public:
// TODO: support other literals
Value ParseIntLiteral(const IntLiteralNode* literal)
class IRBuilder
{
auto dst = AllocateRegister();
m_ops->Push(new LoadConstOp(dst, literal->integer()));
return dst;
}
public:
IRBuilder(const Node *root)
: m_root(root), m_ops(new OpBuilder()) {}
Value ParseVariable(const VariableNode* var)
{
// auto dst = AllocateRegister();
// m_ops->Push(new LoadOp(dst, var->name()));
if (m_locals.find(var->name()) == m_locals.end())
public:
// TODO: support other literals
ValueHandle *ParseIntLiteral(const IntLiteralNode *literal)
{
// TODO: throw proper error
assert(0 && "ERROR: variable does not exist");
}
return m_locals[var->name()];
}
Value ParseFnCall(const FnCallNode* fn)
{
// TODO: support multiple args
auto argRegs = ValueBuilder();
if (fn->arg() != nullptr)
{
auto arg = ParseExpression(fn->arg());
argRegs.Push(arg);
}
auto dst = AllocateRegister();
m_ops->Push(new CallOp(dst, fn->name(), argRegs.view()));
return dst;
}
Value ParseFactor(const Node* factor)
{
switch(factor->GetType())
{
case NodeType::IntLiteral: return ParseIntLiteral(reinterpret_cast<const IntLiteralNode*>(factor));
case NodeType::Variable: return ParseVariable(reinterpret_cast<const VariableNode*>(factor));
case NodeType::FnCall: return ParseFnCall(reinterpret_cast<const FnCallNode*>(factor));
default: assert(0 && "some factor may not be handled"); break;
}
assert(0 && "unreachable");
return Value();
}
Value ParseExpression(const Node* expression)
{
if (expression->GetType() == NodeType::Expression)
{
auto expr = reinterpret_cast<const ExpressionNode*>(expression);
auto lhs = ParseExpression(expr->left());
auto rhs = ParseExpression(expr->right());
auto dst = AllocateRegister();
assert(4 == static_cast<int>(ExpressionNode::Operator::COUNT_OPERATORS) && "some operators may not be handled");
switch (expr->op())
{
case ExpressionNode::Operator::Plus: m_ops->Push(new AddOp(dst, lhs, rhs)); break;
default: assert(0 && "TODO: implement other operations"); break;
}
auto dst = AllocateUnnamed<ConstantInt>(literal->integer());
return dst;
}
}
return ParseFactor(expression);
}
void ParseVarDecl(const VarDeclNode* varDecl)
{
auto value = ParseExpression(varDecl->value());
// m_ops->Push(new StoreOp(varDecl->name(), value));
m_locals.insert(std::make_pair(varDecl->name(), value));
}
Block ParseBlock(const CompoundNode* compound)
{
StartBlock();
for (auto &statement : *compound)
void ParseVarDecl(const VarDeclNode *varDecl)
{
switch(statement->GetType())
auto value = ParseExpression(varDecl->value());
// TODO: gather type information from var decl signature, aka local <int> v = 0;
auto dst = AllocateNamed<Pointer>();
m_ops->Push(new AllocateOp(dst, value->GetType()));
m_ops->Push(new StoreOp(value, reinterpret_cast<Pointer *>(dst)));
m_locals.insert(std::make_pair(varDecl->name(), reinterpret_cast<Pointer *>(dst)));
}
ValueHandle *ParseVariable(const VariableNode *var)
{
// auto dst = AllocateValue();
// m_ops->Push(new LoadOp(dst, var->name()));
if (m_locals.find(var->name()) == m_locals.end())
{
case NodeType::VarDecl: ParseVarDecl(reinterpret_cast<VarDeclNode*>(statement)); continue;
default: ParseExpression(statement); continue;
// TODO: throw proper error
assert(0 && "ERROR: variable does not exist");
}
return reinterpret_cast<ValueHandle *>(m_locals[var->name()]);
}
auto ops = EndBlock();
auto block = Block(m_block_counter++, std::move(ops->view()));
operator delete(ops);
return block;
}
OpView Build()
{
assert(m_root->GetType() == NodeType::Program && "root should be a program");
auto program = reinterpret_cast<const ProgramNode*>(m_root);
// Externs
for (auto &extrn : program->externs())
ValueHandle *ParseFnCall(const FnCallNode *fn)
{
m_ops->Push(new ExternOp(extrn->symbol()));
// TODO: support multiple args
auto argRegs = ValueBuilder();
if (fn->arg() != nullptr)
{
auto arg = ParseExpression(fn->arg());
argRegs.Push(arg);
}
// TODO: gather return type of the function
auto dst = AllocateUnnamed<Void>();
m_ops->Push(new CallOp(dst, fn->name(), argRegs.view()));
return dst;
}
// Functions
for (auto &fn : program->funcs())
ValueHandle *ParseFactor(const Node *factor)
{
auto block = ParseBlock(fn->body());
m_ops->Push(new FnOp(fn->name(), fn->params(), std::move(block)));
switch (factor->GetType())
{
case NodeType::IntLiteral:
return ParseIntLiteral(reinterpret_cast<const IntLiteralNode *>(factor));
case NodeType::Variable:
return ParseVariable(reinterpret_cast<const VariableNode *>(factor));
case NodeType::FnCall:
return ParseFnCall(reinterpret_cast<const FnCallNode *>(factor));
default:
assert(0 && "some factor may not be handled");
break;
}
assert(0 && "unreachable");
return reinterpret_cast<ValueHandle *>(new Void(0));
}
return OpView(m_ops->data, m_ops->size);
}
public:
// TODO: think about safety (copying m_ops->data before giving)
OpView ops() const { return OpView(m_ops->data, m_ops->size); }
private:
void StartBlock()
{
m_containers.Push(m_ops);
m_ops = new OpBuilder();
}
ValueHandle *ParseExpression(const Node *expression)
{
if (expression->GetType() == NodeType::Expression)
{
auto expr = reinterpret_cast<const ExpressionNode *>(expression);
auto lhs = ParseExpression(expr->left());
auto rhs = ParseExpression(expr->right());
auto dst = AllocateNamed<Instruction>(lhs->GetType());
OpBuilder* EndBlock()
{
assert(m_containers.size > 0 && "containers stack is empty");
auto current = m_ops;
m_ops = m_containers.data[m_containers.size - 1];
m_containers.size--;
return current;
}
private:
Value AllocateRegister()
{
return Value(m_value_counter++);
}
private:
const Node* m_root = nullptr;
assert(4 == static_cast<int>(ExpressionNode::Operator::COUNT_OPERATORS) && "some operators may not be handled");
switch (expr->op())
{
case ExpressionNode::Operator::Plus:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::ADD));
break;
case ExpressionNode::Operator::Multiply:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::MUL));
break;
case ExpressionNode::Operator::Minus:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::SUB));
break;
case ExpressionNode::Operator::Divide:
m_ops->Push(new MathOp(dst, lhs, rhs, OpType::DIV));
break;
default:
assert(0 && "unreachable");
break;
}
OpBuilder* m_ops = nullptr;
unsigned int m_value_counter = 0;
unsigned int m_block_counter = 0;
std::unordered_map<StringView, Value> m_locals;
return dst;
}
Builder<OpBuilder*> m_containers;
};
return ParseFactor(expression);
}
// Lowers every statement of `compound` into a freshly started op list and
// wraps the collected ops in a Block with the next block id.
Block ParseBlock(const CompoundNode *compound)
{
    // Ops pushed while lowering the statements go into a new builder.
    StartBlock();
    for (auto &stmt : *compound)
    {
        if (stmt->GetType() == NodeType::VarDecl)
        {
            ParseVarDecl(reinterpret_cast<VarDeclNode *>(stmt));
        }
        else
        {
            // Any statement that is not a variable declaration is lowered
            // as an expression.
            ParseExpression(stmt);
        }
    }
    OpBuilder *finished = EndBlock();
    auto block = Block(m_block_counter++, std::move(finished->view()));
    // NOTE(review): this releases the builder's memory without running its
    // destructor - presumably so the storage behind the view handed to the
    // Block stays alive. Confirm against Builder's destructor.
    operator delete(finished);
    return block;
}
// Lowers the whole program: one ExternOp per extern declaration followed by
// one FnOp per function. Returns a view over the top-level op list.
OpView Build()
{
    assert(m_root->GetType() == NodeType::Program && "root should be a program");
    const auto *program = reinterpret_cast<const ProgramNode *>(m_root);

    // Externs
    for (auto &externDecl : program->externs())
        m_ops->Push(new ExternOp(externDecl->symbol()));

    // Functions
    for (auto &func : program->funcs())
    {
        Block body = ParseBlock(func->body());
        m_ops->Push(new FnOp(func->name(), func->params(), std::move(body)));
    }

    return OpView(m_ops->data, m_ops->size);
}
public:
// Returns a view over the ops collected in the current builder. The view
// is built directly from the builder's data pointer and size.
// TODO: think about safety (copying m_ops->data before giving)
OpView ops() const
{
    return OpView(m_ops->data, m_ops->size);
}
private:
void StartBlock()
{
m_containers.Push(m_ops);
m_ops = new OpBuilder();
}
// Pops the most recently saved builder back into m_ops and returns the
// builder that was active until now. Asserts that a builder was saved.
OpBuilder *EndBlock()
{
    assert(m_containers.size > 0 && "containers stack is empty");
    OpBuilder *finished = m_ops;
    const auto top = m_containers.size - 1;
    m_ops = m_containers.data[top];
    // Shrinking the size by one drops the entry that was just restored.
    m_containers.size = top;
    return finished;
}
private:
// Creates a new V with the next value id (the counter is incremented
// before use) and forwards the remaining arguments to V's constructor.
template <typename V, typename... Args>
ValueHandle *AllocateNamed(Args &&...args)
{
    ValueHandle *handle = new V(++m_value_counter, std::forward<Args>(args)...);
    return handle;
}
// Creates a new V that carries ValueHandle::kNoId as its id; the value
// counter is left untouched. Remaining arguments go to V's constructor.
template <typename V, typename... Args>
ValueHandle *AllocateUnnamed(Args &&...args)
{
    ValueHandle *handle = new V(ValueHandle::kNoId, std::forward<Args>(args)...);
    return handle;
}
private:
const Node *m_root = nullptr;
OpBuilder *m_ops = nullptr;
unsigned int m_value_counter = 0;
unsigned int m_block_counter = 0;
std::unordered_map<StringView, Pointer *> m_locals;
Builder<OpBuilder *> m_containers;
};
} // namespace IR

View File

@@ -5,46 +5,52 @@
namespace IR
{
enum class OpType
{
EXTERN = 0,
FN,
LOAD_CONST,
LOAD,
STORE,
ADD,
CALL,
COUNT_OPS,
};
// Discriminator for IR operations; this is what Op::GetType() returns.
enum class OpType
{
// Reported by ExternOp.
EXTERN = 0,
// Reported by FnOp.
FN,
// Reported by AllocateOp.
ALLOCATE,
// Reported by LoadOp.
LOAD,
// Reported by StoreOp.
STORE,
// ADD, SUB, MUL and DIV are the kinds a MathOp is constructed with.
ADD,
SUB,
MUL,
DIV,
// Reported by CallOp.
CALL,
// Last enumerator; presumably the number of op kinds rather than a real
// operation - confirm before relying on it.
COUNT_OPS,
};
// Expands to a GetType() override that returns OpType::x. Used inside Op
// subclasses whose kind is fixed, e.g. OP_TYPE(CALL).
#define OP_TYPE(x) \
OpType GetType() const override { return OpType::x; }
class Op
{
public:
virtual OpType GetType() const = 0;
virtual bool Valued() const { return false; };
virtual ~Op() {}
class Op
{
public:
virtual OpType GetType() const = 0;
virtual bool Valued() const { return false; };
virtual ~Op() {}
virtual StringView Format(int indent) const = 0;
};
virtual StringView Format(int indent) const = 0;
};
using OpView = View<Op*>;
using OpBuilder = Builder<Op*>;
using OpView = View<Op *>;
using OpBuilder = Builder<Op *>;
class OpValued : public Op
{
public:
OpValued(Value dest)
: m_dest(dest) {}
~OpValued() = default;
public:
Value result() const { return m_dest; }
protected:
bool Valued() const override { return true; }
private:
Value m_dest;
};
class OpValued : public Op
{
public:
OpValued(ValueHandle *dest)
: m_dest(dest) {}
~OpValued() = default;
public:
ValueHandle *result() const { return m_dest; }
protected:
bool Valued() const override { return true; }
private:
ValueHandle *m_dest;
};
} // namespace IR

View File

@@ -8,176 +8,234 @@
namespace IR
{
class ExternOp : public Op
{
public:
ExternOp(StringView symbol)
: m_symbol(symbol) {}
~ExternOp() {}
OP_TYPE(EXTERN)
public:
StringView Format(int indent) const override
class ExternOp : public Op
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << "EXTRN " << m_symbol.c_str();
return sb.view();
}
public:
const StringView& symbol() const { return m_symbol; }
private:
StringView m_symbol;
};
public:
ExternOp(StringView symbol)
: m_symbol(symbol) {}
~ExternOp() {}
// TODO: Make function return a value (i.e. inherit from the OpValued class instead)
class FnOp : public Op
{
public:
FnOp(StringView name, const View<StringView>& params, Block&& block)
: m_name(name), m_params(params), m_body(block) {}
~FnOp() {}
OP_TYPE(FN)
public:
StringView Format(int indent) const override
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << "LABEL " << m_name.c_str() << ':' << '\n';
sb << m_body.Format(indent);
return sb.view();
}
public:
const StringView& name() const { return m_name; }
const Block& body() const { return m_body; }
const View<StringView>& params() const { return m_params; }
private:
StringView m_name;
ValueView m_slots;
View<StringView> m_params;
Block m_body;
};
class LoadConstOp : public OpValued
{
public:
LoadConstOp(Value dest, long value)
: OpValued(dest), m_value(value) {}
~LoadConstOp() {}
OP_TYPE(LOAD_CONST)
public:
StringView Format(int indent) const override
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << result().Format() << " = LOAD_CONST " << m_value;
return sb.view();
}
public:
long value() const { return m_value; }
private:
long m_value;
};
class LoadOp : public OpValued
{
public:
LoadOp(Value dest, StringView addr)
: OpValued(dest), m_addr(addr) {}
~LoadOp() {}
OP_TYPE(LOAD)
public:
StringView Format(int indent) const override
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << result().Format() << " = LOAD \"" << m_addr.c_str() << "\"";
return sb.view();
}
public:
const StringView& addr() const { return m_addr; }
private:
StringView m_addr;
};
class StoreOp : public Op
{
public:
StoreOp(StringView addr, Value src)
: m_addr(addr), m_src(src) {}
~StoreOp() {}
OP_TYPE(STORE)
public:
StringView Format(int indent) const override
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << "STORE \"" << m_addr.c_str() << "\", " << m_src.Format();
return sb.view();
}
public:
const StringView& addr() const { return m_addr; }
Value src() const { return m_src; }
private:
StringView m_addr;
Value m_src;
};
class AddOp : public OpValued
{
public:
AddOp(Value dest, Value lhs, Value rhs)
: OpValued(dest), m_lhs(lhs), m_rhs(rhs) {}
~AddOp() {}
OP_TYPE(ADD)
public:
StringView Format(int indent) const override
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << result().Format() << " = ADD " << m_lhs.Format() << ", " << m_rhs.Format();
return sb.view();
}
public:
Value lhs() const { return m_lhs; }
Value rhs() const { return m_rhs; }
private:
Value m_lhs;
Value m_rhs;
};
class CallOp : public OpValued
{
public:
CallOp(Value dest, StringView callee, ValueView args)
: OpValued(dest), m_callee(callee), m_args(args) {}
~CallOp() {}
OP_TYPE(CALL)
public:
StringView Format(int indent) const override
{
StringBuilder sb;
for (size_t i = 0; i < m_args.size; ++i)
OP_TYPE(EXTERN)
public:
StringView Format(int indent) const override
{
StringBuilder sb;
sb.AppendIndent(indent);
sb << "PARAM " << m_args.data[i].Format() << '\n';
sb << "EXTRN " << m_symbol.c_str();
return sb.view();
}
sb.AppendIndent(indent);
sb << result().Format() << " = CALL " << m_callee.c_str();
return sb.view();
}
public:
const StringView& callee() const { return m_callee; }
const ValueView& args() const { return m_args; }
private:
StringView m_callee;
ValueView m_args;
};
public:
const StringView &symbol() const { return m_symbol; }
private:
StringView m_symbol;
};
// TODO: Make function return value (i.e. inherit the OpValued class instead)
// Function definition op: a name, the parameter names and the body Block.
class FnOp : public Op
{
public:
    // `block` is taken by rvalue reference and moved into the op. A named
    // rvalue-reference parameter is an lvalue, so without the cast the
    // body would be copied. The cast is what std::move does, spelled out
    // so this header needs no extra include.
    FnOp(StringView name, const View<StringView> &params, Block &&block)
        : m_name(name), m_params(params), m_body(static_cast<Block &&>(block)) {}
    ~FnOp() {}

    OP_TYPE(FN)

public:
    // Renders "LABEL <name>:" on its own line followed by the formatted body.
    StringView Format(int indent) const override
    {
        StringBuilder sb;
        sb.AppendIndent(indent);
        sb << "LABEL " << m_name.c_str() << ':' << '\n';
        sb << m_body.Format(indent);
        return sb.view();
    }

public:
    const StringView &name() const { return m_name; }
    const Block &body() const { return m_body; }
    const View<StringView> &params() const { return m_params; }

private:
    StringView m_name;
    View<StringView> m_params;
    Block m_body;
};
// Allocate a slot on the stack for a variable.
// `dest` is the value handle that names the slot and `typ` describes what
// the slot holds. The type is copied into the op at construction (through
// ValueHandle::Type's pointer-taking constructor), so `typ` only has to be
// valid during the constructor call.
class AllocateOp : public OpValued
{
public:
    AllocateOp(ValueHandle *dest, const ValueHandle::Type *typ)
        : OpValued(dest), m_typ(typ) {}
    ~AllocateOp() {}

    OP_TYPE(ALLOCATE)

public:
    // Renders "<dest> = ALLOCATE <type>".
    StringView Format(int indent) const override
    {
        StringBuilder sb;
        sb.AppendIndent(indent);
        sb << result()->Format() << " = ALLOCATE " << m_typ.Format();
        return sb.view();
    }

private:
    // Held by value: the previous heap-allocated copy was never freed.
    ValueHandle::Type m_typ;
};
// class LoadConstOp : public OpValued
// {
// public:
// LoadConstOp(ValueHandle *dest, long value)
// : OpValued(dest), m_value(value) {}
// ~LoadConstOp() {}
// OP_TYPE(LOAD_CONST)
// public:
// StringView Format(int indent) const override
// {
// StringBuilder sb;
// sb.AppendIndent(indent);
// sb << result()->Format() << " = LOAD_CONST " << m_value;
// return sb.view();
// }
// public:
// long value() const { return m_value; }
// private:
// long m_value;
// };
// Load op: pairs a destination value with a textual address.
class LoadOp : public OpValued
{
public:
    LoadOp(ValueHandle *dest, StringView addr)
        : OpValued(dest), m_addr(addr)
    {
    }
    ~LoadOp() {}

    OP_TYPE(LOAD)

public:
    // Renders: <dest> = LOAD "<addr>"
    StringView Format(int indent) const override
    {
        StringBuilder out;
        out.AppendIndent(indent);
        out << result()->Format()
            << " = LOAD \""
            << m_addr.c_str()
            << "\"";
        return out.view();
    }

public:
    const StringView &addr() const
    {
        return m_addr;
    }

private:
    StringView m_addr;
};
// Store op: records a source value and the Pointer it is stored to.
// Note the constructor order is (source, destination).
class StoreOp : public Op
{
public:
    StoreOp(ValueHandle *src, Pointer *dst)
        : m_dst(dst), m_src(src)
    {
    }
    ~StoreOp() {}

    OP_TYPE(STORE)

public:
    // Renders: STORE <src>, <dst>
    StringView Format(int indent) const override
    {
        StringBuilder out;
        out.AppendIndent(indent);
        out << "STORE "
            << m_src->Format()
            << ", "
            << m_dst->Format();
        return out.view();
    }

public:
    const Pointer *dst() const
    {
        return m_dst;
    }
    const ValueHandle *src() const
    {
        return m_src;
    }

private:
    Pointer *m_dst;
    ValueHandle *m_src;
};
// Binary arithmetic op. One class serves ADD, SUB, MUL and DIV; the kind is
// stored at construction and reported by GetType().
class MathOp : public OpValued
{
public:
    MathOp(ValueHandle *dest, ValueHandle *lhs, ValueHandle *rhs, OpType typ)
        : OpValued(dest), m_lhs(lhs), m_rhs(rhs), m_typ(typ) {}
    ~MathOp() {}

    OpType GetType() const override { return m_typ; }

private:
    // Mnemonic for the stored kind. Any kind other than the four arithmetic
    // ones trips the assertion; when assertions are compiled out an empty
    // StringView is returned instead of running off the end of the function.
    StringView FormatOperation() const
    {
        switch (m_typ)
        {
        case OpType::ADD:
            return StringView("ADD");
        case OpType::MUL:
            return StringView("MUL");
        case OpType::SUB:
            return StringView("SUB");
        case OpType::DIV:
            return StringView("DIV");
        default:
            break;
        }
        assert(false && "unreachable");
        return StringView();
    }

public:
    // Renders: <dest> = <MNEMONIC> <lhs>, <rhs>
    StringView Format(int indent) const override
    {
        StringBuilder sb;
        sb.AppendIndent(indent);
        sb << result()->Format() << " = " << FormatOperation() << " " << m_lhs->Format() << ", " << m_rhs->Format();
        return sb.view();
    }

public:
    ValueHandle *lhs() const { return m_lhs; }
    ValueHandle *rhs() const { return m_rhs; }

private:
    ValueHandle *m_lhs;
    ValueHandle *m_rhs;
    OpType m_typ;
};
// Call op: a destination value, the callee's name and the argument values.
class CallOp : public OpValued
{
public:
    CallOp(ValueHandle *dest, StringView callee, ValueView args)
        : OpValued(dest), m_callee(callee), m_args(args)
    {
    }
    ~CallOp() {}

    OP_TYPE(CALL)

public:
    // Emits one "PARAM <arg>" line per argument, then "<dest> = CALL <callee>".
    StringView Format(int indent) const override
    {
        StringBuilder out;
        for (size_t idx = 0; idx < m_args.size; ++idx)
        {
            const auto *arg = m_args.data[idx];
            out.AppendIndent(indent);
            out << "PARAM " << arg->Format() << '\n';
        }
        out.AppendIndent(indent);
        out << result()->Format() << " = CALL " << m_callee.c_str();
        return out.view();
    }

public:
    const StringView &callee() const
    {
        return m_callee;
    }
    const ValueView &args() const
    {
        return m_args;
    }

private:
    StringView m_callee;
    ValueView m_args;
};
} // namespace IR

View File

@@ -4,23 +4,161 @@
namespace IR
{
class Value
{
public:
Value(unsigned int id) : m_id(id) {}
Value() : m_id(0) {}
public:
StringView Format() const
class ValueHandle
{
auto sb = StringBuilder();
sb.AppendFormat("%%%d", m_id);
return sb.view();
}
private:
unsigned int m_id;
};
public:
static constexpr uint32_t kNoId = 0;
using ValueView = View<Value>;
using ValueBuilder = Builder<Value>;
// Describes the type of a value: a kind plus a width in bits.
struct Type
{
    enum class Kind
    {
        Void,
        Int,
        Ptr
    };
    Kind kind;
    uint32_t bits;

public:
    // The width defaults to 32 bits when only the kind is given.
    Type(Kind kind_) : kind(kind_), bits(32) {}
    Type(Kind kind_, uint32_t bits_) : kind(kind_), bits(bits_) {}
    // Copies the pointed-to type, including its width (previously the width
    // was reset to 32 regardless of the source).
    Type(const Type *typ) : kind(typ->kind), bits(typ->bits) {}

public:
    // Textual form: "i<bits>" for integers, "void" or "ptr" otherwise.
    StringView Format() const
    {
        switch (kind)
        {
        case Kind::Int:
        {
            StringBuilder sb;
            // bits is unsigned, so it is printed with %u.
            sb.AppendFormat("i%u", bits);
            return sb.view();
        }
        case Kind::Void:
        {
            return StringView("void");
        }
        case Kind::Ptr:
        {
            return StringView("ptr");
        }
        }
        assert(false && "unreachable");
        return StringView();
    }
};
public:
ValueHandle(uint32_t id = kNoId) : m_id(id) {}
bool HasId() const { return m_id != kNoId; }
uint32_t GetId() const { return m_id; }
public:
// Default textual form: "%<id>" when the handle has an id, "<?>" otherwise.
virtual StringView Format() const
{
    StringBuilder out{};
    if (!HasId())
    {
        out.Extend("<?>");
    }
    else
    {
        out.AppendFormat("%%%d", m_id);
    }
    return out.view();
}
public:
virtual const Type *GetType() const = 0;
private:
uint32_t m_id;
};
class ConstantInt : public ValueHandle
{
public:
ConstantInt(unsigned int id, long value)
: ValueHandle(id), m_value(value)
{
m_type = new Type{Type::Kind::Int};
}
const Type *GetType() const override { return m_type; }
public:
virtual StringView Format() const override
{
auto sb = StringBuilder();
sb.AppendFormat("%s %d", m_type->Format().c_str(), m_value);
return sb.view();
}
public:
long GetValue() const { return m_value; }
private:
long m_value;
Type *m_type;
};
class Pointer : public ValueHandle
{
public:
Pointer(unsigned int id)
: ValueHandle(id)
{
m_type = new Type{Type::Kind::Ptr};
}
const Type *GetType() const override { return m_type; }
public:
virtual StringView Format() const override
{
auto sb = StringBuilder();
sb.AppendFormat("%s %%%d", m_type->Format().c_str(), GetId());
return sb.view();
}
private:
Type *m_type;
};
// TODO: Remove void value and use void type only
class Void : public ValueHandle
{
public:
Void(unsigned int id)
: ValueHandle(id)
{
m_type = new Type{Type::Kind::Void};
}
const Type *GetType() const override { return m_type; }
public:
virtual StringView Format() const override
{
return m_type->Format();
}
private:
Type *m_type;
};
// Value produced by an instruction. Its type is copied from `typ` at
// construction through Type's pointer-taking constructor, so `typ` only
// has to be valid during the constructor call. Formatting is inherited
// from ValueHandle.
class Instruction : public ValueHandle
{
public:
    Instruction(unsigned int id, const Type *typ)
        : ValueHandle(id), m_type(typ) {}
    // The returned pointer refers to a member and is valid for as long as
    // this object lives.
    const Type *GetType() const override { return &m_type; }

private:
    // Held by value: the previous heap-allocated Type was never freed.
    Type m_type;
};
using ValueView = View<ValueHandle *>;
using ValueBuilder = Builder<ValueHandle *>;
} // namespace IR