394 lines
9.2 KiB
C++
394 lines
9.2 KiB
C++
#pragma once
|
|
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>

#include "lexer.hpp"
|
|
|
|
/// Discriminator returned by Node::GetType() for each concrete AST node class.
/// Enumerators are numbered consecutively from zero, so COUNT_NODES equals the
/// number of real node kinds listed before it.
enum class NodeType
{
    Expression,  // ExpressionNode (first enumerator, value 0)
    // TODO: abstract "Literal" node type
    IntLiteral,  // IntLiteralNode
    Extern,      // ExternNode
    FnDecl,      // FnDeclNode
    FnCall,      // FnCallNode
    Variable,    // VariableNode
    VarDecl,     // VarDeclNode
    Compound,    // CompoundNode
    Program,     // ProgramNode
    COUNT_NODES, // sentinel: must stay last
};
|
|
|
|
class Node
|
|
{
|
|
public:
|
|
virtual NodeType GetType() const = 0;
|
|
virtual ~Node() {}
|
|
};
|
|
|
|
// Used inside a Node subclass: expands to the GetType() override that
// reports NodeType::x for that class.
#define NODE_TYPE(x) NodeType GetType() const override { return NodeType::x; }
|
|
|
|
|
|
class ExpressionNode : public Node
|
|
{
|
|
public:
|
|
enum class Operator
|
|
{
|
|
Plus = 0,
|
|
Minus,
|
|
Divide,
|
|
Multiply,
|
|
COUNT_OPERATORS,
|
|
};
|
|
public:
|
|
ExpressionNode(Node* left, Node* right, Operator op)
|
|
: m_left(left), m_right(right), m_op(op) {}
|
|
~ExpressionNode() override {
|
|
delete m_left;
|
|
delete m_right;
|
|
}
|
|
|
|
NODE_TYPE(Expression)
|
|
public:
|
|
Node* left() const { return m_left; }
|
|
Node* right() const { return m_right; }
|
|
Operator op() const { return m_op; }
|
|
private:
|
|
Node* m_left;
|
|
Node* m_right;
|
|
Operator m_op;
|
|
};
|
|
|
|
// TODO: Maybe just LiteralNode with double or int literals support
|
|
class IntLiteralNode : public Node
|
|
{
|
|
public:
|
|
IntLiteralNode(long value)
|
|
: m_int_value(value) {}
|
|
~IntLiteralNode() override {}
|
|
|
|
NODE_TYPE(IntLiteral)
|
|
public:
|
|
long integer() const { return m_int_value; }
|
|
private:
|
|
long m_int_value;
|
|
};
|
|
|
|
class ExternNode : public Node
|
|
{
|
|
public:
|
|
// TODO: support multiple extern symbols
|
|
ExternNode(char* symbol)
|
|
: m_symbol(symbol) {}
|
|
~ExternNode() override {
|
|
delete m_symbol;
|
|
}
|
|
|
|
NODE_TYPE(Extern)
|
|
private:
|
|
char* m_symbol;
|
|
};
|
|
|
|
class FnDeclNode : public Node
|
|
{
|
|
public:
|
|
// TODO: support parameters
|
|
FnDeclNode(char* name, Node* body)
|
|
: m_name(name), m_body(body) {}
|
|
~FnDeclNode() override {
|
|
delete m_name;
|
|
delete m_body;
|
|
}
|
|
|
|
NODE_TYPE(FnDecl)
|
|
private:
|
|
char* m_name;
|
|
Node* m_body;
|
|
};
|
|
|
|
class FnCallNode : public Node
|
|
{
|
|
public:
|
|
// TODO: support multiple arguments
|
|
FnCallNode(char* name, Node* arg)
|
|
: m_name(name), m_arg(arg) {}
|
|
~FnCallNode() override {
|
|
delete m_name;
|
|
delete m_arg;
|
|
}
|
|
|
|
NODE_TYPE(FnCall)
|
|
private:
|
|
char* m_name;
|
|
Node* m_arg;
|
|
};
|
|
|
|
class VariableNode : public Node
|
|
{
|
|
public:
|
|
VariableNode(char* name)
|
|
: m_name(name) {}
|
|
~VariableNode() override {
|
|
delete m_name;
|
|
}
|
|
|
|
NODE_TYPE(Variable)
|
|
private:
|
|
char* m_name;
|
|
};
|
|
|
|
class VarDeclNode : public Node
|
|
{
|
|
public:
|
|
VarDeclNode(char* name, Node* value)
|
|
: m_name(name), m_value(value) {}
|
|
~VarDeclNode() override {
|
|
delete m_name;
|
|
delete m_value;
|
|
}
|
|
|
|
NODE_TYPE(VarDecl)
|
|
private:
|
|
char* m_name;
|
|
Node* m_value;
|
|
};
|
|
|
|
class CompoundNode : public Node
|
|
{
|
|
public:
|
|
CompoundNode() = default;
|
|
|
|
NODE_TYPE(Compound)
|
|
|
|
// --- Iteration ---
|
|
auto begin() { return m_nodes.begin(); }
|
|
auto end() { return m_nodes.end(); }
|
|
auto begin() const { return m_nodes.begin(); }
|
|
auto end() const { return m_nodes.end(); }
|
|
|
|
// --- Access by index ---
|
|
Node* operator[](size_t i) { return m_nodes[i]; }
|
|
const Node* operator[](size_t i) const { return m_nodes[i]; }
|
|
|
|
Node* at(size_t i) { return m_nodes.at(i); }
|
|
const Node* at(size_t i) const { return m_nodes.at(i); }
|
|
|
|
// --- Modifiers ---
|
|
void addNode(Node* n) { m_nodes.push_back(n); }
|
|
void removeNode(size_t idx) { m_nodes.erase(m_nodes.begin() + idx); }
|
|
|
|
// If you want full expose for iteration but not modification
|
|
const std::vector<Node*>& nodes() const { return m_nodes; }
|
|
|
|
size_t size() const { return m_nodes.size(); }
|
|
bool empty() const { return m_nodes.empty(); }
|
|
private:
|
|
std::vector<Node*> m_nodes;
|
|
};
|
|
|
|
class ProgramNode : public Node
|
|
{
|
|
public:
|
|
ProgramNode() = default;
|
|
|
|
NODE_TYPE(Program)
|
|
public:
|
|
void PushFunction(FnDeclNode* fn)
|
|
{
|
|
m_funcs.push_back(fn);
|
|
}
|
|
|
|
void PushExtern(ExternNode* extrn)
|
|
{
|
|
m_externs.push_back(extrn);
|
|
}
|
|
private:
|
|
std::vector<FnDeclNode*> m_funcs;
|
|
std::vector<ExternNode*> m_externs;
|
|
};
|
|
|
|
|
|
class AstParser
|
|
{
|
|
public:
|
|
AstParser(Lexer* lexer)
|
|
: m_lexer(lexer) {}
|
|
public:
|
|
ExternNode* ParseExtern()
|
|
{
|
|
m_lexer->NextExpect(TokenType::Id);
|
|
return new ExternNode(m_lexer->token().string);
|
|
}
|
|
|
|
FnDeclNode* ParseFnDecl()
|
|
{
|
|
// Function Declaration
|
|
m_lexer->NextExpect(TokenType::Id);
|
|
char *name = strdup(m_lexer->token().string);
|
|
m_lexer->NextExpect('(');
|
|
// TODO: parse parameters
|
|
m_lexer->NextExpect(')');
|
|
m_lexer->NextExpect('{');
|
|
auto compound = new CompoundNode();
|
|
while (m_lexer->seek_token()->token != '}')
|
|
{
|
|
compound->addNode(ParseStatement());
|
|
}
|
|
m_lexer->NextExpect('}');
|
|
return new FnDeclNode(name, compound);
|
|
}
|
|
|
|
FnCallNode* ParseFnCall(char* name)
|
|
{
|
|
// m_lexer->NextExpect(TokenType::Id);
|
|
// char* name = strdup(m_lexer->token().string);
|
|
m_lexer->NextExpect('(');
|
|
Node* arg = ParseExpression();
|
|
m_lexer->NextExpect(')');
|
|
return new FnCallNode(name, arg);
|
|
}
|
|
|
|
Node* ParseFactor()
|
|
{
|
|
auto token = m_lexer->seek_token();
|
|
|
|
switch (token->token)
|
|
{
|
|
case TokenType::IntLiteral: // integer
|
|
{
|
|
m_lexer->NextExpect(TokenType::IntLiteral);
|
|
auto node = new IntLiteralNode(m_lexer->token().int_number);
|
|
return node;
|
|
}
|
|
case TokenType::Id: // variable name or function call
|
|
{
|
|
m_lexer->NextExpect(TokenType::Id);
|
|
char *name = strdup(m_lexer->token().string);
|
|
token = m_lexer->seek_token();
|
|
if (token->token == '(')
|
|
{
|
|
return ParseFnCall(name);
|
|
}
|
|
return new VariableNode(name);
|
|
}
|
|
default:
|
|
fprintf(stderr, "%s:%d:%d: ERROR: unexpected token while parsing %ld\n", m_lexer->filename(), token->line_number, token->offset_start, token->token);
|
|
Exit(1);
|
|
break;
|
|
}
|
|
|
|
assert(0 && "unreachable");
|
|
}
|
|
|
|
Node* ParseTerm()
|
|
{
|
|
auto t = ParseFactor();
|
|
|
|
for (auto op = m_lexer->seek_token(); is_one_of(op->token, '/', '*'); op = m_lexer->seek_token())
|
|
{
|
|
m_lexer->NextToken();
|
|
ExpressionNode::Operator eop;
|
|
assert((int)ExpressionNode::Operator::COUNT_OPERATORS == 4 && "some operators may not be handled");
|
|
switch((char)op->token)
|
|
{
|
|
case '/':
|
|
eop = ExpressionNode::Operator::Divide;
|
|
break;
|
|
case '*':
|
|
eop = ExpressionNode::Operator::Multiply;
|
|
break;
|
|
default:
|
|
assert(false && "should be unreachable");
|
|
break;
|
|
}
|
|
auto expr = new ExpressionNode(t, ParseTerm(), eop);
|
|
t = expr;
|
|
}
|
|
|
|
return t;
|
|
}
|
|
|
|
Node* ParseExpression()
|
|
{
|
|
auto t = ParseTerm();
|
|
|
|
for (auto op = m_lexer->seek_token(); is_one_of(op->token, '+', '-'); op = m_lexer->seek_token())
|
|
{
|
|
m_lexer->NextToken();
|
|
ExpressionNode::Operator eop;
|
|
assert((int)ExpressionNode::Operator::COUNT_OPERATORS == 4 && "some operators may not be handled");
|
|
switch((char)op->token)
|
|
{
|
|
case '+':
|
|
eop = ExpressionNode::Operator::Plus;
|
|
break;
|
|
case '-':
|
|
eop = ExpressionNode::Operator::Minus;
|
|
break;
|
|
default:
|
|
assert(false && "should be unreachable");
|
|
break;
|
|
}
|
|
auto expr = new ExpressionNode(t, ParseTerm(), eop);
|
|
t = expr;
|
|
}
|
|
|
|
return t;
|
|
}
|
|
|
|
VarDeclNode* ParseVarDecl()
|
|
{
|
|
m_lexer->NextExpect(TokenType::Local);
|
|
m_lexer->NextExpect(TokenType::Id);
|
|
char *name = strdup(m_lexer->token().string);
|
|
m_lexer->NextExpect('=');
|
|
Node* value = ParseExpression();
|
|
return new VarDeclNode(name, value);
|
|
}
|
|
|
|
Node* ParseStatement()
|
|
{
|
|
auto token = m_lexer->seek_token();
|
|
// TODO: proper error handling
|
|
assert(token != nullptr && "next token should be available");
|
|
switch(token->token)
|
|
{
|
|
case TokenType::Local: return ParseVarDecl();
|
|
default: return ParseExpression();
|
|
}
|
|
|
|
assert(0 && "unreachable");
|
|
return nullptr;
|
|
}
|
|
|
|
ProgramNode* Parse()
|
|
{
|
|
auto program = new ProgramNode;
|
|
|
|
while (m_lexer->NextToken())
|
|
{
|
|
auto token = m_lexer->token();
|
|
switch(token.token)
|
|
{
|
|
case TokenType::Extern: program->PushExtern(ParseExtern()); break;
|
|
case TokenType::Fn: program->PushFunction(ParseFnDecl()); break;
|
|
default: {
|
|
fprintf(stderr, "%s:%d:%d: ERROR: unexpected token while parsing %ld\n", m_lexer->filename(), token.line_number, token.offset_start, token.token);
|
|
Exit(1);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return program;
|
|
}
|
|
private:
|
|
void Exit(int status)
|
|
{
|
|
std::exit(status);
|
|
}
|
|
private:
|
|
Lexer* m_lexer;
|
|
};
|