#pragma once #include #include #include #include "lexer.hpp" enum class NodeType { Expression = 0, // TODO: abstract "Literal" node type IntLiteral, Extern, FnDecl, FnCall, Variable, VarDecl, Compound, Program, COUNT_NODES, }; class Node { public: virtual NodeType GetType() const = 0; virtual ~Node() {} }; #define NODE_TYPE(x) \ NodeType GetType() const override { return NodeType::x; } class ExpressionNode : public Node { public: enum class Operator { Plus = 0, Minus, Divide, Multiply, COUNT_OPERATORS, }; public: ExpressionNode(Node* left, Node* right, Operator op) : m_left(left), m_right(right), m_op(op) {} ~ExpressionNode() override { delete m_left; delete m_right; } NODE_TYPE(Expression) public: Node* left() const { return m_left; } Node* right() const { return m_right; } Operator op() const { return m_op; } private: Node* m_left; Node* m_right; Operator m_op; }; // TODO: Maybe just LiteralNode with double or int literals support class IntLiteralNode : public Node { public: IntLiteralNode(long value) : m_int_value(value) {} ~IntLiteralNode() override {} NODE_TYPE(IntLiteral) public: long integer() const { return m_int_value; } private: long m_int_value; }; class ExternNode : public Node { public: // TODO: support multiple extern symbols ExternNode(char* symbol) : m_symbol(symbol) {} ~ExternNode() override { delete m_symbol; } NODE_TYPE(Extern) private: char* m_symbol; }; class FnDeclNode : public Node { public: // TODO: support parameters FnDeclNode(char* name, Node* body) : m_name(name), m_body(body) {} ~FnDeclNode() override { delete m_name; delete m_body; } NODE_TYPE(FnDecl) private: char* m_name; Node* m_body; }; class FnCallNode : public Node { public: // TODO: support multiple arguments FnCallNode(char* name, Node* arg) : m_name(name), m_arg(arg) {} ~FnCallNode() override { delete m_name; delete m_arg; } NODE_TYPE(FnCall) private: char* m_name; Node* m_arg; }; class VariableNode : public Node { public: VariableNode(char* name) : m_name(name) {} ~VariableNode() override { delete m_name; } NODE_TYPE(Variable) private: char* m_name; }; class VarDeclNode : public Node { public: VarDeclNode(char* name, Node* value) : m_name(name), m_value(value) {} ~VarDeclNode() override { delete m_name; delete m_value; } NODE_TYPE(VarDecl) private: char* m_name; Node* m_value; }; class CompoundNode : public Node { public: CompoundNode() = default; NODE_TYPE(Compound) // --- Iteration --- auto begin() { return m_nodes.begin(); } auto end() { return m_nodes.end(); } auto begin() const { return m_nodes.begin(); } auto end() const { return m_nodes.end(); } // --- Access by index --- Node* operator[](size_t i) { return m_nodes[i]; } const Node* operator[](size_t i) const { return m_nodes[i]; } Node* at(size_t i) { return m_nodes.at(i); } const Node* at(size_t i) const { return m_nodes.at(i); } // --- Modifiers --- void addNode(Node* n) { m_nodes.push_back(n); } void removeNode(size_t idx) { m_nodes.erase(m_nodes.begin() + idx); } // If you want full expose for iteration but not modification const std::vector& nodes() const { return m_nodes; } size_t size() const { return m_nodes.size(); } bool empty() const { return m_nodes.empty(); } private: std::vector m_nodes; }; class ProgramNode : public Node { public: ProgramNode() = default; NODE_TYPE(Program) public: void PushFunction(FnDeclNode* fn) { m_funcs.push_back(fn); } void PushExtern(ExternNode* extrn) { m_externs.push_back(extrn); } private: std::vector m_funcs; std::vector m_externs; }; class AstParser { public: AstParser(Lexer* lexer) : m_lexer(lexer) {} public: ExternNode* ParseExtern() { m_lexer->NextExpect(TokenType::Id); return new ExternNode(m_lexer->token().string); } FnDeclNode* ParseFnDecl() { // Function Declaration m_lexer->NextExpect(TokenType::Id); char *name = strdup(m_lexer->token().string); m_lexer->NextExpect('('); // TODO: parse parameters m_lexer->NextExpect(')'); m_lexer->NextExpect('{'); auto compound = new CompoundNode(); while (m_lexer->seek_token()->token != '}') { compound->addNode(ParseStatement()); } m_lexer->NextExpect('}'); return new FnDeclNode(name, compound); } FnCallNode* ParseFnCall(char* name) { // m_lexer->NextExpect(TokenType::Id); // char* name = strdup(m_lexer->token().string); m_lexer->NextExpect('('); Node* arg = ParseExpression(); m_lexer->NextExpect(')'); return new FnCallNode(name, arg); } Node* ParseFactor() { auto token = m_lexer->seek_token(); switch (token->token) { case TokenType::IntLiteral: // integer { m_lexer->NextExpect(TokenType::IntLiteral); auto node = new IntLiteralNode(m_lexer->token().int_number); return node; } case TokenType::Id: // variable name or function call { m_lexer->NextExpect(TokenType::Id); char *name = strdup(m_lexer->token().string); token = m_lexer->seek_token(); if (token->token == '(') { return ParseFnCall(name); } return new VariableNode(name); } default: fprintf(stderr, "%s:%d:%d: ERROR: unexpected token while parsing %ld\n", m_lexer->filename(), token->line_number, token->offset_start, token->token); Exit(1); break; } assert(0 && "unreachable"); } Node* ParseTerm() { auto t = ParseFactor(); for (auto op = m_lexer->seek_token(); is_one_of(op->token, '/', '*'); op = m_lexer->seek_token()) { m_lexer->NextToken(); ExpressionNode::Operator eop; assert((int)ExpressionNode::Operator::COUNT_OPERATORS == 4 && "some operators may not be handled"); switch((char)op->token) { case '/': eop = ExpressionNode::Operator::Divide; break; case '*': eop = ExpressionNode::Operator::Multiply; break; default: assert(false && "should be unreachable"); break; } auto expr = new ExpressionNode(t, ParseTerm(), eop); t = expr; } return t; } Node* ParseExpression() { auto t = ParseTerm(); for (auto op = m_lexer->seek_token(); is_one_of(op->token, '+', '-'); op = m_lexer->seek_token()) { m_lexer->NextToken(); ExpressionNode::Operator eop; assert((int)ExpressionNode::Operator::COUNT_OPERATORS == 4 && "some operators may not be handled"); switch((char)op->token) { case '+': eop = ExpressionNode::Operator::Plus; break; case '-': eop = ExpressionNode::Operator::Minus; break; default: assert(false && "should be unreachable"); break; } auto expr = new ExpressionNode(t, ParseTerm(), eop); t = expr; } return t; } VarDeclNode* ParseVarDecl() { m_lexer->NextExpect(TokenType::Local); m_lexer->NextExpect(TokenType::Id); char *name = strdup(m_lexer->token().string); m_lexer->NextExpect('='); Node* value = ParseExpression(); return new VarDeclNode(name, value); } Node* ParseStatement() { auto token = m_lexer->seek_token(); // TODO: proper error handling assert(token != nullptr && "next token should be available"); switch(token->token) { case TokenType::Local: return ParseVarDecl(); default: return ParseExpression(); } assert(0 && "unreachable"); return nullptr; } ProgramNode* Parse() { auto program = new ProgramNode; while (m_lexer->NextToken()) { auto token = m_lexer->token(); switch(token.token) { case TokenType::Extern: program->PushExtern(ParseExtern()); break; case TokenType::Fn: program->PushFunction(ParseFnDecl()); break; default: { fprintf(stderr, "%s:%d:%d: ERROR: unexpected token while parsing %ld\n", m_lexer->filename(), token.line_number, token.offset_start, token.token); Exit(1); break; } } } return program; } private: void Exit(int status) { std::exit(status); } private: Lexer* m_lexer; };