feat: basic parsing of example.rx is ready
This commit is contained in:
295
include/ast.hpp
295
include/ast.hpp
@ -1,14 +1,21 @@
|
||||
#pragma once
|
||||
#include <cstring>
|
||||
#include <cassert>
#include <cstdlib>
|
||||
#include <vector>
|
||||
#include "lexer.hpp"
|
||||
|
||||
enum NodeType
|
||||
// Discriminator returned by Node::GetType() (see the NODE_TYPE macro).
// Enumerators are numbered consecutively starting at 0.
enum class NodeType
{
    Expression = 0,
    // TODO: abstract "Literal" node type
    IntLiteral,   // reported by IntLiteralNode
    Extern,
    FnDecl,
    FnCall,
    Variable,     // reported by VariableNode
    VarDecl,
    Compound,     // reported by CompoundNode
    Program,      // reported by ProgramNode
    COUNT_NODES,  // listed last, so its value equals the number of node kinds above
};
|
||||
|
||||
@ -20,14 +27,23 @@ public:
|
||||
};
|
||||
|
||||
#define NODE_TYPE(x) \
|
||||
NodeType GetType() const override { return (x); }
|
||||
NodeType GetType() const override { return NodeType::x; }
|
||||
|
||||
|
||||
class ExpressionNode : public Node
|
||||
{
|
||||
public:
|
||||
ExpressionNode()
|
||||
: m_left(nullptr), m_right(nullptr) {}
|
||||
enum class Operator
|
||||
{
|
||||
Plus = 0,
|
||||
Minus,
|
||||
Divide,
|
||||
Multiply,
|
||||
COUNT_OPERATORS,
|
||||
};
|
||||
public:
|
||||
ExpressionNode(Node* left, Node* right, Operator op)
|
||||
: m_left(left), m_right(right), m_op(op) {}
|
||||
~ExpressionNode() override {
|
||||
delete m_left;
|
||||
delete m_right;
|
||||
@ -37,9 +53,26 @@ public:
|
||||
public:
|
||||
Node* left() const { return m_left; }
|
||||
Node* right() const { return m_right; }
|
||||
Operator op() const { return m_op; }
|
||||
private:
|
||||
Node* m_left;
|
||||
Node* m_right;
|
||||
Operator m_op;
|
||||
};
|
||||
|
||||
// TODO: Maybe just LiteralNode with double or int literals support
|
||||
class IntLiteralNode : public Node
|
||||
{
|
||||
public:
|
||||
IntLiteralNode(long value)
|
||||
: m_int_value(value) {}
|
||||
~IntLiteralNode() override {}
|
||||
|
||||
NODE_TYPE(IntLiteral)
|
||||
public:
|
||||
long integer() const { return m_int_value; }
|
||||
private:
|
||||
long m_int_value;
|
||||
};
|
||||
|
||||
class ExternNode : public Node
|
||||
@ -91,6 +124,20 @@ private:
|
||||
Node* m_arg;
|
||||
};
|
||||
|
||||
class VariableNode : public Node
|
||||
{
|
||||
public:
|
||||
VariableNode(char* name)
|
||||
: m_name(name) {}
|
||||
~VariableNode() override {
|
||||
delete m_name;
|
||||
}
|
||||
|
||||
NODE_TYPE(Variable)
|
||||
private:
|
||||
char* m_name;
|
||||
};
|
||||
|
||||
class VarDeclNode : public Node
|
||||
{
|
||||
public:
|
||||
@ -107,47 +154,239 @@ private:
|
||||
Node* m_value;
|
||||
};
|
||||
|
||||
class CompoundNode : public Node
|
||||
{
|
||||
public:
|
||||
CompoundNode() = default;
|
||||
|
||||
NODE_TYPE(Compound)
|
||||
|
||||
// --- Iteration ---
|
||||
auto begin() { return m_nodes.begin(); }
|
||||
auto end() { return m_nodes.end(); }
|
||||
auto begin() const { return m_nodes.begin(); }
|
||||
auto end() const { return m_nodes.end(); }
|
||||
|
||||
// --- Access by index ---
|
||||
Node* operator[](size_t i) { return m_nodes[i]; }
|
||||
const Node* operator[](size_t i) const { return m_nodes[i]; }
|
||||
|
||||
Node* at(size_t i) { return m_nodes.at(i); }
|
||||
const Node* at(size_t i) const { return m_nodes.at(i); }
|
||||
|
||||
// --- Modifiers ---
|
||||
void addNode(Node* n) { m_nodes.push_back(n); }
|
||||
void removeNode(size_t idx) { m_nodes.erase(m_nodes.begin() + idx); }
|
||||
|
||||
// If you want full expose for iteration but not modification
|
||||
const std::vector<Node*>& nodes() const { return m_nodes; }
|
||||
|
||||
size_t size() const { return m_nodes.size(); }
|
||||
bool empty() const { return m_nodes.empty(); }
|
||||
private:
|
||||
std::vector<Node*> m_nodes;
|
||||
};
|
||||
|
||||
class ProgramNode : public Node
|
||||
{
|
||||
public:
|
||||
ProgramNode() = default;
|
||||
|
||||
NODE_TYPE(Program)
|
||||
public:
|
||||
void PushFunction(FnDeclNode* fn)
|
||||
{
|
||||
m_funcs.push_back(fn);
|
||||
}
|
||||
|
||||
void PushExtern(ExternNode* extrn)
|
||||
{
|
||||
m_externs.push_back(extrn);
|
||||
}
|
||||
private:
|
||||
std::vector<FnDeclNode*> m_funcs;
|
||||
std::vector<ExternNode*> m_externs;
|
||||
};
|
||||
|
||||
|
||||
class AstParser
|
||||
{
|
||||
public:
|
||||
AstParser(Lexer* lexer)
|
||||
: m_lexer(lexer) {}
|
||||
public:
|
||||
// Parses the remainder of an extern declaration: a single identifier.
// Parse() calls this after it has already consumed the `extern` keyword.
// NOTE(review): unlike the other Parse* helpers the identifier text is not
// strdup'ed here - confirm ExternNode's ownership expectations.
ExternNode* ParseExtern()
{
    m_lexer->NextExpect(TokenType::Id);
    const Token& identifier = m_lexer->token();
    return new ExternNode(identifier.string);
}
|
||||
|
||||
// Parses `<name> ( ) { <statements> }` into a FnDeclNode.
// Parse() calls this after it has already consumed the `fn` keyword.
// The function name is duplicated with strdup() and handed to the node.
FnDeclNode* ParseFnDecl()
{
    // Function Declaration
    m_lexer->NextExpect(TokenType::Id);
    char* name = strdup(m_lexer->token().string);
    m_lexer->NextExpect('(');
    // TODO: parse parameters
    m_lexer->NextExpect(')');
    m_lexer->NextExpect('{');

    auto compound = new CompoundNode();
    // seek_token() returns nullptr when no further token can be read. Stop
    // the loop in that case instead of dereferencing it; the NextExpect('}')
    // below then reports the missing brace and exits.
    for (const Token* next = m_lexer->seek_token();
         next != nullptr && next->token != '}';
         next = m_lexer->seek_token())
    {
        compound->addNode(ParseStatement());
    }
    m_lexer->NextExpect('}');

    return new FnDeclNode(name, compound);
}
|
||||
|
||||
// Parses the `( <expression> )` part of a call whose callee identifier has
// already been consumed by the caller and is passed in as `name`.
// Exactly one argument expression is parsed.
FnCallNode* ParseFnCall(char* name)
{
    m_lexer->NextExpect('(');
    Node* const argument = ParseExpression();
    m_lexer->NextExpect(')');

    return new FnCallNode(name, argument);
}
|
||||
|
||||
// Parses the smallest expression unit:
//   - an integer literal        -> IntLiteralNode
//   - an identifier followed by '(' -> FnCallNode (via ParseFnCall)
//   - any other identifier      -> VariableNode
// Any other token is reported on stderr and the process exits.
Node* ParseFactor()
{
    // Peek so the token kind can be inspected before anything is consumed.
    const Token* token = m_lexer->seek_token();
    if (token == nullptr)
    {
        // seek_token() returns nullptr when no further token can be read.
        fprintf(stderr, "%s:%d:%d: ERROR: unexpected end of input while parsing\n", m_lexer->filename(), 0, 0);
        Exit(1);
        return nullptr;
    }

    switch (token->token)
    {
        case TokenType::IntLiteral: // integer
        {
            m_lexer->NextExpect(TokenType::IntLiteral);
            return new IntLiteralNode(m_lexer->token().int_number);
        }
        case TokenType::Id: // variable name or function call
        {
            m_lexer->NextExpect(TokenType::Id);
            char* name = strdup(m_lexer->token().string);
            // An identifier at the very end of the input is still a variable.
            const Token* next = m_lexer->seek_token();
            if (next != nullptr && next->token == '(')
            {
                return ParseFnCall(name);
            }
            return new VariableNode(name);
        }
        default:
            // A scoped enum is not promoted when passed through "...", so
            // convert explicitly to the types the format string names.
            fprintf(stderr, "%s:%d:%d: ERROR: unexpected token while parsing %ld\n",
                    m_lexer->filename(),
                    static_cast<int>(token->line_number),
                    static_cast<int>(token->offset_start),
                    static_cast<long>(token->token));
            Exit(1);
            break;
    }

    assert(0 && "unreachable");
    // Keeps the function well-defined when assertions are compiled out.
    return nullptr;
}
|
||||
|
||||
// Parses a chain of factors joined by '/' or '*':
//     term := factor (('/' | '*') factor)*
// The chain is folded left to right, so `8 / 4 / 2` becomes `(8 / 4) / 2`.
// The right operand must therefore be a single factor; recursing into
// ParseTerm() here would swallow the rest of the chain and make the
// operators right-associative.
Node* ParseTerm()
{
    static_assert(static_cast<int>(ExpressionNode::Operator::COUNT_OPERATORS) == 4,
                  "some operators may not be handled");

    Node* lhs = ParseFactor();

    // seek_token() returns nullptr when no further token can be read; treat
    // that as the end of the term.
    for (const Token* op = m_lexer->seek_token();
         op != nullptr && is_one_of(op->token, '/', '*');
         op = m_lexer->seek_token())
    {
        m_lexer->NextToken(); // consume the operator that was just peeked

        // The loop condition guarantees the token is '/' or '*'.
        const ExpressionNode::Operator eop = (op->token == '/')
            ? ExpressionNode::Operator::Divide
            : ExpressionNode::Operator::Multiply;

        lhs = new ExpressionNode(lhs, ParseFactor(), eop);
    }

    return lhs;
}
|
||||
|
||||
// Parses a chain of terms joined by '+' or '-':
//     expression := term (('+' | '-') term)*
// The chain is folded left to right, so `a - b - c` becomes `(a - b) - c`.
Node* ParseExpression()
{
    static_assert(static_cast<int>(ExpressionNode::Operator::COUNT_OPERATORS) == 4,
                  "some operators may not be handled");

    Node* lhs = ParseTerm();

    // seek_token() returns nullptr when no further token can be read; treat
    // that as the end of the expression instead of dereferencing it.
    for (const Token* op = m_lexer->seek_token();
         op != nullptr && is_one_of(op->token, '+', '-');
         op = m_lexer->seek_token())
    {
        m_lexer->NextToken(); // consume the operator that was just peeked

        // The loop condition guarantees the token is '+' or '-'.
        const ExpressionNode::Operator eop = (op->token == '+')
            ? ExpressionNode::Operator::Plus
            : ExpressionNode::Operator::Minus;

        lhs = new ExpressionNode(lhs, ParseTerm(), eop);
    }

    return lhs;
}
|
||||
|
||||
// Parses `local <identifier> = <expression>` into a VarDeclNode.
// The identifier text is duplicated with strdup() before the lexer moves on.
VarDeclNode* ParseVarDecl()
{
    m_lexer->NextExpect(TokenType::Local);

    m_lexer->NextExpect(TokenType::Id);
    char* const variable_name = strdup(m_lexer->token().string);

    m_lexer->NextExpect('=');
    Node* const initializer = ParseExpression();

    return new VarDeclNode(variable_name, initializer);
}
|
||||
|
||||
// Parses one statement. A statement that starts with the `local` keyword is
// a variable declaration; everything else is parsed as an expression.
Node* ParseStatement()
{
    const Token* token = m_lexer->seek_token();
    // TODO: proper error handling
    assert(token != nullptr && "next token should be available");

    if (token->token == TokenType::Local)
    {
        return ParseVarDecl();
    }
    return ParseExpression();
}
|
||||
|
||||
ProgramNode* Parse()
|
||||
{
|
||||
auto program = new ProgramNode;
|
||||
|
||||
while (m_lexer->NextToken())
|
||||
{
|
||||
auto token = m_lexer->token();
|
||||
switch(token.token)
|
||||
{
|
||||
case Id:
|
||||
{
|
||||
if (strcmp(token.string, "extern") == 0)
|
||||
{
|
||||
// Extern
|
||||
m_lexer->Eat(Id);
|
||||
return new ExternNode(m_lexer->token().string);
|
||||
}
|
||||
else if (strcmp(token.string, "fn") == 0)
|
||||
{
|
||||
// Function Declaration
|
||||
m_lexer->Eat(Id);
|
||||
char* name = strdup(m_lexer->token().string);
|
||||
m_lexer->Eat('(');
|
||||
// TODO: parse parameters
|
||||
m_lexer->Eat(')');
|
||||
m_lexer->Eat('{');
|
||||
// TODO: parse function body
|
||||
m_lexer->Eat('}');
|
||||
return new FnDeclNode(name, nullptr);
|
||||
}
|
||||
case TokenType::Extern: program->PushExtern(ParseExtern()); break;
|
||||
case TokenType::Fn: program->PushFunction(ParseFnDecl()); break;
|
||||
default: {
|
||||
fprintf(stderr, "%s:%d:%d: ERROR: unexpected token while parsing %ld\n", m_lexer->filename(), token.line_number, token.offset_start, token.token);
|
||||
Exit(1);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: report parse error
|
||||
return nullptr;
|
||||
return program;
|
||||
}
|
||||
private:
|
||||
// Single exit point for fatal parse errors: ends the process with `status`.
void Exit(int status)
{
    std::exit(status);
}
|
||||
private:
|
||||
Lexer* m_lexer;
|
||||
|
||||
@ -1,18 +1,51 @@
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include <cctype>
|
||||
|
||||
#include "string.hpp"
|
||||
|
||||
enum TokenType
|
||||
// Kinds of token produced by the lexer.
//
// Single-character tokens are represented by casting the character itself to
// TokenType (see NextExpect(char)), so the named kinds start at 256 to stay
// clear of the character range.
enum class TokenType
{
    Eof = 256,
    Id,
    IntLiteral,

    Extern,
    Fn,
    Local,

    Unknown,
};

// TokenType == char
// Compares as int, mirroring the char -> TokenType cast used to build
// single-character tokens. Narrowing the token to char instead would wrap
// the named kinds (>= 256) onto control characters, e.g. Eof onto '\0'.
inline bool operator==(TokenType t, char c)
{
    return static_cast<int>(t) == static_cast<int>(c);
}

// char == TokenType
inline bool operator==(char c, TokenType t)
{
    return t == c; // reuse the function above
}

// TokenType != char
inline bool operator!=(TokenType t, char c)
{
    return !(t == c);
}

// char != TokenType
inline bool operator!=(char c, TokenType t)
{
    return !(t == c);
}

// Returns true when `t` equals at least one of `ts`. Each candidate may be a
// TokenType or a char. An empty candidate list yields false.
template <typename... Ts>
inline bool is_one_of(TokenType t, Ts... ts) {
    return ((t == ts) || ...);
}
|
||||
|
||||
struct Token
|
||||
{
|
||||
TokenType token;
|
||||
@ -26,13 +59,32 @@ public:
|
||||
Token(TokenType t) : token(t) {}
|
||||
Token(TokenType t, long lnumber, long soffset, long eoffset)
|
||||
: token(t), line_number(lnumber), offset_start(soffset), offset_end(eoffset) {}
|
||||
Token() : token(Unknown) {}
|
||||
Token() : token(TokenType::Unknown) {}
|
||||
};
|
||||
|
||||
class Lexer
|
||||
{
|
||||
public:
|
||||
const Token& token() { return m_token; }
|
||||
const Token& token() const { return m_token; }
|
||||
// Peeks at the next token without consuming it.
//
// The lexer state (current token, position, line, last-newline offset) is
// saved, NextToken() is run, and the state is put back - also when
// NextToken() fails, so an unsuccessful peek no longer leaves the lexer
// advanced.
//
// Returns nullptr when NextToken() reports that no token could be read.
// Otherwise returns a copy of the peeked token allocated with `new`; the
// lexer never frees it, so it lives until the caller deletes it.
const Token* seek_token()
{
    const auto saved_token = m_token;
    const auto saved_pos = m_pos;
    const auto saved_line = m_line;
    const auto saved_last_newline = m_last_newline;

    const bool advanced = NextToken();
    const Token* peeked = advanced ? new Token(m_token) : nullptr;

    m_token = saved_token;
    m_pos = saved_pos;
    m_line = saved_line;
    m_last_newline = saved_last_newline;

    return peeked;
}
|
||||
|
||||
const char* filename() const { return m_filename; }
|
||||
public:
|
||||
Lexer(char* filename, StringView code)
|
||||
: m_filename(filename), m_code(code) {}
|
||||
@ -74,8 +126,24 @@ public:
|
||||
s.PushChar(m_code.data[m_pos++]);
|
||||
}
|
||||
s.PushChar('\0');
|
||||
m_token = Token(Id, m_line, offset_start, m_pos - m_last_newline);
|
||||
m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline);
|
||||
m_token.string = s.data;
|
||||
|
||||
if (strcmp("extern", m_token.string) == 0)
|
||||
{
|
||||
m_token.token = TokenType::Extern;
|
||||
}
|
||||
|
||||
if (strcmp("fn", m_token.string) == 0)
|
||||
{
|
||||
m_token.token = TokenType::Fn;
|
||||
}
|
||||
|
||||
if (strcmp("local", m_token.string) == 0)
|
||||
{
|
||||
m_token.token = TokenType::Local;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -91,7 +159,7 @@ public:
|
||||
s.PushChar(m_code.data[m_pos++]);
|
||||
}
|
||||
s.PushChar('\0');
|
||||
m_token = Token(IntLiteral, m_line, offset_start, m_pos - m_last_newline);
|
||||
m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline);
|
||||
m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
|
||||
m_token.string = s.data;
|
||||
return true;
|
||||
@ -101,23 +169,23 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
void Eat(TokenType expected)
|
||||
void NextExpect(TokenType expected)
|
||||
{
|
||||
if (!NextToken())
|
||||
{
|
||||
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF", m_filename, 0, 0, expected);
|
||||
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF\n", m_filename, 0, 0, expected);
|
||||
Exit(1);
|
||||
}
|
||||
if (token().token != expected)
|
||||
{
|
||||
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld", m_filename, token().line_number, token().offset_start, expected);
|
||||
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld\n", m_filename, token().line_number, token().offset_start, expected, token().token);
|
||||
Exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void Eat(char expected)
|
||||
void NextExpect(char expected)
|
||||
{
|
||||
Eat((TokenType)expected);
|
||||
NextExpect((TokenType)expected);
|
||||
}
|
||||
private:
|
||||
void Exit(int status)
|
||||
|
||||
Reference in New Issue
Block a user