feat: basic parsing of example.rx is ready

This commit is contained in:
2025-11-28 18:32:30 +01:00
parent 7febbb80d4
commit 13fbdad563
5 changed files with 354 additions and 47 deletions

View File

@ -1,5 +1,6 @@
{ {
"files.associations": { "files.associations": {
"print": "cpp" "print": "cpp",
"cctype": "cpp"
} }
} }

View File

@ -1,7 +1,7 @@
extern putchar; extern putchar
fn main() { fn main() {
local a = 34; local a = 34
local b = 35; local b = 35
putchar(a + b); putchar(a + b)
} }

View File

@ -1,14 +1,21 @@
#pragma once #pragma once
#include <cstring> #include <cstring>
#include <cassert>
#include <cstdlib>
#include <vector>
#include "lexer.hpp" #include "lexer.hpp"
enum NodeType enum class NodeType
{ {
Expression = 0, Expression = 0,
// TODO: abstract "Literal" node type
IntLiteral,
Extern, Extern,
FnDecl, FnDecl,
FnCall, FnCall,
Variable,
VarDecl, VarDecl,
Compound,
Program,
COUNT_NODES, COUNT_NODES,
}; };
@ -20,14 +27,23 @@ public:
}; };
#define NODE_TYPE(x) \ #define NODE_TYPE(x) \
NodeType GetType() const override { return (x); } NodeType GetType() const override { return NodeType::x; }
class ExpressionNode : public Node class ExpressionNode : public Node
{ {
public: public:
ExpressionNode() enum class Operator
: m_left(nullptr), m_right(nullptr) {} {
Plus = 0,
Minus,
Divide,
Multiply,
COUNT_OPERATORS,
};
public:
ExpressionNode(Node* left, Node* right, Operator op)
: m_left(left), m_right(right), m_op(op) {}
~ExpressionNode() override { ~ExpressionNode() override {
delete m_left; delete m_left;
delete m_right; delete m_right;
@ -37,9 +53,26 @@ public:
public: public:
Node* left() const { return m_left; } Node* left() const { return m_left; }
Node* right() const { return m_right; } Node* right() const { return m_right; }
Operator op() const { return m_op; }
private: private:
Node* m_left; Node* m_left;
Node* m_right; Node* m_right;
Operator m_op;
};
// TODO: Maybe just LiteralNode with double or int literals support
// Leaf AST node that carries the value of a single integer literal.
class IntLiteralNode : public Node
{
public:
// Stores `value` as this literal's value.
IntLiteralNode(long value)
: m_int_value(value) {}
// Nothing to release: the node only holds a plain long.
~IntLiteralNode() override {}
// Generates the GetType() override reporting NodeType::IntLiteral.
NODE_TYPE(IntLiteral)
public:
// Returns the literal value given at construction.
long integer() const { return m_int_value; }
private:
long m_int_value;
}; };
class ExternNode : public Node class ExternNode : public Node
@ -91,6 +124,20 @@ private:
Node* m_arg; Node* m_arg;
}; };
// AST node for a reference to a named variable.
//
// Takes ownership of `name`. The parser produces the name with strdup(), so
// the buffer must be released with free(); using `delete` on it mixes
// allocators and is undefined behaviour.
class VariableNode : public Node
{
public:
    // `name` must be a malloc-family allocation (e.g. from strdup) or nullptr.
    VariableNode(char* name)
        : m_name(name) {}
    ~VariableNode() override {
        std::free(m_name);
    }
    // The node is the sole owner of the name buffer; a copy would free it twice.
    VariableNode(const VariableNode&) = delete;
    VariableNode& operator=(const VariableNode&) = delete;
    NODE_TYPE(Variable)
public:
    // Returns the variable's name; the pointer stays owned by this node.
    const char* name() const { return m_name; }
private:
    char* m_name;
};
class VarDeclNode : public Node class VarDeclNode : public Node
{ {
public: public:
@ -107,47 +154,239 @@ private:
Node* m_value; Node* m_value;
}; };
class CompoundNode : public Node
{
public:
CompoundNode() = default;
NODE_TYPE(Compound)
// --- Iteration ---
auto begin() { return m_nodes.begin(); }
auto end() { return m_nodes.end(); }
auto begin() const { return m_nodes.begin(); }
auto end() const { return m_nodes.end(); }
// --- Access by index ---
Node* operator[](size_t i) { return m_nodes[i]; }
const Node* operator[](size_t i) const { return m_nodes[i]; }
Node* at(size_t i) { return m_nodes.at(i); }
const Node* at(size_t i) const { return m_nodes.at(i); }
// --- Modifiers ---
void addNode(Node* n) { m_nodes.push_back(n); }
void removeNode(size_t idx) { m_nodes.erase(m_nodes.begin() + idx); }
// If you want full expose for iteration but not modification
const std::vector<Node*>& nodes() const { return m_nodes; }
size_t size() const { return m_nodes.size(); }
bool empty() const { return m_nodes.empty(); }
private:
std::vector<Node*> m_nodes;
};
class ProgramNode : public Node
{
public:
ProgramNode() = default;
NODE_TYPE(Program)
public:
void PushFunction(FnDeclNode* fn)
{
m_funcs.push_back(fn);
}
void PushExtern(ExternNode* extrn)
{
m_externs.push_back(extrn);
}
private:
std::vector<FnDeclNode*> m_funcs;
std::vector<ExternNode*> m_externs;
};
class AstParser class AstParser
{ {
public: public:
AstParser(Lexer* lexer) AstParser(Lexer* lexer)
: m_lexer(lexer) {} : m_lexer(lexer) {}
public: public:
// Parses the identifier that follows an `extern` keyword and wraps it in an
// ExternNode. The lexer aborts the program if the next token is not an Id.
ExternNode* ParseExtern()
{
    m_lexer->NextExpect(TokenType::Id);
    const auto& identifier = m_lexer->token();
    return new ExternNode(identifier.string);
}
// Parses `name ( ) { statement* }` after the `fn` keyword has been consumed
// and returns the resulting FnDeclNode.
//
// The name is duplicated with strdup() and handed to the node together with
// the CompoundNode holding the body statements.
FnDeclNode* ParseFnDecl()
{
    m_lexer->NextExpect(TokenType::Id);
    char* name = strdup(m_lexer->token().string);
    m_lexer->NextExpect('(');
    // TODO: parse parameters
    m_lexer->NextExpect(')');
    m_lexer->NextExpect('{');

    auto compound = new CompoundNode();
    while (true)
    {
        // seek_token() returns a heap-allocated copy, or nullptr once the
        // input is exhausted; inspect it and release it right away.
        const Token* peeked = m_lexer->seek_token();
        const bool body_done = peeked == nullptr || peeked->token == '}';
        delete peeked;
        if (body_done)
        {
            break;
        }
        compound->addNode(ParseStatement());
    }

    // If the input ended before the closing brace, NextExpect reports the
    // missing '}' and exits instead of a null pointer being dereferenced.
    m_lexer->NextExpect('}');
    return new FnDeclNode(name, compound);
}
// Parses the `( expression )` part of a call; the callee identifier has
// already been consumed by the caller and is passed in as `name`, which is
// forwarded to the new FnCallNode.
FnCallNode* ParseFnCall(char* name)
{
    m_lexer->NextExpect('(');
    Node* const argument = ParseExpression();
    m_lexer->NextExpect(')');

    FnCallNode* const call = new FnCallNode(name, argument);
    return call;
}
// Parses one factor: an integer literal, a variable reference, or a function
// call. Any other token, or the end of the input, is reported on stderr and
// terminates the program through Exit(1).
Node* ParseFactor()
{
    // seek_token() returns a heap-allocated copy, or nullptr when the input is
    // exhausted. Take a value copy and release the allocation immediately.
    const Token* peeked = m_lexer->seek_token();
    if (peeked == nullptr)
    {
        fprintf(stderr, "%s: ERROR: unexpected end of input while parsing an expression\n", m_lexer->filename());
        Exit(1);
        return nullptr;
    }
    const Token next = *peeked;
    delete peeked;

    switch (next.token)
    {
    case TokenType::IntLiteral: // integer
    {
        m_lexer->NextExpect(TokenType::IntLiteral);
        return new IntLiteralNode(m_lexer->token().int_number);
    }
    case TokenType::Id: // variable name or function call
    {
        m_lexer->NextExpect(TokenType::Id);
        char* name = strdup(m_lexer->token().string);

        // An identifier directly followed by '(' is a call. An identifier that
        // is the last token of the input is still a plain variable.
        const Token* after = m_lexer->seek_token();
        const bool is_call = after != nullptr && after->token == '(';
        delete after;

        if (is_call)
        {
            return ParseFnCall(name);
        }
        return new VariableNode(name);
    }
    default:
        // Cast explicitly so every argument matches its %ld specifier; a
        // scoped enum passed through varargs is not a long.
        fprintf(stderr, "%s:%ld:%ld: ERROR: unexpected token while parsing %ld\n",
                m_lexer->filename(),
                static_cast<long>(next.line_number),
                static_cast<long>(next.offset_start),
                static_cast<long>(next.token));
        Exit(1);
        break;
    }
    assert(0 && "unreachable");
    return nullptr;
}
// Parses a term: factor (('/' | '*') factor)*.
//
// Operators of equal precedence associate to the left, so `8 / 4 / 2` becomes
// `(8 / 4) / 2`. The right operand is therefore a single factor; recursing
// into ParseTerm() for it would swallow the rest of the chain and produce
// `8 / (4 / 2)`.
Node* ParseTerm()
{
    static_assert(static_cast<int>(ExpressionNode::Operator::COUNT_OPERATORS) == 4,
                  "some operators may not be handled");

    Node* lhs = ParseFactor();
    while (true)
    {
        // seek_token() returns a heap-allocated copy, or nullptr at the end of
        // the input; keep only the token kind and release the copy.
        const Token* peeked = m_lexer->seek_token();
        if (peeked == nullptr)
        {
            break;
        }
        const TokenType kind = peeked->token;
        delete peeked;

        if (!is_one_of(kind, '/', '*'))
        {
            break;
        }
        m_lexer->NextToken(); // consume the operator

        const ExpressionNode::Operator eop = (kind == '/')
            ? ExpressionNode::Operator::Divide
            : ExpressionNode::Operator::Multiply;

        Node* rhs = ParseFactor();
        lhs = new ExpressionNode(lhs, rhs, eop);
    }
    return lhs;
}
// Parses an expression: term (('+' | '-') term)*.
//
// Operators associate to the left: each loop iteration wraps the tree built
// so far as the left operand of a new ExpressionNode.
Node* ParseExpression()
{
    static_assert(static_cast<int>(ExpressionNode::Operator::COUNT_OPERATORS) == 4,
                  "some operators may not be handled");

    Node* lhs = ParseTerm();
    while (true)
    {
        // seek_token() returns a heap-allocated copy, or nullptr at the end of
        // the input; keep only the token kind and release the copy.
        const Token* peeked = m_lexer->seek_token();
        if (peeked == nullptr)
        {
            break;
        }
        const TokenType kind = peeked->token;
        delete peeked;

        if (!is_one_of(kind, '+', '-'))
        {
            break;
        }
        m_lexer->NextToken(); // consume the operator

        const ExpressionNode::Operator eop = (kind == '+')
            ? ExpressionNode::Operator::Plus
            : ExpressionNode::Operator::Minus;

        Node* rhs = ParseTerm();
        lhs = new ExpressionNode(lhs, rhs, eop);
    }
    return lhs;
}
// Parses `local <name> = <expression>` into a VarDeclNode. The name is
// duplicated with strdup() before the lexer moves on to the next token.
VarDeclNode* ParseVarDecl()
{
    m_lexer->NextExpect(TokenType::Local);

    m_lexer->NextExpect(TokenType::Id);
    char* const identifier = strdup(m_lexer->token().string);

    m_lexer->NextExpect('=');
    Node* const initializer = ParseExpression();

    return new VarDeclNode(identifier, initializer);
}
// Parses one statement. Peeks at the next token: a `local` keyword starts a
// variable declaration, anything else is parsed as an expression.
Node* ParseStatement() Node* ParseStatement()
{ {
// Peek only; the chosen sub-parser consumes the tokens itself.
auto token = m_lexer->seek_token();
// TODO: proper error handling
// seek_token() returns nullptr at end of input; this is only caught when asserts are enabled.
assert(token != nullptr && "next token should be available");
switch(token->token)
{
case TokenType::Local: return ParseVarDecl();
default: return ParseExpression();
}
// Both switch branches return, so the lines below are never executed.
assert(0 && "unreachable");
return nullptr;
}
ProgramNode* Parse()
{
auto program = new ProgramNode;
while (m_lexer->NextToken()) while (m_lexer->NextToken())
{ {
auto token = m_lexer->token(); auto token = m_lexer->token();
switch(token.token) switch(token.token)
{ {
case Id: case TokenType::Extern: program->PushExtern(ParseExtern()); break;
{ case TokenType::Fn: program->PushFunction(ParseFnDecl()); break;
if (strcmp(token.string, "extern") == 0) default: {
{ fprintf(stderr, "%s:%d:%d: ERROR: unexpected token while parsing %ld\n", m_lexer->filename(), token.line_number, token.offset_start, token.token);
// Extern Exit(1);
m_lexer->Eat(Id);
return new ExternNode(m_lexer->token().string);
}
else if (strcmp(token.string, "fn") == 0)
{
// Function Declaration
m_lexer->Eat(Id);
char* name = strdup(m_lexer->token().string);
m_lexer->Eat('(');
// TODO: parse parameters
m_lexer->Eat(')');
m_lexer->Eat('{');
// TODO: parse function body
m_lexer->Eat('}');
return new FnDeclNode(name, nullptr);
}
}
break; break;
} }
} }
}
// TODO: report parse error return program;
return nullptr; }
private:
// Terminates the process with `status`. Marked [[noreturn]] so the compiler
// knows that code following an Exit() call in a value-returning parser
// function is unreachable.
[[noreturn]] void Exit(int status)
{
    std::exit(status);
}
private: private:
Lexer* m_lexer; Lexer* m_lexer;

View File

@ -1,18 +1,51 @@
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <cstring>
#include <cctype> #include <cctype>
#include "string.hpp" #include "string.hpp"
enum TokenType enum class TokenType
{ {
Eof = 256, Eof = 256,
Id, Id,
IntLiteral, IntLiteral,
Extern,
Fn,
Local,
Unknown, Unknown,
}; };
// Returns true when `t` compares equal to at least one of `ts`. Each candidate
// may be a TokenType or a char; an empty candidate list yields false.
template <typename... Ts>
inline bool is_one_of(TokenType t, Ts... ts) {
    return ((t == ts) || ...);
}

// TokenType == char
//
// Single-character tokens are created by converting the character to
// TokenType (see NextExpect(char)), so the comparison converts the same way.
// Narrowing the token to char instead would truncate the named kinds that
// start at 256: Eof would compare equal to '\0', Id to '\x01', and so on.
inline bool operator==(TokenType t, char c)
{
    return t == static_cast<TokenType>(c);
}

// char == TokenType
inline bool operator==(char c, TokenType t)
{
    return t == c; // reuse the function above
}

// TokenType != char
inline bool operator!=(TokenType t, char c)
{
    return !(t == c);
}

// char != TokenType
inline bool operator!=(char c, TokenType t)
{
    return !(t == c);
}
struct Token struct Token
{ {
TokenType token; TokenType token;
@ -26,13 +59,32 @@ public:
Token(TokenType t) : token(t) {} Token(TokenType t) : token(t) {}
Token(TokenType t, long lnumber, long soffset, long eoffset) Token(TokenType t, long lnumber, long soffset, long eoffset)
: token(t), line_number(lnumber), offset_start(soffset), offset_end(eoffset) {} : token(t), line_number(lnumber), offset_start(soffset), offset_end(eoffset) {}
Token() : token(Unknown) {} Token() : token(TokenType::Unknown) {}
}; };
class Lexer class Lexer
{ {
public: public:
const Token& token() { return m_token; } const Token& token() const { return m_token; }
// Looks ahead one token without consuming it.
//
// Returns a heap-allocated copy of the next token, which the caller is
// responsible for deleting, or nullptr when NextToken() reports that no
// further token is available. The current token and the position fields are
// rolled back in both cases, so a lookahead that reaches the end of the input
// no longer leaves the lexer in the advanced state.
const Token* seek_token()
{
    // Snapshot everything that is rolled back after the lookahead.
    const Token saved_token = m_token;
    const auto saved_pos = m_pos;
    const auto saved_line = m_line;
    const auto saved_last_newline = m_last_newline;

    const bool has_next = NextToken();
    const Token* peeked = has_next ? new Token(m_token) : nullptr;

    m_token = saved_token;
    m_pos = saved_pos;
    m_line = saved_line;
    m_last_newline = saved_last_newline;
    return peeked;
}
// Returns the filename pointer this lexer was constructed with; used as the prefix of diagnostics.
const char* filename() const { return m_filename; }
public: public:
Lexer(char* filename, StringView code) Lexer(char* filename, StringView code)
: m_filename(filename), m_code(code) {} : m_filename(filename), m_code(code) {}
@ -74,8 +126,24 @@ public:
s.PushChar(m_code.data[m_pos++]); s.PushChar(m_code.data[m_pos++]);
} }
s.PushChar('\0'); s.PushChar('\0');
m_token = Token(Id, m_line, offset_start, m_pos - m_last_newline); m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline);
m_token.string = s.data; m_token.string = s.data;
if (strcmp("extern", m_token.string) == 0)
{
m_token.token = TokenType::Extern;
}
if (strcmp("fn", m_token.string) == 0)
{
m_token.token = TokenType::Fn;
}
if (strcmp("local", m_token.string) == 0)
{
m_token.token = TokenType::Local;
}
return true; return true;
} }
@ -91,7 +159,7 @@ public:
s.PushChar(m_code.data[m_pos++]); s.PushChar(m_code.data[m_pos++]);
} }
s.PushChar('\0'); s.PushChar('\0');
m_token = Token(IntLiteral, m_line, offset_start, m_pos - m_last_newline); m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline);
m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10); m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
m_token.string = s.data; m_token.string = s.data;
return true; return true;
@ -101,23 +169,23 @@ public:
return true; return true;
} }
void Eat(TokenType expected) void NextExpect(TokenType expected)
{ {
if (!NextToken()) if (!NextToken())
{ {
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF", m_filename, 0, 0, expected); fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF\n", m_filename, 0, 0, expected);
Exit(1); Exit(1);
} }
if (token().token != expected) if (token().token != expected)
{ {
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld", m_filename, token().line_number, token().offset_start, expected); fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld\n", m_filename, token().line_number, token().offset_start, expected, token().token);
Exit(1); Exit(1);
} }
} }
void Eat(char expected) void NextExpect(char expected)
{ {
Eat((TokenType)expected); NextExpect((TokenType)expected);
} }
private: private:
void Exit(int status) void Exit(int status)

View File

@ -8,9 +8,9 @@ void dump_tokens(const char* filename, Lexer* lexer)
{ {
while (lexer->NextToken()) { while (lexer->NextToken()) {
std::print("{}:{}:{}: ", filename, lexer->token().line_number, lexer->token().offset_start); std::print("{}:{}:{}: ", filename, lexer->token().line_number, lexer->token().offset_start);
if (lexer->token().token == Id) if (lexer->token().token == TokenType::Id)
std::println("id = {}", lexer->token().string); std::println("id = {}", lexer->token().string);
else if (lexer->token().token == IntLiteral) else if (lexer->token().token == TokenType::IntLiteral)
std::println("int = {}", lexer->token().int_number); std::println("int = {}", lexer->token().int_number);
else else
std::println("token = {}", (char)lexer->token().token); std::println("token = {}", (char)lexer->token().token);
@ -48,8 +48,7 @@ int main(int argc, char** argv)
AstParser parser(&lexer); AstParser parser(&lexer);
auto node = parser.ParseStatement(); auto program = parser.Parse();
ExternNode* extrn = reinterpret_cast<ExternNode*>(node);
return 0; return 0;
} }