feat: basic lexer + parser with basic example

This commit is contained in:
2025-11-26 22:26:10 +01:00
parent 1975059b1b
commit 7febbb80d4
10 changed files with 414 additions and 1 deletions

5
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,5 @@
{
"files.associations": {
"print": "cpp"
}
}

View File

@ -6,4 +6,5 @@ set(SOURCES
src/main.cpp src/main.cpp
) )
# Header search path for the project's own headers (lexer.hpp, ast.hpp, string.hpp).
# include_directories() takes only directories, not a target name: the former
# leading `pl` argument was treated as one more (non-existent) include directory.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
add_executable(pl ${SOURCES}) add_executable(pl ${SOURCES})

7
example.rx Normal file
View File

@ -0,0 +1,7 @@
extern putchar;
fn main() {
local a = 34;
local b = 35;
putchar(a + b);
}

154
include/ast.hpp Normal file
View File

@ -0,0 +1,154 @@
#pragma once
#include <cstring>
#include "lexer.hpp"
/// Discriminator returned by Node::GetType() for each concrete AST node class.
enum NodeType {
    Expression = 0, // ExpressionNode
    Extern,         // ExternNode
    FnDecl,         // FnDeclNode
    FnCall,         // FnCallNode
    VarDecl,        // VarDeclNode

    // Number of node kinds above; keep this enumerator last.
    COUNT_NODES,
};
/// Abstract base class of every AST node.
///
/// The destructor is virtual so that nodes can be destroyed through a
/// `Node*`, which is how the owning nodes release their children.
class Node {
public:
    /// Returns the NodeType enumerator identifying the concrete node class.
    virtual NodeType GetType() const = 0;

    virtual ~Node() = default;
};
// Expands, inside a Node subclass, to the GetType() override that returns
// the given NodeType enumerator `x`.
#define NODE_TYPE(x) \
NodeType GetType() const override { return (x); }
/// Expression node with a left and a right child subtree.
///
/// Both children start out as nullptr and are owned by this node: whatever
/// they point to is deleted when the node is destroyed.
class ExpressionNode : public Node {
public:
    ExpressionNode() = default;

    ~ExpressionNode() override
    {
        delete m_left;
        delete m_right;
    }

    NODE_TYPE(Expression)

public:
    /// Left child, or nullptr when absent. Ownership stays with this node.
    Node* left() const { return m_left; }

    /// Right child, or nullptr when absent. Ownership stays with this node.
    Node* right() const { return m_right; }

private:
    Node* m_left = nullptr;
    Node* m_right = nullptr;
};
/// AST node for an `extern <symbol>` declaration.
///
/// Takes ownership of `symbol`. The string must come from the malloc family
/// (the parser hands over the lexer's token string, which StringBuilder
/// allocates with malloc), so it is released with free() on destruction.
class ExternNode : public Node
{
public:
    // TODO: support multiple extern symbols
    /// @param symbol  NUL-terminated, malloc-allocated name; ownership is transferred.
    ExternNode(char* symbol)
        : m_symbol(symbol) {}

    ~ExternNode() override {
        // malloc-allocated storage must be released with free(); using
        // `delete` on it is undefined behaviour. free(nullptr) is a no-op.
        std::free(m_symbol);
    }

    NODE_TYPE(Extern)

private:
    char* m_symbol;
};
/// AST node for a function declaration (`fn name() { ... }`).
///
/// Owns both the name string and the body subtree. The name must be a
/// malloc-family allocation (the parser passes the result of strdup) and is
/// released with free(); the body is a Node and is released with delete.
class FnDeclNode : public Node
{
public:
    // TODO: support parameters
    /// @param name  NUL-terminated, malloc-allocated function name; ownership is transferred.
    /// @param body  Function body subtree, or nullptr; ownership is transferred.
    FnDeclNode(char* name, Node* body)
        : m_name(name), m_body(body) {}

    ~FnDeclNode() override {
        // strdup() allocates with malloc, so the name needs free(), not delete.
        std::free(m_name);
        delete m_body;
    }

    NODE_TYPE(FnDecl)

private:
    char* m_name;
    Node* m_body;
};
/// AST node for a function call with a single argument expression.
///
/// Owns both the callee name and the argument subtree. The name is released
/// with free(), matching how the other nodes' strings are allocated
/// (malloc via StringBuilder / strdup); the argument is released with delete.
/// NOTE(review): no construction site is visible here - confirm that callers
/// pass a malloc/strdup-allocated name.
class FnCallNode : public Node
{
public:
    // TODO: support multiple arguments
    /// @param name  NUL-terminated, malloc-allocated callee name; ownership is transferred.
    /// @param arg   Argument subtree, or nullptr; ownership is transferred.
    FnCallNode(char* name, Node* arg)
        : m_name(name), m_arg(arg) {}

    ~FnCallNode() override {
        // Scalar `delete` is never correct for a C string buffer; the
        // codebase allocates its strings with malloc, so use free().
        std::free(m_name);
        delete m_arg;
    }

    NODE_TYPE(FnCall)

private:
    char* m_name;
    Node* m_arg;
};
/// AST node for a variable declaration with an initialiser expression.
///
/// Owns both the variable name and the value subtree. The name is released
/// with free(), matching how the other nodes' strings are allocated
/// (malloc via StringBuilder / strdup); the value is released with delete.
/// NOTE(review): no construction site is visible here - confirm that callers
/// pass a malloc/strdup-allocated name.
class VarDeclNode : public Node
{
public:
    /// @param name   NUL-terminated, malloc-allocated variable name; ownership is transferred.
    /// @param value  Initialiser subtree, or nullptr; ownership is transferred.
    VarDeclNode(char* name, Node* value)
        : m_name(name), m_value(value) {}

    ~VarDeclNode() override {
        // Scalar `delete` is never correct for a C string buffer; the
        // codebase allocates its strings with malloc, so use free().
        std::free(m_name);
        delete m_value;
    }

    NODE_TYPE(VarDecl)

private:
    char* m_name;
    Node* m_value;
};
/// Builds AST nodes from the token stream of a Lexer.
///
/// The parser does not own the lexer; the pointer must stay valid for as
/// long as the parser is used.
class AstParser
{
public:
    AstParser(Lexer* lexer)
        : m_lexer(lexer) {}

public:
    /// Parses the next statement from the token stream.
    ///
    /// Tokens are consumed until an identifier spelling `extern` or `fn` is
    /// found; every other token is skipped. The caller owns the returned
    /// node. Returns nullptr once the lexer runs out of tokens. A token that
    /// does not match what Lexer::Eat expects terminates the process inside
    /// Eat.
    Node* ParseStatement()
    {
        while (m_lexer->NextToken())
        {
            // Copy the token: the Eat() calls below overwrite the lexer's current one.
            const Token current = m_lexer->token();
            if (current.token != Id)
                continue;

            const char* keyword = current.string;

            if (strcmp(keyword, "extern") == 0)
            {
                // Extern: the following identifier is the imported symbol.
                m_lexer->Eat(Id);
                return new ExternNode(m_lexer->token().string);
            }

            if (strcmp(keyword, "fn") == 0)
            {
                // Function declaration: name, "(", ")", "{", "}".
                m_lexer->Eat(Id);
                char* fnName = strdup(m_lexer->token().string);
                m_lexer->Eat('(');
                // TODO: parse parameters
                m_lexer->Eat(')');
                m_lexer->Eat('{');
                // TODO: parse function body
                m_lexer->Eat('}');
                return new FnDeclNode(fnName, nullptr);
            }
        }

        // TODO: report parse error
        return nullptr;
    }

private:
    Lexer* m_lexer;
};

136
include/lexer.hpp Normal file
View File

@ -0,0 +1,136 @@
#pragma once
#include <iostream>
#include <cctype>
#include "string.hpp"
/// Token kinds produced by the lexer.
///
/// Named kinds start at 256 so that values below it are free for
/// single-character tokens: the lexer casts the character itself to
/// TokenType.
enum TokenType
{
    Eof = 256,   // end of input
    Id,          // identifier / keyword; Token::string holds the text
    IntLiteral,  // integer literal; Token::int_number holds the value
    Unknown,     // default-constructed token
};

/// A single lexed token together with its source position.
///
/// Every field has a default initialiser, so no constructor leaves a member
/// indeterminate. Tokens are copied by value and their fields read, which
/// is not valid on uninitialised members.
struct Token
{
    TokenType token = Unknown;
    // Parsed value; only set by the lexer for IntLiteral tokens.
    long int_number = 0;
    // null-terminated; only set by the lexer for Id and IntLiteral tokens,
    // nullptr otherwise. The struct does not free it.
    char* string = nullptr;
    long line_number = 0;
    long offset_start = 0;
    long offset_end = 0;
public:
    /// Token of kind `t` with no text, value or position.
    Token(TokenType t) : token(t) {}

    /// Token of kind `t` on line `lnumber`, spanning the offsets
    /// `soffset`..`eoffset` within that line.
    Token(TokenType t, long lnumber, long soffset, long eoffset)
        : token(t), line_number(lnumber), offset_start(soffset), offset_end(eoffset) {}

    /// Token of kind Unknown.
    Token() : token(Unknown) {}
};
class Lexer
{
public:
const Token& token() { return m_token; }
public:
Lexer(char* filename, StringView code)
: m_filename(filename), m_code(code) {}
Lexer(const Lexer&) = delete;
Lexer(Lexer&& other)
{
m_code = other.m_code;
other.m_code = StringView();
}
public:
bool NextToken()
{
if (m_pos >= m_code.size)
{
m_token = Token(TokenType::Eof);
return false;
}
char c = m_code.data[m_pos++];
while(std::isspace(c)) {
if (c == '\n')
{
m_line++;
m_last_newline = m_pos;
}
c = m_code.data[m_pos++];
}
if (std::isalpha(c) != 0)
{
StringBuilder s;
long offset_start = m_pos - m_last_newline;
s.PushChar(c);
// id
while (std::isalpha(m_code.data[m_pos]) != 0)
{
s.PushChar(m_code.data[m_pos++]);
}
s.PushChar('\0');
m_token = Token(Id, m_line, offset_start, m_pos - m_last_newline);
m_token.string = s.data;
return true;
}
if (std::isdigit(c) != 0)
{
StringBuilder s;
long offset_start = m_pos - m_last_newline;
bool hex = c == '0' && m_code.data[m_pos] == 'x';
s.PushChar(c);
// integer (could be hex)
while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0))
{
s.PushChar(m_code.data[m_pos++]);
}
s.PushChar('\0');
m_token = Token(IntLiteral, m_line, offset_start, m_pos - m_last_newline);
m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
m_token.string = s.data;
return true;
}
m_token = Token((TokenType)c, m_line, m_pos - m_last_newline, m_pos - m_last_newline + 1);
return true;
}
void Eat(TokenType expected)
{
if (!NextToken())
{
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF", m_filename, 0, 0, expected);
Exit(1);
}
if (token().token != expected)
{
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld", m_filename, token().line_number, token().offset_start, expected);
Exit(1);
}
}
void Eat(char expected)
{
Eat((TokenType)expected);
}
private:
void Exit(int status)
{
std::exit(status);
}
private:
StringView m_code;
char* m_filename;
Token m_token;
size_t m_pos = 0;
size_t m_line = 1;
size_t m_last_newline = 0;
};

47
include/string.hpp Normal file
View File

@ -0,0 +1,47 @@
#pragma once
#include <cstddef>
#include <cstdlib>
/// Non-owning view of `size` characters starting at `data`.
///
/// The view never allocates or frees; the viewed buffer is managed elsewhere.
struct StringView
{
    size_t size;
    const char* data;

public:
    /// Empty view: null data pointer, zero length.
    StringView()
        : size(0), data(nullptr)
    {
    }

    /// View over the `size` characters beginning at `data`.
    StringView(const char* data, size_t size)
        : size(size), data(data)
    {
    }
};
/// Growable character buffer backed by malloc/realloc.
///
/// There is deliberately no destructor: the lexer hands `data` over to the
/// token it produces, so the buffer has to outlive the builder. Whoever ends
/// up holding `data` releases it with free(). No terminator is added
/// automatically; push '\0' explicitly when a C string is needed.
struct StringBuilder
{
    size_t size;      // number of characters stored
    size_t capacity;  // number of characters `data` has room for
    char* data;       // malloc-allocated storage
public:
    StringBuilder()
    {
        size = 0;
        capacity = 10;
        data = (char*)std::malloc(capacity * sizeof(char));
        // Fail loudly instead of writing through a null pointer later on.
        if (data == nullptr) std::abort();
    }
private:
    /// Guarantees room for at least `newSize` characters, growing the buffer
    /// by a factor of 1.5 as many times as needed.
    void ensureSize(size_t newSize)
    {
        if (newSize <= capacity) return;

        size_t grown = capacity;
        while (grown < newSize)
        {
            const size_t step = grown / 2;
            grown += (step != 0 ? step : 1);
        }

        // Keep the old pointer until realloc is known to have succeeded;
        // on failure realloc leaves the original block untouched.
        char* resized = (char*)std::realloc(data, grown * sizeof(char));
        if (resized == nullptr) std::abort();

        data = resized;
        capacity = grown;
    }
public:
    /// Appends `c`, growing the buffer when it is full.
    void PushChar(char c)
    {
        ensureSize(size + 1);
        data[size++] = c;
    }
};

View File

@ -1,8 +1,55 @@
#include <iostream> #include <iostream>
#include <fstream>
#include <print> #include <print>
#include "lexer.hpp"
#include "ast.hpp"
/// Debug helper: drains `lexer` and prints one line per token in the form
/// `<filename>:<line>:<offset>: <description>`.
///
/// Identifiers print their text, integer literals their parsed value, and
/// every other token is printed as the character its type value encodes.
void dump_tokens(const char* filename, Lexer* lexer)
{
    while (lexer->NextToken())
    {
        const Token& current = lexer->token();
        std::print("{}:{}:{}: ", filename, current.line_number, current.offset_start);

        switch (current.token)
        {
        case Id:
            std::println("id = {}", current.string);
            break;
        case IntLiteral:
            std::println("int = {}", current.int_number);
            break;
        default:
            std::println("token = {}", (char)current.token);
            break;
        }
    }
}
/// Entry point: echoes the command-line arguments, reads the file named by
/// the first argument and parses its first statement.
///
/// @return 1 when no input file is given or it cannot be opened, 0 otherwise.
///         Lexer::Eat may also terminate the process with status 1 when the
///         input does not match what the parser expects.
int main(int argc, char** argv)
{
    for (int i = 0; i < argc; ++i) {
        std::println("arg#{}: {}", i, argv[i]);
    }

    if (argc <= 1) {
        fprintf(stderr, "ERROR: Input file is required.\n");
        return 1;
    }
    char* filename = argv[1];

    std::ifstream f(filename);
    if (!f.is_open()) {
        fprintf(stderr, "ERROR: Failed to open input file: %s\n", filename);
        return 1;
    }

    // Read the whole file; `content` must outlive the lexer, which only
    // holds a view of it.
    std::string content((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
    f.close();

    // std::println("{}", content);

    Lexer lexer(filename, StringView(content.c_str(), content.size()));
    // dump_tokens(filename, &lexer);

    AstParser parser(&lexer);
    // ParseStatement() may return nullptr or any node kind, so the result is
    // not blindly cast to ExternNode*; check GetType() before downcasting.
    // NOTE(review): the caller owns the returned node; nothing uses it yet
    // and it is reclaimed only by process exit.
    [[maybe_unused]] Node* node = parser.ParseStatement();

    return 0;
}

BIN
test Executable file

Binary file not shown.

16
test.asm Normal file
View File

@ -0,0 +1,16 @@
; Hand-written reference output for example.rx: prints the character with
; code 34 + 35 = 69 ('E') followed by a newline, then returns 0.
; Output format: 64-bit ELF object file.
format ELF64
section ".text" executable
; Export `main` so the linker can find it.
public main
; Import the external symbol putchar under the local name __putchar ...
extrn 'putchar' as __putchar
; ... and route calls to it through the PLT.
putchar = PLT __putchar
; NOTE(review): presumably targets the System V AMD64 calling convention
; (first integer argument in rdi, return value in rax) - confirm.
; NOTE(review): the stack pointer is not adjusted before the calls; confirm
; whether 16-byte stack alignment at the call sites is required here.
main:
; putchar(69) -> 'E'
mov rdi, 69
call putchar
; putchar(10) -> '\n'
mov rdi, 10
call putchar
; return 0
mov rax, 0
ret

BIN
test.o Normal file

Binary file not shown.