diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..a5a2d31
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "files.associations": {
+        "print": "cpp"
+    }
+}
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c8690a..9d0e055 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,4 +6,5 @@ set(SOURCES
     src/main.cpp
 )
 
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
 add_executable(pl ${SOURCES})
diff --git a/example.rx b/example.rx
new file mode 100644
index 0000000..47deb63
--- /dev/null
+++ b/example.rx
@@ -0,0 +1,7 @@
+extern putchar;
+
+fn main() {
+    local a = 34;
+    local b = 35;
+    putchar(a + b);
+}
\ No newline at end of file
diff --git a/include/ast.hpp b/include/ast.hpp
new file mode 100644
index 0000000..44de678
--- /dev/null
+++ b/include/ast.hpp
@@ -0,0 +1,188 @@
+#pragma once
+#include <cstdlib>
+#include <cstring>
+#include "lexer.hpp"
+
+// Discriminator returned by Node::GetType().
+enum NodeType
+{
+    Expression = 0,
+    Extern,
+    FnDecl,
+    FnCall,
+    VarDecl,
+    COUNT_NODES,
+};
+
+// Base class of every AST node. Derived nodes own their children and names
+// through raw pointers, so copying is disabled to rule out double frees.
+class Node
+{
+public:
+    Node() = default;
+    Node(const Node&) = delete;
+    Node& operator=(const Node&) = delete;
+    virtual ~Node() = default;
+
+    virtual NodeType GetType() const = 0;
+};
+
+#define NODE_TYPE(x) \
+    NodeType GetType() const override { return (x); }
+
+// Binary expression. Owns both operands; nothing sets them yet, so they are
+// always null for now.
+class ExpressionNode : public Node
+{
+public:
+    ExpressionNode()
+        : m_left(nullptr), m_right(nullptr) {}
+    ~ExpressionNode() override {
+        delete m_left;
+        delete m_right;
+    }
+
+    NODE_TYPE(Expression)
+public:
+    Node* left() const { return m_left; }
+    Node* right() const { return m_right; }
+private:
+    Node* m_left;
+    Node* m_right;
+};
+
+// `extern <symbol>` declaration.
+class ExternNode : public Node
+{
+public:
+    // TODO: support multiple extern symbols
+    // Takes ownership of `symbol`, which must be malloc-allocated.
+    explicit ExternNode(char* symbol)
+        : m_symbol(symbol) {}
+    ~ExternNode() override {
+        // The lexer builds token text with malloc/realloc, so it has to be
+        // released with free(); `delete` on it is undefined behaviour.
+        std::free(m_symbol);
+    }
+
+    NODE_TYPE(Extern)
+private:
+    char* m_symbol;
+};
+
+// `fn <name>() { ... }` declaration.
+class FnDeclNode : public Node
+{
+public:
+    // TODO: support parameters
+    // Takes ownership of `name` (malloc-allocated) and of `body` (may be null).
+    FnDeclNode(char* name, Node* body)
+        : m_name(name), m_body(body) {}
+    ~FnDeclNode() override {
+        std::free(m_name);
+        delete m_body;
+    }
+
+    NODE_TYPE(FnDecl)
+private:
+    char* m_name;
+    Node* m_body;
+};
+
+// Call of function `name` with a single argument.
+class FnCallNode : public Node
+{
+public:
+    // TODO: support multiple arguments
+    // Takes ownership of `name` (malloc-allocated) and of `arg` (may be null).
+    FnCallNode(char* name, Node* arg)
+        : m_name(name), m_arg(arg) {}
+    ~FnCallNode() override {
+        std::free(m_name);
+        delete m_arg;
+    }
+
+    NODE_TYPE(FnCall)
+private:
+    char* m_name;
+    Node* m_arg;
+};
+
+// Declaration of variable `name` initialised with `value`.
+class VarDeclNode : public Node
+{
+public:
+    // Takes ownership of `name` (malloc-allocated) and of `value` (may be null).
+    VarDeclNode(char* name, Node* value)
+        : m_name(name), m_value(value) {}
+    ~VarDeclNode() override {
+        std::free(m_name);
+        delete m_value;
+    }
+
+    NODE_TYPE(VarDecl)
+private:
+    char* m_name;
+    Node* m_value;
+};
+
+// Builds AST nodes from the tokens of a Lexer it does not own.
+class AstParser
+{
+public:
+    explicit AstParser(Lexer* lexer)
+        : m_lexer(lexer) {}
+public:
+    // Parses the next statement and returns a heap-allocated node owned by the
+    // caller, or nullptr once the input is exhausted. Only `extern <id>` and
+    // `fn <id>() {}` are recognised; any other token is skipped.
+    Node* ParseStatement()
+    {
+        while (m_lexer->NextToken())
+        {
+            // Copy the token: Eat() below overwrites the lexer's current one.
+            const Token token = m_lexer->token();
+            switch (token.token)
+            {
+            case Id:
+            {
+                if (strcmp(token.string, "extern") == 0)
+                {
+                    // Extern
+                    std::free(token.string); // keyword text is no longer needed
+                    m_lexer->Eat(Id);
+                    // The node adopts the identifier text; the lexer never frees it.
+                    return new ExternNode(m_lexer->token().string);
+                }
+                else if (strcmp(token.string, "fn") == 0)
+                {
+                    // Function Declaration
+                    std::free(token.string); // keyword text is no longer needed
+                    m_lexer->Eat(Id);
+                    // Adopt the identifier text directly; duplicating it would
+                    // leak the lexer's own copy.
+                    char* name = m_lexer->token().string;
+                    m_lexer->Eat('(');
+                    // TODO: parse parameters
+                    m_lexer->Eat(')');
+                    m_lexer->Eat('{');
+                    // TODO: parse function body
+                    m_lexer->Eat('}');
+                    return new FnDeclNode(name, nullptr);
+                }
+            }
+            break;
+            default:
+                break;
+            }
+
+            // No rule consumed this token, so nothing else will release its text.
+            std::free(token.string);
+        }
+
+        // TODO: report parse error
+        return nullptr;
+    }
+private:
+    Lexer* m_lexer;
+};
diff --git a/include/lexer.hpp b/include/lexer.hpp
new file mode 100644
index 0000000..612e955
--- /dev/null
+++ b/include/lexer.hpp
@@ -0,0 +1,185 @@
+#pragma once
+#include <cctype>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+#include "string.hpp"
+
+// Token kinds. Values below 256 are left free so that a single character
+// can serve as its own token kind.
+enum TokenType
+{
+    Eof = 256,
+    Id,
+    IntLiteral,
+
+    Unknown,
+};
+
+// One lexed token. Every field has a default so that no constructor leaves
+// a member indeterminate.
+struct Token
+{
+    TokenType token = Unknown;
+    long int_number = 0;
+    // null-terminated and malloc-allocated; null when the token has no text
+    char* string = nullptr;
+    long line_number = 0;
+    long offset_start = 0;
+    long offset_end = 0;
+public:
+    Token(TokenType t) : token(t) {}
+    Token(TokenType t, long lnumber, long soffset, long eoffset)
+        : token(t), line_number(lnumber),
+          offset_start(soffset), offset_end(eoffset) {}
+    Token() : token(Unknown) {}
+};
+
+// Splits a source buffer into tokens. The lexer never frees Token::string;
+// whoever consumes a token is responsible for free()-ing its text.
+class Lexer
+{
+public:
+    const Token& token() const { return m_token; }
+public:
+    // `filename` is only used in diagnostics. Neither it nor the buffer
+    // behind `code` is copied, so both must outlive the lexer.
+    Lexer(const char* filename, StringView code)
+        : m_code(code), m_filename(filename) {}
+
+    Lexer(const Lexer&) = delete;
+    // Transfers the complete state (buffer view, filename, current token and
+    // position bookkeeping) and leaves `other` with an empty buffer.
+    Lexer(Lexer&& other) noexcept
+        : m_code(other.m_code), m_filename(other.m_filename),
+          m_token(other.m_token), m_pos(other.m_pos),
+          m_line(other.m_line), m_last_newline(other.m_last_newline)
+    {
+        other.m_code = StringView();
+        other.m_token = Token();
+    }
+public:
+    // Advances to the next token and stores it in token(). Returns false,
+    // with token() set to Eof, once the input is exhausted.
+    bool NextToken()
+    {
+        SkipWhitespace();
+        if (m_pos >= m_code.size)
+        {
+            m_token = Token(TokenType::Eof);
+            return false;
+        }
+
+        const char c = m_code.data[m_pos++];
+        // m_pos is already one past `c`, which makes the column 1-based.
+        const long offset_start = Column();
+
+        if (IsAlpha(c))
+        {
+            StringBuilder s;
+            s.PushChar(c);
+            // id
+            while (m_pos < m_code.size && IsAlpha(m_code.data[m_pos]))
+            {
+                s.PushChar(m_code.data[m_pos++]);
+            }
+            s.PushChar('\0');
+            m_token = Token(Id, Line(), offset_start, Column());
+            m_token.string = s.data;
+            return true;
+        }
+
+        if (IsDigit(c))
+        {
+            StringBuilder s;
+            s.PushChar(c);
+            // integer (could be hex)
+            const bool hex = c == '0' && m_pos < m_code.size && m_code.data[m_pos] == 'x';
+            if (hex)
+            {
+                // Keep the 'x': strtol() accepts a "0x" prefix in base 16.
+                s.PushChar(m_code.data[m_pos++]);
+            }
+            while (m_pos < m_code.size &&
+                   (hex ? IsHexDigit(m_code.data[m_pos]) : IsDigit(m_code.data[m_pos])))
+            {
+                s.PushChar(m_code.data[m_pos++]);
+            }
+            s.PushChar('\0');
+            m_token = Token(IntLiteral, Line(), offset_start, Column());
+            m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
+            m_token.string = s.data;
+            return true;
+        }
+
+        // Any other character is its own token kind. Going through unsigned
+        // char keeps bytes >= 0x80 from becoming negative enum values.
+        m_token = Token(static_cast<TokenType>(static_cast<unsigned char>(c)),
+                        Line(), offset_start, offset_start + 1);
+        return true;
+    }
+
+    // Advances to the next token and terminates the process with a
+    // diagnostic unless that token is of kind `expected`.
+    void Eat(TokenType expected)
+    {
+        if (!NextToken())
+        {
+            std::fprintf(stderr, "%s:%ld:%ld: ERROR: expected %d, but got EOF\n",
+                         m_filename, Line(), Column(), static_cast<int>(expected));
+            Exit(1);
+        }
+        if (token().token != expected)
+        {
+            std::fprintf(stderr, "%s:%ld:%ld: ERROR: expected %d, but got %d\n",
+                         m_filename, token().line_number, token().offset_start,
+                         static_cast<int>(expected), static_cast<int>(token().token));
+            Exit(1);
+        }
+    }
+
+    void Eat(char expected)
+    {
+        Eat(static_cast<TokenType>(static_cast<unsigned char>(expected)));
+    }
+private:
+    [[noreturn]] void Exit(int status)
+    {
+        std::exit(status);
+    }
+
+    // Skips whitespace while keeping the line/column bookkeeping up to date.
+    // Bounded by the buffer size, so trailing whitespace ends in a clean Eof.
+    void SkipWhitespace()
+    {
+        while (m_pos < m_code.size && IsSpace(m_code.data[m_pos]))
+        {
+            if (m_code.data[m_pos] == '\n')
+            {
+                m_line++;
+                m_last_newline = m_pos + 1;
+            }
+            ++m_pos;
+        }
+    }
+
+    long Line() const { return static_cast<long>(m_line); }
+    long Column() const { return static_cast<long>(m_pos - m_last_newline); }
+
+    // <cctype> classifiers are undefined for negative arguments, so plain
+    // char is converted to unsigned char first.
+    static bool IsSpace(char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; }
+    static bool IsAlpha(char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; }
+    static bool IsDigit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
+    static bool IsHexDigit(char c) { return std::isxdigit(static_cast<unsigned char>(c)) != 0; }
+private:
+    StringView m_code;
+    const char* m_filename;
+
+    Token m_token;
+
+    size_t m_pos = 0;
+    size_t m_line = 1;
+    size_t m_last_newline = 0;
+};
diff --git a/include/string.hpp b/include/string.hpp
new file mode 100644
index 0000000..7cc7ed6
--- /dev/null
+++ b/include/string.hpp
@@ -0,0 +1,66 @@
+#pragma once
+#include <cstddef>
+#include <cstdlib>
+
+// Non-owning view of `size` characters starting at `data`.
+struct StringView
+{
+    size_t size;
+    const char* data;
+public:
+    StringView()
+    {
+        data = nullptr;
+        size = 0;
+    }
+
+    StringView(const char* data, size_t size)
+    {
+        this->data = data;
+        this->size = size;
+    }
+};
+
+// Growable malloc-backed character buffer. It has no destructor: the lexer
+// hands `data` over to the token it builds, so whoever ends up holding the
+// pointer has to free() it.
+struct StringBuilder
+{
+    size_t size;
+    size_t capacity;
+    char* data;
+public:
+    StringBuilder()
+    {
+        size = 0;
+        capacity = 10;
+        data = static_cast<char*>(std::malloc(capacity * sizeof(char)));
+        // Nothing sensible can be built without a buffer.
+        if (data == nullptr) std::abort();
+    }
+private:
+    // Grows the buffer until it can hold at least `newSize` characters.
+    void ensureSize(size_t newSize)
+    {
+        if (newSize <= capacity) return;
+        size_t newCapacity = capacity;
+        // Repeat the 1.5x step: one step is not enough when `newSize` is far
+        // beyond the current capacity.
+        while (newCapacity < newSize)
+        {
+            newCapacity = newCapacity + (newCapacity / 2);
+        }
+        // realloc() leaves the old block untouched on failure, so the new
+        // pointer is only committed once it is known to be valid.
+        char* grown = static_cast<char*>(std::realloc(data, newCapacity * sizeof(char)));
+        if (grown == nullptr) std::abort();
+        data = grown;
+        capacity = newCapacity;
+    }
+public:
+    void PushChar(char c)
+    {
+        ensureSize(size + 1);
+        data[size++] = c;
+    }
+};
diff --git a/src/main.cpp b/src/main.cpp
index fcf145f..771705c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,8 +1,66 @@
+#include <cstdio>
 #include <iostream>
+#include <fstream>
+#include <iterator>
+#include <memory>
 #include <print>
+#include <string>
+
+#include "lexer.hpp"
+#include "ast.hpp"
+
+// Debug helper: prints every remaining token of `lexer`, one per line.
+void dump_tokens(const char* filename, Lexer* lexer)
+{
+    while (lexer->NextToken()) {
+        const Token& token = lexer->token();
+        std::print("{}:{}:{}: ", filename, token.line_number, token.offset_start);
+        if (token.token == Id)
+            std::println("id = {}", token.string);
+        else if (token.token == IntLiteral)
+            std::println("int = {}", token.int_number);
+        else
+            std::println("token = {}", static_cast<char>(token.token));
+    }
+}
 
 int main(int argc, char** argv)
 {
-    std::println("Hello, World!");
+    for (int i = 0; i < argc; ++i) {
+        std::println("arg#{}: {}", i, argv[i]);
+    }
+    if (argc < 2) {
+        std::fprintf(stderr, "ERROR: Input file is required.\n");
+        return 1;
+    }
+    char* filename = argv[1];
+
+    std::ifstream f(filename);
+    if (!f.is_open()) {
+        std::fprintf(stderr, "ERROR: Failed to open input file: %s\n", filename);
+        return 1;
+    }
+
+    std::string content((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
+
+    f.close();
+
+    // std::println("{}", content);
+
+    // The lexer only views `content`, so it has to stay alive while lexing.
+    Lexer lexer(filename, StringView(content.c_str(), content.size()));
+
+    // dump_tokens(filename, &lexer);
+
+    AstParser parser(&lexer);
+
+    // ParseStatement() returns an owning raw pointer (or nullptr); hold it in
+    // a unique_ptr so the node is destroyed on every return path.
+    std::unique_ptr<Node> node(parser.ParseStatement());
+    if (node && node->GetType() == Extern) {
+        // GetType() has confirmed the dynamic type, so static_cast is safe.
+        [[maybe_unused]] ExternNode* extrn = static_cast<ExternNode*>(node.get());
+    }
+
     return 0;
 }
diff --git a/test b/test
new file mode 100755
index 0000000..c35eebf
Binary files /dev/null and b/test differ
diff --git a/test.asm b/test.asm
new file mode 100644
index 0000000..235c87e
--- /dev/null
+++ b/test.asm
@@ -0,0 +1,16 @@
+format ELF64
+
+section ".text" executable
+
+public main
+
+extrn 'putchar' as __putchar
+putchar = PLT __putchar
+
+main:
+    mov rdi, 69
+    call putchar
+    mov rdi, 10
+    call putchar
+    mov rax, 0
+    ret
diff --git a/test.o b/test.o
new file mode 100644
index 0000000..ff075db
Binary files /dev/null and b/test.o differ