diff --git a/example.ll b/example.ll deleted file mode 100644 index 2212dd0..0000000 --- a/example.ll +++ /dev/null @@ -1,7 +0,0 @@ -label main: -b0: - %1 = mul i32 3, i32 3 - %2 = add %1, i32 1 - param %2 - %3 = call putchar - diff --git a/example.rx b/example.rx index 6dd3718..e5db68f 100644 --- a/example.rx +++ b/example.rx @@ -1,3 +1,18 @@ +extern putchar + +fn hello() { + local h = 72 + putchar(h) + local e = h - 3 + putchar(e) + local l = h + 4 + putchar(l) + putchar(l) + local o = 100 - 21 + putchar(o) +} + fn main() { + hello() putchar(3 * 3 + 1) } \ No newline at end of file diff --git a/include/parser/lexer.hpp b/include/parser/lexer.hpp index 06a0e33..376f5ed 100644 --- a/include/parser/lexer.hpp +++ b/include/parser/lexer.hpp @@ -92,72 +92,98 @@ public: public: bool NextToken() { - if (m_pos > m_code.len()) + auto len = m_code.len(); + + auto peek = [&]() -> char { + return (m_pos < len) ? m_code.data[m_pos] : '\0'; + }; + + auto advance = [&]() -> char { + return (m_pos < len) ? m_code.data[m_pos++] : '\0'; + }; + + // IMPORTANT: >= not > + if (m_pos >= len) { m_token = Token(TokenType::Eof); return false; } - char c = m_code.data[m_pos++]; + char c = advance(); - while(std::isspace(c)) { + // skip whitespace safely + while (c != '\0' && std::isspace((unsigned char)c)) + { if (c == '\n') { m_line++; m_last_newline = m_pos; } - c = m_code.data[m_pos++]; + + if (m_pos >= len) // reached real EOF while skipping whitespace + { + m_token = Token(TokenType::Eof); + return false; + } + + c = advance(); } - if (m_pos-1 > m_code.len()) + if (c == '\0' || m_pos > len) // paranoia guard { m_token = Token(TokenType::Eof); return false; } - if (std::isalpha(c) != 0 || c == '_') + // identifier + if (std::isalpha((unsigned char)c) || c == '_') { StringBuilder s; - long offset_start = m_pos - m_last_newline; + long offset_start = m_pos - m_last_newline - 1; // -1 because we already consumed c s.Push(c); - // id - while (std::isalpha(m_code.data[m_pos]) != 0 || m_code.data[m_pos] == '_') + + // NOTE: usually identifiers allow digits after first char; add isdigit if you want + while (true) { - s.Push(m_code.data[m_pos++]); + char p = peek(); + if (!(std::isalpha((unsigned char)p) || p == '_')) + break; + s.Push(advance()); } + s.Push('\0'); - m_token = Token(TokenType::Id, m_line, offset_start, offset_start); + m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline); m_token.string = s.view(); - if (strcmp("extern", m_token.string.c_str()) == 0) - { - m_token.token = TokenType::Extern; - } - - if (strcmp("fn", m_token.string.c_str()) == 0) - { - m_token.token = TokenType::Fn; - } - - if (strcmp("local", m_token.string.c_str()) == 0) - { - m_token.token = TokenType::Local; - } + if (strcmp("extern", m_token.string.c_str()) == 0) m_token.token = TokenType::Extern; + else if (strcmp("fn", m_token.string.c_str()) == 0) m_token.token = TokenType::Fn; + else if (strcmp("local", m_token.string.c_str()) == 0) m_token.token = TokenType::Local; return true; } - if (std::isdigit(c) != 0) + // integer (hex supported) + if (std::isdigit((unsigned char)c)) { StringBuilder s; - long offset_start = m_pos - m_last_newline; - bool hex = c == '0' && m_code.data[m_pos] == 'x'; + long offset_start = m_pos - m_last_newline - 1; + + bool hex = (c == '0' && peek() == 'x'); s.Push(c); - // integer (could be hex) - while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0)) + + if (hex) s.Push(advance()); // consume 'x' + + while (true) { - s.Push(m_code.data[m_pos++]); + char p = peek(); + if (std::isdigit((unsigned char)p) || + (hex && std::isxdigit((unsigned char)p))) + { + s.Push(advance()); + } + else break; } + s.Push('\0'); m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline); m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10); @@ -165,7 +191,8 @@ public: return true; } - m_token = Token((TokenType)c, m_line, m_pos - m_last_newline, m_pos - m_last_newline + 1); + // single-char token fallback + m_token = Token((TokenType)c, m_line, m_pos - m_last_newline - 1, m_pos - m_last_newline); return true; }