fix: lexer out of bounds

This commit is contained in:
2026-01-03 15:03:45 +01:00
parent 629b65e151
commit 1c04a058d7
3 changed files with 74 additions and 39 deletions

View File

@@ -1,7 +0,0 @@
label main:
b0:
%1 = mul i32 3, i32 3
%2 = add %1, i32 1
param %2
%3 = call putchar

View File

@@ -1,3 +1,18 @@
extern putchar
fn hello() {
local h = 72
putchar(h)
local e = h - 3
putchar(e)
local l = h + 4
putchar(l)
putchar(l)
local o = 100 - 21
putchar(o)
}
fn main() { fn main() {
hello()
putchar(3 * 3 + 1) putchar(3 * 3 + 1)
} }

View File

@@ -92,72 +92,98 @@ public:
public: public:
bool NextToken() bool NextToken()
{ {
if (m_pos > m_code.len()) auto len = m_code.len();
auto peek = [&]() -> char {
return (m_pos < len) ? m_code.data[m_pos] : '\0';
};
auto advance = [&]() -> char {
return (m_pos < len) ? m_code.data[m_pos++] : '\0';
};
// IMPORTANT: >= not >
if (m_pos >= len)
{ {
m_token = Token(TokenType::Eof); m_token = Token(TokenType::Eof);
return false; return false;
} }
char c = m_code.data[m_pos++]; char c = advance();
while(std::isspace(c)) { // skip whitespace safely
while (c != '\0' && std::isspace((unsigned char)c))
{
if (c == '\n') if (c == '\n')
{ {
m_line++; m_line++;
m_last_newline = m_pos; m_last_newline = m_pos;
} }
c = m_code.data[m_pos++];
if (m_pos >= len) // reached real EOF while skipping whitespace
{
m_token = Token(TokenType::Eof);
return false;
}
c = advance();
} }
if (m_pos-1 > m_code.len()) if (c == '\0' || m_pos > len) // paranoia guard
{ {
m_token = Token(TokenType::Eof); m_token = Token(TokenType::Eof);
return false; return false;
} }
if (std::isalpha(c) != 0 || c == '_') // identifier
if (std::isalpha((unsigned char)c) || c == '_')
{ {
StringBuilder s; StringBuilder s;
long offset_start = m_pos - m_last_newline; long offset_start = m_pos - m_last_newline - 1; // -1 because we already consumed c
s.Push(c); s.Push(c);
// id
while (std::isalpha(m_code.data[m_pos]) != 0 || m_code.data[m_pos] == '_') // NOTE: usually identifiers allow digits after first char; add isdigit if you want
while (true)
{ {
s.Push(m_code.data[m_pos++]); char p = peek();
if (!(std::isalpha((unsigned char)p) || p == '_'))
break;
s.Push(advance());
} }
s.Push('\0'); s.Push('\0');
m_token = Token(TokenType::Id, m_line, offset_start, offset_start); m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline);
m_token.string = s.view(); m_token.string = s.view();
if (strcmp("extern", m_token.string.c_str()) == 0) if (strcmp("extern", m_token.string.c_str()) == 0) m_token.token = TokenType::Extern;
{ else if (strcmp("fn", m_token.string.c_str()) == 0) m_token.token = TokenType::Fn;
m_token.token = TokenType::Extern; else if (strcmp("local", m_token.string.c_str()) == 0) m_token.token = TokenType::Local;
}
if (strcmp("fn", m_token.string.c_str()) == 0)
{
m_token.token = TokenType::Fn;
}
if (strcmp("local", m_token.string.c_str()) == 0)
{
m_token.token = TokenType::Local;
}
return true; return true;
} }
if (std::isdigit(c) != 0) // integer (hex supported)
if (std::isdigit((unsigned char)c))
{ {
StringBuilder s; StringBuilder s;
long offset_start = m_pos - m_last_newline; long offset_start = m_pos - m_last_newline - 1;
bool hex = c == '0' && m_code.data[m_pos] == 'x';
bool hex = (c == '0' && peek() == 'x');
s.Push(c); s.Push(c);
// integer (could be hex)
while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0)) if (hex) s.Push(advance()); // consume 'x'
while (true)
{ {
s.Push(m_code.data[m_pos++]); char p = peek();
if (std::isdigit((unsigned char)p) ||
(hex && std::isxdigit((unsigned char)p)))
{
s.Push(advance());
}
else break;
} }
s.Push('\0'); s.Push('\0');
m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline); m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline);
m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10); m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
@@ -165,7 +191,8 @@ public:
return true; return true;
} }
m_token = Token((TokenType)c, m_line, m_pos - m_last_newline, m_pos - m_last_newline + 1); // single-char token fallback
m_token = Token((TokenType)c, m_line, m_pos - m_last_newline - 1, m_pos - m_last_newline);
return true; return true;
} }