fix: lexer out of bounds
This commit is contained in:
@@ -1,7 +0,0 @@
|
||||
label main:
|
||||
b0:
|
||||
%1 = mul i32 3, i32 3
|
||||
%2 = add %1, i32 1
|
||||
param %2
|
||||
%3 = call putchar
|
||||
|
||||
15
example.rx
15
example.rx
@@ -1,3 +1,18 @@
|
||||
extern putchar
|
||||
|
||||
fn hello() {
|
||||
local h = 72
|
||||
putchar(h)
|
||||
local e = h - 3
|
||||
putchar(e)
|
||||
local l = h + 4
|
||||
putchar(l)
|
||||
putchar(l)
|
||||
local o = 100 - 21
|
||||
putchar(o)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
hello()
|
||||
putchar(3 * 3 + 1)
|
||||
}
|
||||
@@ -92,72 +92,98 @@ public:
|
||||
public:
|
||||
bool NextToken()
|
||||
{
|
||||
if (m_pos > m_code.len())
|
||||
auto len = m_code.len();
|
||||
|
||||
auto peek = [&]() -> char {
|
||||
return (m_pos < len) ? m_code.data[m_pos] : '\0';
|
||||
};
|
||||
|
||||
auto advance = [&]() -> char {
|
||||
return (m_pos < len) ? m_code.data[m_pos++] : '\0';
|
||||
};
|
||||
|
||||
// IMPORTANT: >= not >
|
||||
if (m_pos >= len)
|
||||
{
|
||||
m_token = Token(TokenType::Eof);
|
||||
return false;
|
||||
}
|
||||
|
||||
char c = m_code.data[m_pos++];
|
||||
char c = advance();
|
||||
|
||||
while(std::isspace(c)) {
|
||||
// skip whitespace safely
|
||||
while (c != '\0' && std::isspace((unsigned char)c))
|
||||
{
|
||||
if (c == '\n')
|
||||
{
|
||||
m_line++;
|
||||
m_last_newline = m_pos;
|
||||
}
|
||||
c = m_code.data[m_pos++];
|
||||
|
||||
if (m_pos >= len) // reached real EOF while skipping whitespace
|
||||
{
|
||||
m_token = Token(TokenType::Eof);
|
||||
return false;
|
||||
}
|
||||
|
||||
c = advance();
|
||||
}
|
||||
|
||||
if (m_pos-1 > m_code.len())
|
||||
if (c == '\0' || m_pos > len) // paranoia guard
|
||||
{
|
||||
m_token = Token(TokenType::Eof);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (std::isalpha(c) != 0 || c == '_')
|
||||
// identifier
|
||||
if (std::isalpha((unsigned char)c) || c == '_')
|
||||
{
|
||||
StringBuilder s;
|
||||
long offset_start = m_pos - m_last_newline;
|
||||
long offset_start = m_pos - m_last_newline - 1; // -1 because we already consumed c
|
||||
s.Push(c);
|
||||
// id
|
||||
while (std::isalpha(m_code.data[m_pos]) != 0 || m_code.data[m_pos] == '_')
|
||||
|
||||
// NOTE: usually identifiers allow digits after first char; add isdigit if you want
|
||||
while (true)
|
||||
{
|
||||
s.Push(m_code.data[m_pos++]);
|
||||
char p = peek();
|
||||
if (!(std::isalpha((unsigned char)p) || p == '_'))
|
||||
break;
|
||||
s.Push(advance());
|
||||
}
|
||||
|
||||
s.Push('\0');
|
||||
m_token = Token(TokenType::Id, m_line, offset_start, offset_start);
|
||||
m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline);
|
||||
m_token.string = s.view();
|
||||
|
||||
if (strcmp("extern", m_token.string.c_str()) == 0)
|
||||
{
|
||||
m_token.token = TokenType::Extern;
|
||||
}
|
||||
|
||||
if (strcmp("fn", m_token.string.c_str()) == 0)
|
||||
{
|
||||
m_token.token = TokenType::Fn;
|
||||
}
|
||||
|
||||
if (strcmp("local", m_token.string.c_str()) == 0)
|
||||
{
|
||||
m_token.token = TokenType::Local;
|
||||
}
|
||||
if (strcmp("extern", m_token.string.c_str()) == 0) m_token.token = TokenType::Extern;
|
||||
else if (strcmp("fn", m_token.string.c_str()) == 0) m_token.token = TokenType::Fn;
|
||||
else if (strcmp("local", m_token.string.c_str()) == 0) m_token.token = TokenType::Local;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (std::isdigit(c) != 0)
|
||||
// integer (hex supported)
|
||||
if (std::isdigit((unsigned char)c))
|
||||
{
|
||||
StringBuilder s;
|
||||
long offset_start = m_pos - m_last_newline;
|
||||
bool hex = c == '0' && m_code.data[m_pos] == 'x';
|
||||
long offset_start = m_pos - m_last_newline - 1;
|
||||
|
||||
bool hex = (c == '0' && peek() == 'x');
|
||||
s.Push(c);
|
||||
// integer (could be hex)
|
||||
while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0))
|
||||
|
||||
if (hex) s.Push(advance()); // consume 'x'
|
||||
|
||||
while (true)
|
||||
{
|
||||
s.Push(m_code.data[m_pos++]);
|
||||
char p = peek();
|
||||
if (std::isdigit((unsigned char)p) ||
|
||||
(hex && std::isxdigit((unsigned char)p)))
|
||||
{
|
||||
s.Push(advance());
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
s.Push('\0');
|
||||
m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline);
|
||||
m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
|
||||
@@ -165,7 +191,8 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
m_token = Token((TokenType)c, m_line, m_pos - m_last_newline, m_pos - m_last_newline + 1);
|
||||
// single-char token fallback
|
||||
m_token = Token((TokenType)c, m_line, m_pos - m_last_newline - 1, m_pos - m_last_newline);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user