Compare commits
2 Commits
6176d549c1
...
1c04a058d7
| Author | SHA1 | Date | |
|---|---|---|---|
| 1c04a058d7 | |||
| 629b65e151 |
@@ -80,7 +80,7 @@ public:
|
|||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
// fprintf(stderr, "%s:%d:%d: ERROR: unexpected token while parsing %ld\n", m_lexer->filename(), token->line_number, token->offset_start, token->token);
|
// fprintf(stderr, "%s:%d:%d: ERROR: unexpected token while parsing %ld\n", m_lexer->filename(), token->line_number, token->offset_start, token->token);
|
||||||
ErrorLogger::Raise(Error::ParseError(m_lexer->filename(), StringView::FromFormat("unexpected token while parsing '%c'", token->token), token->line_number, token->offset_start));
|
ErrorLogger::Raise(Error::ParseError(m_lexer->filename(), StringView::FromFormat("unexpected token while parsing '%d'", token->token), token->line_number, token->offset_start));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,7 +179,7 @@ public:
|
|||||||
case TokenType::Fn: program->PushFunction(ParseFnDecl()); break;
|
case TokenType::Fn: program->PushFunction(ParseFnDecl()); break;
|
||||||
case TokenType::Extern: program->PushExtern(ParseExtern()); break;
|
case TokenType::Extern: program->PushExtern(ParseExtern()); break;
|
||||||
default: {
|
default: {
|
||||||
ErrorLogger::Raise(Error::ParseError(m_lexer->filename(), StringView::FromFormat("unexpected token while parsing '%c'", token.token), token.line_number, token.offset_start));
|
ErrorLogger::Raise(Error::ParseError(m_lexer->filename(), StringView::FromFormat("unexpected token while parsing '%d'", token.token), token.line_number, token.offset_start));
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ public:
|
|||||||
auto lnl = m_last_newline;
|
auto lnl = m_last_newline;
|
||||||
if (!NextToken())
|
if (!NextToken())
|
||||||
{
|
{
|
||||||
return nullptr;
|
return new Token(TokenType::Eof, m_line, m_pos - m_last_newline, m_pos - m_last_newline);
|
||||||
}
|
}
|
||||||
auto seeked = m_token;
|
auto seeked = m_token;
|
||||||
m_token = s;
|
m_token = s;
|
||||||
@@ -92,72 +92,98 @@ public:
|
|||||||
public:
|
public:
|
||||||
bool NextToken()
|
bool NextToken()
|
||||||
{
|
{
|
||||||
// if (m_pos >= m_code.len())
|
auto len = m_code.len();
|
||||||
// {
|
|
||||||
// m_token = Token(TokenType::Eof);
|
|
||||||
// return false;
|
|
||||||
// }
|
|
||||||
|
|
||||||
char c = m_code.data[m_pos++];
|
auto peek = [&]() -> char {
|
||||||
|
return (m_pos < len) ? m_code.data[m_pos] : '\0';
|
||||||
|
};
|
||||||
|
|
||||||
while(std::isspace(c)) {
|
auto advance = [&]() -> char {
|
||||||
if (c == '\n')
|
return (m_pos < len) ? m_code.data[m_pos++] : '\0';
|
||||||
{
|
};
|
||||||
m_line++;
|
|
||||||
m_last_newline = m_pos;
|
|
||||||
}
|
|
||||||
c = m_code.data[m_pos++];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_pos >= m_code.len())
|
// IMPORTANT: >= not >
|
||||||
|
if (m_pos >= len)
|
||||||
{
|
{
|
||||||
m_token = Token(TokenType::Eof);
|
m_token = Token(TokenType::Eof);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (std::isalpha(c) != 0 || c == '_')
|
char c = advance();
|
||||||
|
|
||||||
|
// skip whitespace safely
|
||||||
|
while (c != '\0' && std::isspace((unsigned char)c))
|
||||||
|
{
|
||||||
|
if (c == '\n')
|
||||||
|
{
|
||||||
|
m_line++;
|
||||||
|
m_last_newline = m_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_pos >= len) // reached real EOF while skipping whitespace
|
||||||
|
{
|
||||||
|
m_token = Token(TokenType::Eof);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
c = advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == '\0' || m_pos > len) // paranoia guard
|
||||||
|
{
|
||||||
|
m_token = Token(TokenType::Eof);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// identifier
|
||||||
|
if (std::isalpha((unsigned char)c) || c == '_')
|
||||||
{
|
{
|
||||||
StringBuilder s;
|
StringBuilder s;
|
||||||
long offset_start = m_pos - m_last_newline;
|
long offset_start = m_pos - m_last_newline - 1; // -1 because we already consumed c
|
||||||
s.Push(c);
|
s.Push(c);
|
||||||
// id
|
|
||||||
while (std::isalpha(m_code.data[m_pos]) != 0 || m_code.data[m_pos] == '_')
|
// NOTE: usually identifiers allow digits after first char; add isdigit if you want
|
||||||
|
while (true)
|
||||||
{
|
{
|
||||||
s.Push(m_code.data[m_pos++]);
|
char p = peek();
|
||||||
|
if (!(std::isalpha((unsigned char)p) || p == '_'))
|
||||||
|
break;
|
||||||
|
s.Push(advance());
|
||||||
}
|
}
|
||||||
|
|
||||||
s.Push('\0');
|
s.Push('\0');
|
||||||
m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline);
|
m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline);
|
||||||
m_token.string = s.view();
|
m_token.string = s.view();
|
||||||
|
|
||||||
if (strcmp("extern", m_token.string.c_str()) == 0)
|
if (strcmp("extern", m_token.string.c_str()) == 0) m_token.token = TokenType::Extern;
|
||||||
{
|
else if (strcmp("fn", m_token.string.c_str()) == 0) m_token.token = TokenType::Fn;
|
||||||
m_token.token = TokenType::Extern;
|
else if (strcmp("local", m_token.string.c_str()) == 0) m_token.token = TokenType::Local;
|
||||||
}
|
|
||||||
|
|
||||||
if (strcmp("fn", m_token.string.c_str()) == 0)
|
|
||||||
{
|
|
||||||
m_token.token = TokenType::Fn;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strcmp("local", m_token.string.c_str()) == 0)
|
|
||||||
{
|
|
||||||
m_token.token = TokenType::Local;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (std::isdigit(c) != 0)
|
// integer (hex supported)
|
||||||
|
if (std::isdigit((unsigned char)c))
|
||||||
{
|
{
|
||||||
StringBuilder s;
|
StringBuilder s;
|
||||||
long offset_start = m_pos - m_last_newline;
|
long offset_start = m_pos - m_last_newline - 1;
|
||||||
bool hex = c == '0' && m_code.data[m_pos] == 'x';
|
|
||||||
|
bool hex = (c == '0' && peek() == 'x');
|
||||||
s.Push(c);
|
s.Push(c);
|
||||||
// integer (could be hex)
|
|
||||||
while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0))
|
if (hex) s.Push(advance()); // consume 'x'
|
||||||
|
|
||||||
|
while (true)
|
||||||
{
|
{
|
||||||
s.Push(m_code.data[m_pos++]);
|
char p = peek();
|
||||||
|
if (std::isdigit((unsigned char)p) ||
|
||||||
|
(hex && std::isxdigit((unsigned char)p)))
|
||||||
|
{
|
||||||
|
s.Push(advance());
|
||||||
}
|
}
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
|
||||||
s.Push('\0');
|
s.Push('\0');
|
||||||
m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline);
|
m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline);
|
||||||
m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
|
m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
|
||||||
@@ -165,7 +191,8 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_token = Token((TokenType)c, m_line, m_pos - m_last_newline, m_pos - m_last_newline + 1);
|
// single-char token fallback
|
||||||
|
m_token = Token((TokenType)c, m_line, m_pos - m_last_newline - 1, m_pos - m_last_newline);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user