Files
pl/include/lexer.hpp

199 lines
4.6 KiB
C++

#pragma once
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include "string.hpp"
// Kinds of token the lexer can produce.
//
// Named kinds are numbered from 256 upward. Lexer::NextToken casts any other
// character directly to TokenType, so the numbering presumably exists to keep
// named kinds from colliding with plain character codes.
enum class TokenType
{
    Eof = 256,   // end of input
    Id,          // identifier
    IntLiteral,  // integer literal (decimal or 0x-prefixed)
    Extern,      // keyword `extern`
    Fn,          // keyword `fn`
    Local,       // keyword `local`
    Unknown      // kind of a default-constructed Token
};
// Returns true when `t` compares equal (via operator==) to at least one of
// the candidates in `ts`. Candidates are tried left to right and evaluation
// stops at the first match; with no candidates the result is false.
template <typename... Candidates>
inline bool is_one_of(TokenType t, Candidates... ts)
{
    return (... || (t == ts));
}
// operator== TokenType == char
// True when the token kind has the same integer value as the character `c`.
// The comparison is done at int width: narrowing the token kind to `char`
// would truncate the named kinds (>= 256) and make, for example,
// TokenType::Eof compare equal to '\0'.
inline bool operator==(TokenType t, char c)
{
    return static_cast<int>(t) == static_cast<int>(c);
}
// operator== char == TokenType
// Mirror of the (TokenType, char) overload so the character may appear on
// the left-hand side; simply forwards with the operands swapped.
inline bool operator==(char c, TokenType t)
{
    return operator==(t, c);
}
// operator!= TokenType != char
// Negation of the (TokenType, char) equality operator.
inline bool operator!=(TokenType t, char c)
{
    return !operator==(t, c);
}
// operator!= char != TokenType
// Negation of the (char, TokenType) equality operator.
inline bool operator!=(char c, TokenType t)
{
    const bool equal = (c == t);
    return !equal;
}
// A single token together with its text, value and source position.
//
// Every field other than `token` has a default member initializer, so a Token
// built through any constructor never carries indeterminate values (Tokens
// are copied by value, which would otherwise read uninitialized members).
struct Token
{
    // Kind of the token; either a named TokenType or a character cast to it.
    TokenType token;
    // Numeric value; the lexer fills this in for IntLiteral tokens.
    long int_number = 0;
    // Token text.
    // null-terminated
    StringView string{};
    // Line on which the token was found.
    long line_number = 0;
    // Offsets of the token relative to the start of its line.
    long offset_start = 0;
    long offset_end = 0;

    // Token of kind `t` with no position information.
    Token(TokenType t) : token(t) {}

    // Token of kind `t` located on line `lnumber` between offsets
    // `soffset` and `eoffset`.
    Token(TokenType t, long lnumber, long soffset, long eoffset)
        : token(t), line_number(lnumber), offset_start(soffset), offset_end(eoffset) {}

    // Default token has kind Unknown.
    Token() : token(TokenType::Unknown) {}
};
class Lexer
{
public:
const Token& token() const { return m_token; }
const Token* seek_token()
{
auto s = m_token;
auto p = m_pos;
auto l = m_line;
auto lnl = m_last_newline;
if (!NextToken())
{
return nullptr;
}
auto seeked = m_token;
m_token = s;
m_pos = p;
m_line = l;
m_last_newline = lnl;
return new Token(seeked);
}
const char* filename() const { return m_filename; }
public:
Lexer(char* filename, StringView code)
: m_filename(filename), m_code(code) {}
Lexer(const Lexer&) = delete;
public:
bool NextToken()
{
if (m_pos >= m_code.size || m_code.data[m_pos] == '\0')
{
m_token = Token(TokenType::Eof);
return false;
}
char c = m_code.data[m_pos++];
while(std::isspace(c)) {
if (c == '\n')
{
m_line++;
m_last_newline = m_pos;
}
c = m_code.data[m_pos++];
}
if (std::isalpha(c) != 0)
{
StringBuilder s;
long offset_start = m_pos - m_last_newline;
s.Push(c);
// id
while (std::isalpha(m_code.data[m_pos]) != 0)
{
s.Push(m_code.data[m_pos++]);
}
s.Push('\0');
m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline);
m_token.string = s.view();
if (strcmp("extern", m_token.string.c_str()) == 0)
{
m_token.token = TokenType::Extern;
}
if (strcmp("fn", m_token.string.c_str()) == 0)
{
m_token.token = TokenType::Fn;
}
if (strcmp("local", m_token.string.c_str()) == 0)
{
m_token.token = TokenType::Local;
}
return true;
}
if (std::isdigit(c) != 0)
{
StringBuilder s;
long offset_start = m_pos - m_last_newline;
bool hex = c == '0' && m_code.data[m_pos] == 'x';
s.Push(c);
// integer (could be hex)
while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0))
{
s.Push(m_code.data[m_pos++]);
}
s.Push('\0');
m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline);
m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
m_token.string = s.data;
return true;
}
m_token = Token((TokenType)c, m_line, m_pos - m_last_newline, m_pos - m_last_newline + 1);
return true;
}
void NextExpect(TokenType expected)
{
if (!NextToken())
{
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF\n", m_filename, 0, 0, expected);
Exit(1);
}
if (token().token != expected)
{
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld\n", m_filename, token().line_number, token().offset_start, expected, token().token);
Exit(1);
}
}
void NextExpect(char expected)
{
NextExpect((TokenType)expected);
}
private:
void Exit(int status)
{
std::exit(status);
}
private:
StringView m_code;
char* m_filename;
Token m_token;
size_t m_pos = 0;
size_t m_line = 1;
size_t m_last_newline = 0;
};