136 lines
3.3 KiB
C++
136 lines
3.3 KiB
C++
#pragma once
|
|
#include <iostream>
|
|
#include <cctype>
|
|
|
|
#include "string.hpp"
|
|
|
|
/// Kinds of token produced by the lexer.
///
/// Single-character tokens are represented by the character's own value cast
/// to TokenType, so the named kinds start at 256 to stay clear of every
/// possible byte value.
///
/// The underlying type is fixed to `int` so that converting any `char`
/// (including bytes that are negative where `char` is signed) to TokenType is
/// well-defined; without a fixed underlying type only values inside the
/// enumerators' bit range may be stored.
enum TokenType : int
{
    Eof = 256,   ///< End of input.
    Id,          ///< Identifier.
    IntLiteral,  ///< Integer literal.

    Unknown,     ///< Placeholder kind of a default-constructed Token.
};
|
|
|
|
struct Token
|
|
{
|
|
TokenType token;
|
|
long int_number;
|
|
// null-terminated
|
|
char* string;
|
|
long line_number;
|
|
long offset_start;
|
|
long offset_end;
|
|
public:
|
|
Token(TokenType t) : token(t) {}
|
|
Token(TokenType t, long lnumber, long soffset, long eoffset)
|
|
: token(t), line_number(lnumber), offset_start(soffset), offset_end(eoffset) {}
|
|
Token() : token(Unknown) {}
|
|
};
|
|
|
|
class Lexer
|
|
{
|
|
public:
|
|
const Token& token() { return m_token; }
|
|
public:
|
|
Lexer(char* filename, StringView code)
|
|
: m_filename(filename), m_code(code) {}
|
|
|
|
Lexer(const Lexer&) = delete;
|
|
Lexer(Lexer&& other)
|
|
{
|
|
m_code = other.m_code;
|
|
other.m_code = StringView();
|
|
}
|
|
public:
|
|
bool NextToken()
|
|
{
|
|
if (m_pos >= m_code.size)
|
|
{
|
|
m_token = Token(TokenType::Eof);
|
|
return false;
|
|
}
|
|
|
|
char c = m_code.data[m_pos++];
|
|
|
|
while(std::isspace(c)) {
|
|
if (c == '\n')
|
|
{
|
|
m_line++;
|
|
m_last_newline = m_pos;
|
|
}
|
|
c = m_code.data[m_pos++];
|
|
}
|
|
|
|
if (std::isalpha(c) != 0)
|
|
{
|
|
StringBuilder s;
|
|
long offset_start = m_pos - m_last_newline;
|
|
s.PushChar(c);
|
|
// id
|
|
while (std::isalpha(m_code.data[m_pos]) != 0)
|
|
{
|
|
s.PushChar(m_code.data[m_pos++]);
|
|
}
|
|
s.PushChar('\0');
|
|
m_token = Token(Id, m_line, offset_start, m_pos - m_last_newline);
|
|
m_token.string = s.data;
|
|
return true;
|
|
}
|
|
|
|
if (std::isdigit(c) != 0)
|
|
{
|
|
StringBuilder s;
|
|
long offset_start = m_pos - m_last_newline;
|
|
bool hex = c == '0' && m_code.data[m_pos] == 'x';
|
|
s.PushChar(c);
|
|
// integer (could be hex)
|
|
while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0))
|
|
{
|
|
s.PushChar(m_code.data[m_pos++]);
|
|
}
|
|
s.PushChar('\0');
|
|
m_token = Token(IntLiteral, m_line, offset_start, m_pos - m_last_newline);
|
|
m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10);
|
|
m_token.string = s.data;
|
|
return true;
|
|
}
|
|
|
|
m_token = Token((TokenType)c, m_line, m_pos - m_last_newline, m_pos - m_last_newline + 1);
|
|
return true;
|
|
}
|
|
|
|
void Eat(TokenType expected)
|
|
{
|
|
if (!NextToken())
|
|
{
|
|
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF", m_filename, 0, 0, expected);
|
|
Exit(1);
|
|
}
|
|
if (token().token != expected)
|
|
{
|
|
fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld", m_filename, token().line_number, token().offset_start, expected);
|
|
Exit(1);
|
|
}
|
|
}
|
|
|
|
void Eat(char expected)
|
|
{
|
|
Eat((TokenType)expected);
|
|
}
|
|
private:
|
|
void Exit(int status)
|
|
{
|
|
std::exit(status);
|
|
}
|
|
private:
|
|
StringView m_code;
|
|
char* m_filename;
|
|
|
|
Token m_token;
|
|
|
|
size_t m_pos = 0;
|
|
size_t m_line = 1;
|
|
size_t m_last_newline = 0;
|
|
}; |