#pragma once #include #include #include "string.hpp" enum TokenType { Eof = 256, Id, IntLiteral, Unknown, }; struct Token { TokenType token; long int_number; // null-terminated char* string; long line_number; long offset_start; long offset_end; public: Token(TokenType t) : token(t) {} Token(TokenType t, long lnumber, long soffset, long eoffset) : token(t), line_number(lnumber), offset_start(soffset), offset_end(eoffset) {} Token() : token(Unknown) {} }; class Lexer { public: const Token& token() { return m_token; } public: Lexer(char* filename, StringView code) : m_filename(filename), m_code(code) {} Lexer(const Lexer&) = delete; Lexer(Lexer&& other) { m_code = other.m_code; other.m_code = StringView(); } public: bool NextToken() { if (m_pos >= m_code.size) { m_token = Token(TokenType::Eof); return false; } char c = m_code.data[m_pos++]; while(std::isspace(c)) { if (c == '\n') { m_line++; m_last_newline = m_pos; } c = m_code.data[m_pos++]; } if (std::isalpha(c) != 0) { StringBuilder s; long offset_start = m_pos - m_last_newline; s.PushChar(c); // id while (std::isalpha(m_code.data[m_pos]) != 0) { s.PushChar(m_code.data[m_pos++]); } s.PushChar('\0'); m_token = Token(Id, m_line, offset_start, m_pos - m_last_newline); m_token.string = s.data; return true; } if (std::isdigit(c) != 0) { StringBuilder s; long offset_start = m_pos - m_last_newline; bool hex = c == '0' && m_code.data[m_pos] == 'x'; s.PushChar(c); // integer (could be hex) while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0)) { s.PushChar(m_code.data[m_pos++]); } s.PushChar('\0'); m_token = Token(IntLiteral, m_line, offset_start, m_pos - m_last_newline); m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10); m_token.string = s.data; return true; } m_token = Token((TokenType)c, m_line, m_pos - m_last_newline, m_pos - m_last_newline + 1); return true; } void Eat(TokenType expected) { if (!NextToken()) { fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF", m_filename, 0, 0, expected); Exit(1); } if (token().token != expected) { fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld", m_filename, token().line_number, token().offset_start, expected); Exit(1); } } void Eat(char expected) { Eat((TokenType)expected); } private: void Exit(int status) { std::exit(status); } private: StringView m_code; char* m_filename; Token m_token; size_t m_pos = 0; size_t m_line = 1; size_t m_last_newline = 0; };