#pragma once #include #include #include "prelude/string.hpp" enum class TokenType { Eof = 256, Id, IntLiteral, Extern, Fn, Local, Unknown, }; template inline bool is_one_of(TokenType t, Ts... ts) { return ((t == ts) || ...); } // operator== TokenType == char inline bool operator==(TokenType t, char c) { return static_cast(t) == c; } // operator== char == TokenType inline bool operator==(char c, TokenType t) { return t == c; // reuse the function above } // operator!= inline bool operator!=(TokenType t, char c) { return !(t == c); } inline bool operator!=(char c, TokenType t) { return !(t == c); } struct Token { TokenType token = TokenType::Unknown; long int_number = 0; // null-terminated StringView string; long line_number = 0; long offset_start = 0; long offset_end = 0; public: Token(TokenType t) : token(t) {} Token(TokenType t, long lnumber, long soffset, long eoffset) : token(t), line_number(lnumber), offset_start(soffset), offset_end(eoffset) {} Token() : token(TokenType::Unknown) {} }; class Lexer { public: const Token& token() const { return m_token; } const Token* seek_token() { auto s = m_token; auto p = m_pos; auto l = m_line; auto lnl = m_last_newline; if (!NextToken()) { return new Token(TokenType::Eof, m_line, m_pos - m_last_newline, m_pos - m_last_newline); } auto seeked = m_token; m_token = s; m_pos = p; m_line = l; m_last_newline = lnl; return new Token(seeked); } const char* filename() const { return m_filename; } public: Lexer(char* filename, StringView code) : m_filename(filename), m_code(code) {} Lexer(const Lexer&) = delete; public: bool NextToken() { auto len = m_code.len(); auto peek = [&]() -> char { return (m_pos < len) ? m_code.data[m_pos] : '\0'; }; auto advance = [&]() -> char { return (m_pos < len) ? m_code.data[m_pos++] : '\0'; }; // IMPORTANT: >= not > if (m_pos >= len) { m_token = Token(TokenType::Eof); return false; } char c = advance(); // skip whitespace safely while (c != '\0' && std::isspace((unsigned char)c)) { if (c == '\n') { m_line++; m_last_newline = m_pos; } if (m_pos >= len) // reached real EOF while skipping whitespace { m_token = Token(TokenType::Eof); return false; } c = advance(); } if (c == '\0' || m_pos > len) // paranoia guard { m_token = Token(TokenType::Eof); return false; } // identifier if (std::isalpha((unsigned char)c) || c == '_') { StringBuilder s; long offset_start = m_pos - m_last_newline - 1; // -1 because we already consumed c s.Push(c); // NOTE: usually identifiers allow digits after first char; add isdigit if you want while (true) { char p = peek(); if (!(std::isalpha((unsigned char)p) || p == '_')) break; s.Push(advance()); } s.Push('\0'); m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline); m_token.string = s.view(); if (strcmp("extern", m_token.string.c_str()) == 0) m_token.token = TokenType::Extern; else if (strcmp("fn", m_token.string.c_str()) == 0) m_token.token = TokenType::Fn; else if (strcmp("local", m_token.string.c_str()) == 0) m_token.token = TokenType::Local; return true; } // integer (hex supported) if (std::isdigit((unsigned char)c)) { StringBuilder s; long offset_start = m_pos - m_last_newline - 1; bool hex = (c == '0' && peek() == 'x'); s.Push(c); if (hex) s.Push(advance()); // consume 'x' while (true) { char p = peek(); if (std::isdigit((unsigned char)p) || (hex && std::isxdigit((unsigned char)p))) { s.Push(advance()); } else break; } s.Push('\0'); m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline); m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10); m_token.string = s.data; return true; } // single-char token fallback m_token = Token((TokenType)c, m_line, m_pos - m_last_newline - 1, m_pos - m_last_newline); return true; } void NextExpect(TokenType expected) { if (!NextToken()) { fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF\n", m_filename, 0, 0, expected); Exit(1); } if (token().token != expected) { fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld\n", m_filename, token().line_number, token().offset_start, expected, token().token); Exit(1); } } void NextExpect(char expected) { NextExpect((TokenType)expected); } private: void Exit(int status) { std::exit(status); } private: StringView m_code; char* m_filename; Token m_token; size_t m_pos = 0; size_t m_line = 1; size_t m_last_newline = 0; };