#pragma once #include #include #include #include "string.hpp" enum class TokenType { Eof = 256, Id, IntLiteral, Extern, Fn, Local, Unknown, }; template inline bool is_one_of(TokenType t, Ts... ts) { return ((t == ts) || ...); } // operator== TokenType == char inline bool operator==(TokenType t, char c) { return static_cast(t) == c; } // operator== char == TokenType inline bool operator==(char c, TokenType t) { return t == c; // reuse the function above } // operator!= inline bool operator!=(TokenType t, char c) { return !(t == c); } inline bool operator!=(char c, TokenType t) { return !(t == c); } struct Token { TokenType token; long int_number; // null-terminated char* string; long line_number; long offset_start; long offset_end; public: Token(TokenType t) : token(t) {} Token(TokenType t, long lnumber, long soffset, long eoffset) : token(t), line_number(lnumber), offset_start(soffset), offset_end(eoffset) {} Token() : token(TokenType::Unknown) {} }; class Lexer { public: const Token& token() const { return m_token; } const Token* seek_token() { auto s = m_token; auto p = m_pos; auto l = m_line; auto lnl = m_last_newline; if (!NextToken()) { return nullptr; } auto seeked = m_token; m_token = s; m_pos = p; m_line = l; m_last_newline = lnl; return new Token(seeked); } const char* filename() const { return m_filename; } public: Lexer(char* filename, StringView code) : m_filename(filename), m_code(code) {} Lexer(const Lexer&) = delete; Lexer(Lexer&& other) { m_code = other.m_code; other.m_code = StringView(); } public: bool NextToken() { if (m_pos >= m_code.size) { m_token = Token(TokenType::Eof); return false; } char c = m_code.data[m_pos++]; while(std::isspace(c)) { if (c == '\n') { m_line++; m_last_newline = m_pos; } c = m_code.data[m_pos++]; } if (std::isalpha(c) != 0) { StringBuilder s; long offset_start = m_pos - m_last_newline; s.PushChar(c); // id while (std::isalpha(m_code.data[m_pos]) != 0) { s.PushChar(m_code.data[m_pos++]); } s.PushChar('\0'); m_token = Token(TokenType::Id, m_line, offset_start, m_pos - m_last_newline); m_token.string = s.data; if (strcmp("extern", m_token.string) == 0) { m_token.token = TokenType::Extern; } if (strcmp("fn", m_token.string) == 0) { m_token.token = TokenType::Fn; } if (strcmp("local", m_token.string) == 0) { m_token.token = TokenType::Local; } return true; } if (std::isdigit(c) != 0) { StringBuilder s; long offset_start = m_pos - m_last_newline; bool hex = c == '0' && m_code.data[m_pos] == 'x'; s.PushChar(c); // integer (could be hex) while (std::isdigit(m_code.data[m_pos]) != 0 || (hex && std::isalpha(m_code.data[m_pos]) != 0)) { s.PushChar(m_code.data[m_pos++]); } s.PushChar('\0'); m_token = Token(TokenType::IntLiteral, m_line, offset_start, m_pos - m_last_newline); m_token.int_number = std::strtol(s.data, nullptr, hex ? 16 : 10); m_token.string = s.data; return true; } m_token = Token((TokenType)c, m_line, m_pos - m_last_newline, m_pos - m_last_newline + 1); return true; } void NextExpect(TokenType expected) { if (!NextToken()) { fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got EOF\n", m_filename, 0, 0, expected); Exit(1); } if (token().token != expected) { fprintf(stderr, "%s:%d:%d: ERROR: expected %ld, but got %ld\n", m_filename, token().line_number, token().offset_start, expected, token().token); Exit(1); } } void NextExpect(char expected) { NextExpect((TokenType)expected); } private: void Exit(int status) { std::exit(status); } private: StringView m_code; char* m_filename; Token m_token; size_t m_pos = 0; size_t m_line = 1; size_t m_last_newline = 0; };