feat: project init, basic lexer implementation

2025-08-27 18:39:00 +02:00
commit a5324195f9
6 changed files with 3356 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
+nob
+build/
+
--- a/external/nob.h
+++ b/external/nob.h
--- a/external/stb_c_lexer.h
+++ b/external/stb_c_lexer.h
@@ -0,0 +1,774 @@
+// stb_c_lexer.h 0.01 -- public domain Sean Barrett 2013
+// lexer for making little C-like languages with recursive-descent parsers
+//
+// This file provides both the interface and the implementation.
+// To instantiate the implementation,
+//      #define STB_C_LEXER_IMPLEMENTATION
+// in *ONE* source file, before #including this file.
+//
+// The default configuration is fairly close to a C lexer, although
+// suffixes on integer constants are not handled (you can override this).
+//
+// Status:
+//     - haven't tested compiling as C++
+//     - haven't tested the float parsing path
+//     - haven't tested "get_location" function (used for error reporting)
+//     - haven't tested the non-default-config paths (e.g. non-stdlib)
+//     - only tested default-config paths by eyeballing output of self-parse
+//
+//     - haven't implemented multiline strings
+//     - haven't implemented octal/hex character constants
+//     - haven't implemented support for unicode CLEX_char
+//     - need to expand error reporting so you don't just get "CLEX_parse_error"
+
+#ifndef STB_C_LEXER_DEFINITIONS
+// to change the default parsing rules, copy the following lines
+// into your C/C++ file *before* including this, and then replace
+// the Y's with N's for the ones you don't want.
+// --BEGIN--
+
+#define STB_C_LEX_C_DECIMAL_INTS    Y   //  "0|[1-9][0-9]*"                        CLEX_int
+#define STB_C_LEX_C_HEX_INTS        Y   //  "0x[0-9a-fA-F]+"                       CLEX_int
+#define STB_C_LEX_C_OCTAL_INTS      Y   //  "[0-7]+"                               CLEX_int
+#define STB_C_LEX_C_DECIMAL_FLOATS  Y   //  "[0-9]*(.[0-9]*([eE]-?[0-9]+)?)        CLEX_float
+#define STB_C_LEX_C_IDENTIFIERS     Y   //  "[_a-zA-Z][_a-zA-Z0-9]*"               CLEX_id
+#define STB_C_LEX_C_DQ_STRINGS      Y   //  double-quote-delimited strings with escapes  CLEX_dqstring
+#define STB_C_LEX_C_SQ_STRINGS      N   //  single-quote-delimited strings with escapes  CLEX_ssstring
+#define STB_C_LEX_C_CHARS           Y   //  single-quote-delimited character with escape CLEX_chars
+#define STB_C_LEX_C_COMMENTS        Y   //  "/* comment */"
+#define STB_C_LEX_CPP_COMMENTS      Y   //  "// comment to end of line\n"
+#define STB_C_LEX_C_COMPARISONS     Y   //  "==" CLEX_eq  "!=" CLEX_noteq   "<=" CLEX_lesseq  ">=" CLEX_greatereq
+#define STB_C_LEX_C_LOGICAL         Y   //  "&&"  CLEX_andand   "||"  CLEX_oror
+#define STB_C_LEX_C_SHIFTS          Y   //  "<<"  CLEX_shl      ">>"  CLEX_shr
+#define STB_C_LEX_C_INCREMENTS      Y   //  "++"  CLEX_plusplus "--"  CLEX_minusminus
+#define STB_C_LEX_C_ARROW           Y   //  "->"  CLEX_arrow
+#define STB_C_LEX_EQUAL_ARROW       N   //  "=>"  CLEX_eqarrow
+#define STB_C_LEX_C_BITWISEEQ       Y   //  "&="  CLEX_andeq    "|="  CLEX_oreq     "^="  CLEX_xoreq
+#define STB_C_LEX_C_ARITHEQ         Y   //  "+="  CLEX_pluseq   "-="  CLEX_minuseq
+                                        //  "*="  CLEX_muleq    "/="  CLEX_diveq    "%=" CLEX_modeq
+                                        //  if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ:
+                                        //                      "<<=" CLEX_shleq    ">>=" CLEX_shreq
+
+#define STB_C_LEX_PARSE_SUFFIXES    N   // letters after numbers are parsed as part of those numbers, and must be in suffix list below
+#define STB_C_LEX_DECIMAL_SUFFIXES  ""  // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage
+#define STB_C_LEX_HEX_SUFFIXES      ""  // e.g. "uUlL"
+#define STB_C_LEX_OCTAL_SUFFIXES    ""  // e.g. "uUlL"
+#define STB_C_LEX_FLOAT_SUFFIXES    ""  //
+
+#define STB_C_LEX_0_IS_EOF             N  // if Y, ends parsing at '\0'; if N, returns '\0' as token
+#define STB_C_LEX_INTEGERS_AS_DOUBLES  N  // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N
+#define STB_C_LEX_MULTILINE_DSTRINGS   N  // allow newlines in double-quoted strings
+#define STB_C_LEX_MULTILINE_SSTRINGS   N  // allow newlines in single-quoted strings
+#define STB_C_LEX_USE_STDLIB           Y  // use strtod,strtol for parsing #s; otherwise inaccurate hack
+#define STB_C_LEX_DOLLAR_IDENTIFIER    Y  // allow $ as an identifier character
+#define STB_C_LEX_FLOAT_NO_DECIMAL     Y  // allow floats that have no decimal point if they have an exponent
+
+#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES  N   // if Y, all CLEX_ token names are defined, even if never returned
+                                              // leaving it as N should help you catch config bugs
+
+//#define STB_C_LEX_ISWHITE(str)    ... // return length in bytes of first character if it is whitespace
+
+#define STB_C_LEXER_DEFINITIONS         // This line prevents the header file from replacing your definitions
+// --END--
+
+#endif
+
+#ifndef INCLUDE_STB_C_LEXER_H
+#define INCLUDE_STB_C_LEXER_H
+
+typedef struct
+{
+   char *where_firstchar;
+   char *where_lastchar;
+   long token;
+   double real_number;
+   long   int_number;
+   char *string;
+   int string_len;
+} stb_lex_token;
+
+typedef struct
+{
+   char *input_stream;
+   char *eof;
+   char *parse_point;
+   char *string_storage;
+   int   string_storage_len;
+} stb_lexer;
+
+typedef struct
+{
+   int line_number;
+   int line_offset;
+} stb_lex_location;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length);
+// this function initialize the 'lexer' structure
+//   Input:
+//   - input_stream points to the file to parse, loaded into memory
+//   - input_stream_end points to the end of the file, or NULL if you use 0-for-EOF
+//   - string_store is storage the lexer can use for storing parsed strings and identifiers
+//   - store_length is the length of that storage
+
+extern int stb_c_lexer_get_token(stb_lexer *lexer, stb_lex_token *tok);
+// this function returns non-zero if a token is parsed, or 0 if at EOF
+//   Output:
+//   - tok->token is the token ID, which is unicode code point for a single-char token, < 0 for a multichar or eof or error
+//   - tok->real_number is a double constant value for CLEX_float, or CLEX_int if STB_C_LEX_INTEGERS_AS_DOUBLES
+//   - tok->int_number is an integer constant for CLEX_int if !STB_C_LEX_INTEGERS_AS_DOUBLES, or character for CLEX_char
+//   - tok->string is a 0-terminated string for CLEX_dqstring or CLEX_sqstring or CLEX_identifier
+//   - tok->string_len is the byte length of tok->string
+
+extern void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc);
+// this inefficient function returns the line number and character offset of a
+// given location in the file as returned by stb_lex_token. Because it's inefficient,
+// you should only call it for errors, not for every token.
+// For error messages of invalid tokens, you typically want the location of the
+// in the token (which caused the token to be invalid). For bugs involving legit
+// tokens, you can report the first or the range.
+//    Output:
+//    - loc->line_number is the line number in the file, counting from 1, of the location
+//    - loc->line_offset is the char-offset in the line, counting from 0, of the location
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // INCLUDE_STB_C_LEXER_H
+
+#ifdef STB_C_LEXER_IMPLEMENTATION
+
+   #if defined(Y) || defined(N)
+   #error "Can only use stb_c_lex in contexts where the preprocessor symbols 'Y' and 'N' are not defined"
+   #endif
+
+
+// Hacky definitions so we can easily #if on them
+#define Y(x) 1
+#define N(x) 0
+
+#if STB_C_LEX_USE_STDLIB(x)
+#define STB__CLEX_use_stdlib
+#include <stdlib.h>
+#endif
+
+#if STB_C_LEX_INTEGERS_AS_DOUBLES(x)
+typedef double     stb__clex_int;
+#define intfield   real_number
+#define STB__clex_int_as_double
+#else
+typedef long       stb__clex_int;
+#define intfield   int_number
+#endif
+
+// Convert these config options to simple conditional #defines so we can more
+// easily test them once we've change the meaning of Y/N
+
+#if STB_C_LEX_PARSE_SUFFIXES(x)
+#define STB__clex_parse_suffixes
+#endif
+
+#if STB_C_LEX_C_DECIMAL_INTS(x) || STB_C_LEX_C_HEX_INTS(x) || STB_C_LEX_DEFINE_ALL_TOKEN_NAMES(x)
+#define STB__clex_define_int
+#endif
+
+#if (STB_C_LEX_C_ARITHEQ(x) && STB_C_LEX_C_SHIFTS(x)) || STB_C_LEX_DEFINE_ALL_TOKEN_NAMES(x)
+#define STB__clex_define_shifts
+#endif
+
+#if STB_C_LEX_C_HEX_INTS(x)
+#define STB__clex_hex_ints
+#endif
+
+#if STB_C_LEX_C_DECIMAL_INTS(x)
+#define STB__clex_decimal_ints
+#endif
+
+#if STB_C_LEX_C_OCTAL_INTS(x)
+#define STB__clex_octal_ints
+#endif
+
+#if STB_C_LEX_C_DECIMAL_FLOATS(x)
+#define STB__clex_decimal_floats
+#endif
+
+// Now pick a definition of Y/N that's conducive to
+// defining the enum of token names.
+#if STB_C_LEX_DEFINE_ALL_TOKEN_NAMES(x) || defined(STB_C_LEXER_SELF_TEST)
+  #undef  N
+  #define N(a) Y(a)
+#else
+  #undef  N
+  #define N(a)
+#endif
+
+#undef  Y
+#define Y(a) a,
+
+enum
+{
+   CLEX_eof = 256,
+   CLEX_parse_error,
+
+#ifdef STB__clex_define_int
+   CLEX_int,
+#endif
+
+   STB_C_LEX_C_DECIMAL_FLOATS( CLEX_float       )
+   STB_C_LEX_C_IDENTIFIERS(  CLEX_id            )
+   STB_C_LEX_C_DQ_STRINGS(   CLEX_dqstring      )
+   STB_C_LEX_C_SQ_STRINGS(   CLEX_sqstring      )
+   STB_C_LEX_C_CHARS(        CLEX_char          )
+   STB_C_LEX_C_COMPARISONS(  CLEX_eq            )
+   STB_C_LEX_C_COMPARISONS(  CLEX_noteq         )
+   STB_C_LEX_C_COMPARISONS(  CLEX_lesseq        )
+   STB_C_LEX_C_COMPARISONS(  CLEX_greatereq     )
+   STB_C_LEX_C_LOGICAL(      CLEX_andand        )
+   STB_C_LEX_C_LOGICAL(      CLEX_oror          )
+   STB_C_LEX_C_SHIFTS(       CLEX_shl           )
+   STB_C_LEX_C_SHIFTS(       CLEX_shr           )
+   STB_C_LEX_C_INCREMENTS(   CLEX_plusplus      )
+   STB_C_LEX_C_INCREMENTS(   CLEX_minusminus    )
+   STB_C_LEX_C_ARITHEQ(      CLEX_pluseq        )
+   STB_C_LEX_C_ARITHEQ(      CLEX_minuseq       )
+   STB_C_LEX_C_ARITHEQ(      CLEX_muleq         )
+   STB_C_LEX_C_ARITHEQ(      CLEX_diveq         )
+   STB_C_LEX_C_ARITHEQ(      CLEX_modeq         )
+   STB_C_LEX_C_BITWISEEQ(    CLEX_andeq         )
+   STB_C_LEX_C_BITWISEEQ(    CLEX_oreq          )
+   STB_C_LEX_C_BITWISEEQ(    CLEX_xoreq         )
+   STB_C_LEX_C_ARROW(        CLEX_arrow         )
+   STB_C_LEX_EQUAL_ARROW(    CLEX_eqarrow       )
+
+#ifdef STB__clex_define_shifts
+   CLEX_shleq, CLEX_shreq
+#endif
+
+#undef Y
+#define Y(a) a
+};
+
+// Now for the rest of the file we'll use the basic definition where
+// where Y expands to its contents and N expands to nothing
+#undef N
+#define N(a)
+
+// API function
+void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length)
+{
+   lexer->input_stream = (char *) input_stream;
+   lexer->eof = (char *) input_stream_end;
+   lexer->parse_point = (char *) input_stream;
+   lexer->string_storage = string_store;
+   lexer->string_storage_len = store_length;
+}
+
+// API function
+void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc)
+{
+   char *p = lexer->input_stream;
+   int line_number = 1;
+   int char_offset = 0;
+   while (*p) {
+      if (*p == '\n' || *p == '\r') {
+         p += (p[0]+p[1] == '\r'+'\n' ? 2 : 1); // skip newline
+         line_number += 1;
+         char_offset = 0;
+      } else {
+         ++p;
+         ++char_offset;
+      }
+   }
+   loc->line_number = line_number;
+   loc->line_offset = char_offset;
+}
+
+// main helper function for returning a parsed token
+static int stb__clex_token(stb_lexer *lexer, stb_lex_token *tok, int token, char *start, char *end)
+{
+   tok->token = token;
+   tok->where_firstchar = start;
+   tok->where_lastchar = end;
+   lexer->parse_point = end+1;
+   return 1;
+}
+
+// helper function for returning eof
+static int stb__clex_eof(stb_lex_token *tok)
+{
+   tok->token = CLEX_eof;
+   return 0;
+}
+
+static int stb__clex_iswhite(int x)
+{
+   return x == ' ' || x == '\t' || x == '\r' || x == '\n' || x == '\f';
+}
+
+static const char *stb__strchr(const char *str, int ch)
+{
+   for (; *str; ++str) 
+      if (*str == ch)
+         return str;
+   return 0;
+}
+
+// parse suffixes at the end of a number
+static int stb__clex_parse_suffixes(stb_lexer *lexer, stb_lex_token *tok, long tokenid, char *start, char *cur, const char *suffixes)
+{
+   #ifdef STB__clex_parse_suffixes
+   tok->string = lexer->string_storage;
+   tok->string_len = 0;
+
+   while ((*cur >= 'a' && *cur <= 'z') || (*cur >= 'A' && *cur <= 'Z')) {
+      if (!stb__strchr(suffixes, *cur))
+         return stb__clex_token(lexer, tok, CLEX_parse_error, start, cur);
+      if (tok->string_len+1 >= lexer->string_storage_len)
+         return stb__clex_token(lexer, tok, CLEX_parse_error, start, cur);
+      tok->string[tok->string_len++] = *cur++;
+   }
+   #else
+   suffixes = suffixes; // attempt to suppress warnings
+   #endif
+   return stb__clex_token(lexer, tok, tokenid, start, cur-1);
+}
+
+#ifndef STB__CLEX_use_stdlib
+static double stb__clex_parse_float(char *p, char **q)
+{
+   double value=0;
+   while (*p >= '0' && *p <= '9')
+      value = value*10 + (*p++ - '0');
+   if (*p == '.') {
+      double powten=1, addend = 0;
+      ++p;
+      while (*p >= '0' && *p <= '9') {
+         addend = addend + 10*(*p++ - '0');
+         powten *= 10;
+      }
+      value += addend / powten;
+   }
+   if (*p == 'e' || *p == 'E') {
+      int sign = p[1] == '-';
+      int exponent=0;
+      double pow10=1;
+      p += 1+sign;
+      while (*p >= '0' && *p <= '9')
+         exponent = exponent*10 + (*p++ - '0');
+      // can't use pow() from stdlib, so do it slow way
+      while (exponent-- > 0)
+         pow10 *= 10;
+      if (sign)
+         value /= pow10;
+      else
+         value *= pow10;
+   }
+   *q = p;
+   return value;
+}
+#endif
+
+static int stb__clex_parse_char(char *p, char **q)
+{
+   if (*p == '\\') {
+      *q = p+2; // tentatively guess we'll parse two characters
+      switch(p[1]) {
+         case '\\': return '\\';
+         case '\'': return '\'';
+         case '"': return '"';
+         case 't': return '\t';
+         case 'f': return '\f';
+         case 'n': return '\n';
+         case 'r': return '\r';
+         case '0': return '\0'; // @TODO ocatal constants
+         case 'x': case 'X': return -1; // @TODO hex constants
+         case 'u': return -1; // @TODO unicode constants
+      }
+   }
+   *q = p+1;
+   return (unsigned char) *p;
+}
+
+static int stb__clex_parse_string(stb_lexer *lexer, stb_lex_token *tok, char *p, int type)
+{
+   char *start = p;
+   char delim = *p++; // grab the " or ' for later matching
+   char *out = lexer->string_storage;
+   char *outend = lexer->string_storage + lexer->string_storage_len;
+   while (*p != delim) {
+      int n;
+      if (*p == '\\') {
+         char *q;
+         n = stb__clex_parse_char(p, &q);
+         if (n < 0)
+            return stb__clex_token(lexer, tok, CLEX_parse_error, start, q);
+         p = q;
+      } else {
+         // @OPTIMIZE: could speed this up by looping-while-not-backslash
+         n = (unsigned char) *p++;
+      }
+      if (out+1 > outend)
+         return stb__clex_token(lexer, tok, CLEX_parse_error, start, p);
+      // @TODO expand unicode escapes to UTF8
+      *out++ = (char) n;
+   }
+   *out = 0;
+   tok->string = lexer->string_storage;
+   tok->string_len = out - lexer->string_storage;
+   return stb__clex_token(lexer, tok, type, start, p+1);
+}
+
+int stb_c_lexer_get_token(stb_lexer *lexer, stb_lex_token *tok)
+{
+   char *p = lexer->parse_point;
+
+   // skip whitespace and comments
+   for (;;) {
+      #ifdef STB_C_LEX_ISWHITE
+      while (p != lexer->stream_end) {
+         int n;
+         n = STB_C_LEX_ISWHITE(p);
+         if (n == 0) break;
+         if (lexer->eof && lexer+n > lexer->eof)
+            return stb__clex_token(tok, CLEX_parse_error, p,lexer->eof-1);
+         p += n;
+      }
+      #else
+      while (p != lexer->eof && stb__clex_iswhite(*p))
+         ++p;
+      #endif
+
+      STB_C_LEX_CPP_COMMENTS(
+         if (p != lexer->eof && p[0] == '/' && p[1] == '/') {
+            while (p != lexer->eof && *p != '\r' && *p != '\n')
+               ++p;
+            continue;
+         }
+      )
+
+      STB_C_LEX_C_COMMENTS(
+         if (p != lexer->eof && p[0] == '/' && p[1] == '*') {
+            char *start = p;
+            p += 2;
+            while (p != lexer->eof && (p[0] != '*' || p[1] != '/'))
+               ++p;
+            if (p == lexer->eof)
+               return stb__clex_token(lexer, tok, CLEX_parse_error, start, p-1);
+            p += 2;
+            continue;
+         }
+      )
+      break;
+   }
+
+   if (p == lexer->eof)
+      return stb__clex_eof(tok);
+
+   switch (*p) {
+      default:
+         if (   (*p >= 'a' && *p <= 'z')
+             || (*p >= 'A' && *p <= 'Z')
+             || *p == '_' || (unsigned char) *p >= 128    // >= 128 is UTF8 char
+             STB_C_LEX_DOLLAR_IDENTIFIER( || *p == '$' ) )
+         {
+            int n = 0;
+            tok->string = lexer->string_storage;
+            tok->string_len = n;
+            do {
+               if (n+1 >= lexer->string_storage_len)
+                  return stb__clex_token(lexer, tok, CLEX_parse_error, p, p+n);
+               tok->string[n] = p[n];
+               ++n;
+            } while (
+                  (p[n] >= 'a' && p[n] <= 'z')
+               || (p[n] >= 'A' && p[n] <= 'Z')
+               || (p[n] >= '0' && p[n] <= '9') // allow digits in middle of identifier
+               || p[n] == '_' || (unsigned char) p[n] >= 128
+                STB_C_LEX_DOLLAR_IDENTIFIER( || p[n] == '$' )
+            );
+            tok->string[n] = 0;
+            return stb__clex_token(lexer, tok, CLEX_id, p, p+n-1);
+         }
+ 
+         // check for EOF
+         STB_C_LEX_0_IS_EOF(
+            if (*p == 0)
+               return stb__clex_eof(tok);
+         )
+
+      single_char:         
+         // not an identifier, return the character as itself
+         return stb__clex_token(lexer, tok, *p, p, p);
+
+      case '+':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_INCREMENTS(if (p[1] == '+') return stb__clex_token(lexer, tok, CLEX_plusplus, p,p+1);)
+            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, tok, CLEX_pluseq  , p,p+1);)
+         }
+         goto single_char;
+      case '-':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_INCREMENTS(if (p[1] == '-') return stb__clex_token(lexer, tok, CLEX_minusminus, p,p+1);)
+            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, tok, CLEX_minuseq   , p,p+1);)
+            STB_C_LEX_C_ARROW(     if (p[1] == '>') return stb__clex_token(lexer, tok, CLEX_arrow     , p,p+1);)
+         }
+         goto single_char;
+      case '&':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_LOGICAL(  if (p[1] == '&') return stb__clex_token(lexer, tok, CLEX_andand, p,p+1);)
+            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, tok, CLEX_andeq , p,p+1);)
+         }
+         goto single_char;
+      case '|':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_LOGICAL(  if (p[1] == '|') return stb__clex_token(lexer, tok, CLEX_oror, p,p+1);)
+            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, tok, CLEX_oreq, p,p+1);)
+         }
+         goto single_char;
+      case '=':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, tok, CLEX_eq, p,p+1);)
+            STB_C_LEX_EQUAL_ARROW(  if (p[1] == '>') return stb__clex_token(lexer, tok, CLEX_eqarrow, p,p+1);)
+         }
+         goto single_char;
+      case '!':
+         STB_C_LEX_C_COMPARISONS(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, tok, CLEX_noteq, p,p+1);)
+         goto single_char;
+      case '^':
+         STB_C_LEX_C_BITWISEEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, tok, CLEX_xoreq, p,p+1));
+         goto single_char;
+      case '%':
+         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, tok, CLEX_modeq, p,p+1));
+         goto single_char;
+      case '*':
+         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, tok, CLEX_muleq, p,p+1));
+         goto single_char;
+      case '/':
+         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, tok, CLEX_diveq, p,p+1));
+         goto single_char;
+      case '<':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, tok, CLEX_lesseq, p,p+1);)
+            STB_C_LEX_C_SHIFTS(     if (p[1] == '<') {
+                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
+                                                              return stb__clex_token(lexer, tok, CLEX_shleq, p,p+2);)
+                                       return stb__clex_token(lexer, tok, CLEX_shl, p,p+1);
+                                    }
+                              )
+         }
+         goto single_char;
+      case '>':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, tok, CLEX_greatereq, p,p+1);)
+            STB_C_LEX_C_SHIFTS(     if (p[1] == '>') {
+                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
+                                                              return stb__clex_token(lexer, tok, CLEX_shreq, p,p+2);)
+                                       return stb__clex_token(lexer, tok, CLEX_shr, p,p+1);
+                                    }
+                              )
+         }
+         goto single_char;
+
+      case '"':
+         STB_C_LEX_C_DQ_STRINGS(return stb__clex_parse_string(lexer, tok, p, CLEX_dqstring);)
+         goto single_char;
+      case '\'':
+         STB_C_LEX_C_SQ_STRINGS(return stb__clex_parse_string(lexer, tok, p, CLEX_sqstring);)
+         STB_C_LEX_C_CHARS(
+         {
+            char *start = p;
+            tok->int_number = stb__clex_parse_char(p+1, &p);
+            if (tok->int_number < 0)
+               return stb__clex_token(lexer, tok, CLEX_parse_error, start,start);
+            if (p == lexer->eof || *p != '\'')
+               return stb__clex_token(lexer, tok, CLEX_parse_error, start,p);
+            return stb__clex_token(lexer, tok, CLEX_char, start, p+1);
+         })
+         goto single_char;
+
+      case '0':
+         #ifdef STB__clex_hex_ints
+            if (p+1 != lexer->eof) {
+               if (p[1] == 'x' || p[1] == 'X') {
+                  char *q = p+2;
+                  #ifdef STB__CLEX_use_stdlib
+                  tok->int_number = strtol((char *) p, (char **) q, 16);
+                  #else
+                  stb__clex_int n=0;
+                  while (q != lexer->eof) {
+                     if (*q >= '0' && *q <= '9')
+                        n = n*16 + (*q - '0');
+                     else if (*q >= 'a' && *q <= 'f')
+                        n = n*16 + (*q - 'a') + 10;
+                     else if (*q >= 'A' && *q <= 'F')
+                        n = n*16 + (*q - 'A') + 10;
+                     else
+                        break;
+                     ++q;
+                  }
+                  tok->int_field = n; // int_field is macro that expands to real_number/int_number depending on type of n
+                  #endif
+                  if (q == p+2)
+                     return stb__clex_token(lexer, tok, CLEX_parse_error, p-2,p-1);
+                  return stb__clex_parse_suffixes(lexer, tok, CLEX_int, p,q, STB_C_LEX_HEX_SUFFIXES);
+               }
+            }
+         #endif // STB__clex_hex_ints
+         // can't test for octal because we might parse '0.0' as float or as '0' '.' '0',
+         // so have to do float first
+
+         /* FALL THROUGH */
+      case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+
+         #ifdef STB__clex_decimal_floats
+         {
+            char *q = p;
+            while (q != lexer->eof && (*q >= '0' && *q <= '9'))
+               ++q;
+            if (q != lexer->eof) {
+               if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'e' || *q == 'E')) {
+                  #ifdef STB__CLEX_use_stdlib
+                  tok->real_number = strtod((char *) p, (char**) &q);
+                  #else
+                  tok->real_number = stb__clex_parse_float(p, &q);
+                  #endif
+
+                  return stb__clex_parse_suffixes(lexer, tok, CLEX_float, p,q, STB_C_LEX_FLOAT_SUFFIXES);
+
+               }
+            }
+         }
+         #endif // STB__clex_decimal_floats
+
+         #ifdef STB__clex_octal_ints
+         if (p[0] == 0) {
+            char *q = p;
+            #ifdef STB__CLEX_use_stdlib
+            tok->int_number = strtol((char *) p, (char **) &q, 8);
+            #else
+            stb__clex_int n=0;
+            while (q != lexer->eof) {
+               if (*q >= '0' && *q <= '7')
+                  n = n*8 + (q - '0');
+               else
+                  break;
+               ++q;
+            }
+            if (q != lexer->eof && (*q == '8' || *q=='9'))
+               return stb__clex_token(tok, CLEX_parse_error, p, q);
+            tok->int_field = n;
+            #endif
+            return stb__clex_parse_suffixes(lexer, tok, CLEX_int, p,q, STB_C_LEX_OCTAL_SUFFIXES);
+         }
+         #endif // STB__clex_octal_ints
+
+         #ifdef STB__clex_decimal_ints
+         {
+            char *q = p;
+            #ifdef STB__CLEX_use_stdlib
+            tok->int_number = strtol((char *) p, (char **) &q, 10);
+            #else
+            stb__clex_int n=0;
+            while (q != lexer->eof) {
+               if (*q >= '0' && *q <= '9')
+                  n = n*10 + (q - '0');
+               else
+                  break;
+               ++q;
+            }
+            tok->int_field = n;
+            #endif
+            return stb__clex_parse_suffixes(lexer, tok, CLEX_int, p,q, STB_C_LEX_OCTAL_SUFFIXES);
+         }
+         #endif // STB__clex_decimal_ints
+         goto single_char;
+   }
+}
+#endif // STB_C_LEXER_IMPLEMENTATION
+
+#ifdef STB_C_LEXER_SELF_TEST
+
+#include <stdio.h>
+
+static void print_token(stb_lex_token *tok)
+{
+   switch (tok->token) {
+      case CLEX_id        : printf("_%s", tok->string); break;
+      case CLEX_eq        : printf("=="); break;
+      case CLEX_noteq     : printf("!="); break;
+      case CLEX_lesseq    : printf("<="); break;
+      case CLEX_greatereq : printf(">="); break;
+      case CLEX_andand    : printf("&&"); break;
+      case CLEX_oror      : printf("||"); break;
+      case CLEX_shl       : printf("<<"); break;
+      case CLEX_shr       : printf(">>"); break;
+      case CLEX_plusplus  : printf("++"); break;
+      case CLEX_minusminus: printf("--"); break;
+      case CLEX_arrow     : printf("->"); break;
+      case CLEX_andeq     : printf("&="); break;
+      case CLEX_oreq      : printf("|="); break;
+      case CLEX_xoreq     : printf("^="); break;
+      case CLEX_pluseq    : printf("+="); break;
+      case CLEX_minuseq   : printf("-="); break;
+      case CLEX_muleq     : printf("*="); break;
+      case CLEX_diveq     : printf("/="); break;
+      case CLEX_modeq     : printf("%%="); break;
+      case CLEX_shleq     : printf("<<="); break;
+      case CLEX_shreq     : printf(">>="); break;
+      case CLEX_eqarrow   : printf("=>"); break;
+      case CLEX_dqstring  : printf("\"%s\"", tok->string); break;
+      case CLEX_sqstring  : printf("'\"%s\"'", tok->string); break;
+      case CLEX_char      : printf("'%s'", tok->string); break;
+      #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib)
+      case CLEX_int       : printf("#%g", tok->real_number); break;
+      #else
+      case CLEX_int       : printf("#%ld", tok->int_number); break;
+      #endif
+      case CLEX_float     : printf("%g", tok->real_number); break;
+      default:
+         if (tok->token >= 0 && tok->token < 256)
+            printf("%c", (int)tok->token);
+         else {
+            printf("<<<UNKNOWN TOKEN %ld >>>\n", tok->token);
+         }
+         break;
+   }
+}
+
+/* Force a test
+of parsing
+multiline comments */
+
+/*/ comment /*/
+/**/ extern /**/
+
+int main(int argc, char **argv)
+{
+   FILE *f = fopen("stb_c_lexer.h", "rb");
+   char *text = (char*)malloc(1 << 20);
+   int len = f ? fread(text, 1, 1<<20, f) : -1;
+   stb_lexer lex;
+   stb_lex_token token;
+   if (len < 0) {
+      fprintf(stderr, "Error opening file\n");
+      return 1;
+   }
+   fclose(f);
+
+   stb_c_lexer_init(&lex, text, text+len, (char*)malloc(1<<16), 1<<16);
+   while (stb_c_lexer_get_token(&lex, &token)) {
+      if (token.token == CLEX_parse_error) {
+         printf("\n<<<PARSE ERROR>>>\n");
+         break;
+      }
+      print_token(&token);
+      printf("  ");
+   }
+   return 0;
+}
+#endif
--- a/nob.c
+++ b/nob.c
@@ -0,0 +1,34 @@
+
+#define NOB_STRIP_PREFIX
+#define NOB_IMPLEMENTATION
+#define NOB_WARN_DEPRECATED
+#define NOB_EXPERIMENTAL_DELETE_OLD
+#include "nob.h"
+
+#define BUILD_DIR "./build"
+#define SRC "./src"
+#define EXTERNAL_DIR "./external"
+
+Cmd cmd = {0};
+
+int main(int argc, char *argv[]) {
+  NOB_GO_REBUILD_URSELF(argc, argv);
+
+  if (!nob_mkdir_if_not_exists(BUILD_DIR))
+    return 1;
+
+  nob_cc(&cmd);
+  nob_cc_flags(&cmd);
+  nob_cc_output(&cmd, BUILD_DIR "/ada");
+  cmd_append(&cmd, "-I" EXTERNAL_DIR);
+  cmd_append(&cmd, "-DSTB_C_LEXER_IMPLEMENTATION");
+  nob_cc_inputs(&cmd, EXTERNAL_DIR "/stb_c_lexer.h", SRC "/main.c");
+  if (!cmd_run(&cmd))
+    return 1;
+
+  cmd_append(&cmd, BUILD_DIR "/ada");
+  if (!cmd_run(&cmd))
+    return 1;
+
+  return 0;
+}
--- a/src/main.c
+++ b/src/main.c
@@ -0,0 +1,68 @@
+#include "stb_c_lexer.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct {
+  stb_lexer lexer;
+  const char *filename;
+} Context;
+
+Context ctx = {0};
+
+long read_whole_file(const char *filename, char **buffer) {
+  FILE *fp = fopen(filename, "rb");
+  if (!fp) {
+    fprintf(stderr, "ERROR: Failed to open file '%s'\n", filename);
+    return -1;
+  }
+  fseek(fp, 0, SEEK_END);
+  long len = ftell(fp);
+  fseek(fp, 0, SEEK_SET);
+
+  *buffer = malloc(len + 1);
+  if (!*buffer) {
+    fclose(fp);
+    return -1;
+  }
+  fread(*buffer, 1, len, fp);
+  (*buffer)[len] = '\0';
+  fclose(fp);
+
+  return len;
+}
+
+int main(void) {
+  char string_store[1024];
+
+  ctx.filename = "test.ada";
+
+  char *buffer;
+  long len = read_whole_file(ctx.filename, &buffer);
+
+  stb_c_lexer_init(&ctx.lexer, buffer, buffer + len, string_store,
+                   sizeof(string_store));
+
+  stb_lex_token tok = {0};
+  while (stb_c_lexer_get_token(&ctx.lexer, &tok)) {
+    switch (tok.token) {
+    case CLEX_id:
+      printf("id = %s\n", tok.string);
+      break;
+    case CLEX_int:
+      printf("int = %zu\n", tok.int_number);
+      break;
+    case CLEX_float:
+      printf("float = %f\n", tok.real_number);
+      break;
+    default: {
+      if (tok.token < 256) {
+        printf("char = '%c'\n", (char)tok.token);
+      } else {
+        printf("token = %zu\n", tok.token);
+      }
+    }
+    }
+  }
+
+  return 0;
+}
--- a/test.ada
+++ b/test.ada
@@ -0,0 +1,9 @@
+extrn putchar;
+
+fn main() {
+  let a = 34;
+  let b = 35;
+  let PI = 3.1415;
+  putchar(a + b);
+}
+