#include "rbs_extension.h"

#define ONE_CHAR_PATTERN(c, t) case c: tok = next_token(state, t); break

/**
 * Returns one character at current.
 *
 * ... A B C ...
 *    ^           current => A
 * */
#define peek(state) rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string))

static const char *RBS_TOKENTYPE_NAMES[] = {
  "NullType",
  "pEOF",
  "ErrorToken",

  "pLPAREN",          /* ( */
  "pRPAREN",          /* ) */
  "pCOLON",           /* : */
  "pCOLON2",          /* :: */
  "pLBRACKET",        /* [ */
  "pRBRACKET",        /* ] */
  "pLBRACE",          /* { */
  "pRBRACE",          /* } */
  "pHAT",             /* ^ */
  "pARROW",           /* -> */
  "pFATARROW",        /* => */
  "pCOMMA",           /* , */
  "pBAR",             /* | */
  "pAMP",             /* & */
  "pSTAR",            /* * */
  "pSTAR2",           /* ** */
  "pDOT",             /* . */
  "pDOT3",            /* ... */
  "pBANG",            /* ! */
  "pQUESTION",        /* ? */
  "pLT",              /* < */
  "pEQ",              /* = */

  "kBOOL",            /* bool */
  "kBOT",             /* bot */
  "kCLASS",           /* class */
  "kFALSE",           /* kFALSE */
  "kINSTANCE",        /* instance */
  "kINTERFACE",       /* interface */
  "kNIL",             /* nil */
  "kSELF",            /* self */
  "kSINGLETON",       /* singleton */
  "kTOP",             /* top */
  "kTRUE",            /* true */
  "kVOID",            /* void */
  "kTYPE",            /* type */
  "kUNCHECKED",       /* unchecked */
  "kIN",              /* in */
  "kOUT",             /* out */
  "kEND",             /* end */
  "kDEF",             /* def */
  "kINCLUDE",         /* include */
  "kEXTEND",          /* extend */
  "kPREPEND",         /* prepend */
  "kALIAS",           /* alias */
  "kMODULE",          /* module */
  "kATTRREADER",      /* attr_reader */
  "kATTRWRITER",      /* attr_writer */
  "kATTRACCESSOR",    /* attr_accessor */
  "kPUBLIC",          /* public */
  "kPRIVATE",         /* private */
  "kUNTYPED",         /* untyped */

  "tLIDENT",          /* Identifiers starting with lower case */
  "tUIDENT",          /* Identifiers starting with upper case */
  "tULIDENT",         /* Identifiers starting with `_` */
  "tULLIDENT",
  "tGIDENT",          /* Identifiers starting with `$` */
  "tAIDENT",          /* Identifiers starting with `@` */
  "tA2IDENT",         /* Identifiers starting with `@@` */
  "tBANGIDENT",
  "tEQIDENT",
  "tQIDENT",          /* Quoted identifier */
  "tOPERATOR",        /* Operator identifier */

  "tCOMMENT",
  "tLINECOMMENT",

  "tDQSTRING",        /* Double quoted string */
  "tSQSTRING",        /* Single quoted string */
  "tINTEGER",         /* Integer */
  "tSYMBOL",          /* Symbol */
  "tDQSYMBOL",
  "tSQSYMBOL",
  "tANNOTATION",      /* Annotation */
};

token NullToken = { NullType };
position NullPosition = { -1, -1, -1, -1 };
range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };

const char *token_type_str(enum TokenType type) {
  return RBS_TOKENTYPE_NAMES[type];
}

unsigned int peekn(lexstate *state, unsigned int chars[], size_t length) {
  int byteoffset = 0;

  rb_encoding *encoding = rb_enc_get(state->string);
  char *start = RSTRING_PTR(state->string) + state->current.byte_pos;
  char *end = RSTRING_END(state->string);

  for (size_t i = 0; i < length; i++)
  {
    chars[i] = rb_enc_mbc_to_codepoint(start + byteoffset, end, encoding);
    byteoffset += rb_enc_codelen(chars[i], rb_enc_get(state->string));
  }

  return byteoffset;
}

int token_chars(token tok) {
  return tok.range.end.char_pos - tok.range.start.char_pos;
}

int token_bytes(token tok) {
  return RANGE_BYTES(tok.range);
}

/**
 * ... token ...
 *    ^             start
 *          ^       current
 *
 * */
token next_token(lexstate *state, enum TokenType type) {
  token t;

  t.type = type;
  t.range.start = state->start;
  t.range.end = state->current;
  state->start = state->current;
  state->first_token_of_line = false;

  return t;
}

void advance_skip(lexstate *state, unsigned int c, bool skip) {
  int len = rb_enc_codelen(c, rb_enc_get(state->string));

  state->current.char_pos += 1;
  state->current.byte_pos += len;

  if (c == '\n') {
    state->current.line += 1;
    state->current.column = 0;
    state->first_token_of_line = true;
  } else {
    state->current.column += 1;
  }

  if (skip) {
    state->start = state->current;
  }
}

void advance_char(lexstate *state, unsigned int c) {
  advance_skip(state, c, false);
}

void skip_char(lexstate *state, unsigned int c) {
  advance_skip(state, c, true);
}

void skip(lexstate *state) {
  unsigned char c = peek(state);
  skip_char(state, c);
}

void advance(lexstate *state) {
  unsigned char c = peek(state);
  advance_char(state, c);
}

/*
  1. Peek one character from state
  2. If read characetr equals to given `c`, skip the character and return true.
  3. Return false otherwise.
*/
static bool advance_next_character_if(lexstate *state, unsigned int c) {
  if (peek(state) == c) {
    advance_char(state, c);
    return true;
  } else {
    return false;
  }
}

/*
   ... 0 1 ...
        ^           current
          ^         current (return)
*/
static token lex_number(lexstate *state) {
  unsigned int c;

  while (true) {
    c = peek(state);

    if (rb_isdigit(c) || c == '_') {
      advance_char(state, c);
    } else {
      break;
    }
  }

  return next_token(state, tINTEGER);
}

/*
  lex_hyphen ::=  -       (tOPERATOR)
               |  - @     (tOPERATOR)
               |  - >     (pARROW)
               |  - 1 ... (tINTEGER)
*/
static token lex_hyphen(lexstate* state) {
  if (advance_next_character_if(state, '>')) {
    return next_token(state, pARROW);
  } else if (advance_next_character_if(state, '@')) {
    return next_token(state, tOPERATOR);
  } else {
    unsigned int c = peek(state);

    if (rb_isdigit(c)) {
      advance_char(state, c);
      return lex_number(state);
    } else {
      return next_token(state, tOPERATOR);
    }
  }
}

/*
  lex_plus ::= +
             | + @
             | + \d
*/
static token lex_plus(lexstate *state) {
  if (advance_next_character_if(state, '@')) {
    return next_token(state, tOPERATOR);
  } else if (rb_isdigit(peek(state))) {
    return lex_number(state);
  } else {
    return next_token(state, tOPERATOR);
  }
}

/*
  lex_dot ::= .         pDOT
            | . . .     pDOT3
*/
static token lex_dot(lexstate *state) {
  unsigned int cs[2];

  peekn(state, cs, 2);

  if (cs[0] == '.' && cs[1] == '.') {
    advance_char(state, '.');
    advance_char(state, '.');
    return next_token(state, pDOT3);
  } else {
    return next_token(state, pDOT);
  }
}

/*
  lex_eq ::= =
           | ==
           | ===
           | =~
           | =>
*/
static token lex_eq(lexstate *state) {
  unsigned int cs[2];
  peekn(state, cs, 2);

  if (cs[0] == '=' && cs[1] == '=') {
    // ===
    advance_char(state, cs[0]);
    advance_char(state, cs[1]);
    return next_token(state, tOPERATOR);
  } else if (cs[0] == '=') {
    // ==
    advance_char(state, cs[0]);
    return next_token(state, tOPERATOR);
  } else if (cs[0] == '~') {
    // =~
    advance_char(state, cs[0]);
    return next_token(state, tOPERATOR);
  } else if (cs[0] == '>') {
    // =>
    advance_char(state, cs[0]);
    return next_token(state, pFATARROW);
  } else {
    return next_token(state, pEQ);
  }
}

/*
  underscore ::= _A        tULIDENT
               | _a        tULLIDENT
               | _         tULLIDENT
*/
static token lex_underscore(lexstate *state) {
  unsigned int c;

  c = peek(state);

  if ('A' <= c && c <= 'Z') {
    advance_char(state, c);

    while (true) {
      c = peek(state);

      if (rb_isalnum(c) || c == '_') {
        // ok
        advance_char(state, c);
      } else {
        break;
      }
    }

    return next_token(state, tULIDENT);
  } else if (rb_isalnum(c) || c == '_') {
    advance_char(state, c);

    while (true) {
      c = peek(state);

      if (rb_isalnum(c) || c == '_') {
        // ok
        advance_char(state, c);
      } else {
        break;
      }
    }

    if (c == '!') {
      advance_char(state, c);
      return next_token(state, tBANGIDENT);
    } else if (c == '=') {
      advance_char(state, c);
      return next_token(state, tEQIDENT);
    } else {
      return next_token(state, tULLIDENT);
    }
  } else {
    return next_token(state, tULLIDENT);
  }
}

static bool is_opr(unsigned int c) {
  switch (c) {
  case ':':
  case ';':
  case '=':
  case '.':
  case ',':
  case '!':
  case '"':
  case '$':
  case '%':
  case '&':
  case '(':
  case ')':
  case '-':
  case '+':
  case '~':
  case '|':
  case '\\':
  case '\'':
  case '[':
  case ']':
  case '{':
  case '}':
  case '*':
  case '/':
  case '<':
  case '>':
  case '^':
    return true;
  default:
    return false;
  }
}

static token lex_global(lexstate *state) {
  unsigned int c;

  c = peek(state);

  if (rb_isspace(c) || c == 0) {
    return next_token(state, ErrorToken);
  }

  if (rb_isdigit(c)) {
    // `$` [`0`-`9`]+
    advance_char(state, c);

    while (true) {
      c = peek(state);
      if (rb_isdigit(c)) {
        advance_char(state, c);
      } else {
        return next_token(state, tGIDENT);
      }
    }
  }

  if (c == '-') {
    // `$` `-` [a-zA-Z0-9_]
    advance_char(state, c);
    c = peek(state);

    if (rb_isalnum(c) || c == '_') {
      advance_char(state, c);
      return next_token(state, tGIDENT);
    } else {
      return next_token(state, ErrorToken);
    }
  }

  switch (c) {
  case '~':
  case '*':
  case '$':
  case '?':
  case '!':
  case '@':
  case '\\':
  case '/':
  case ';':
  case ',':
  case '.':
  case '=':
  case ':':
  case '<':
  case '>':
  case '"':
  case '&':
  case '\'':
  case '`':
  case '+':
    advance_char(state, c);
    return next_token(state, tGIDENT);

  default:
    if (is_opr(c) || c == 0) {
      return next_token(state, ErrorToken);
    }

    while (true) {
      advance_char(state, c);
      c = peek(state);

      if (rb_isspace(c) || is_opr(c) || c == 0) {
        break;
      }
    }

    return next_token(state, tGIDENT);
  }
}

void pp(VALUE object) {
  VALUE inspect = rb_funcall(object, rb_intern("inspect"), 0);
  printf("pp >> %s\n", RSTRING_PTR(inspect));
}

static token lex_ident(lexstate *state, enum TokenType default_type) {
  unsigned int c;
  token tok;

  while (true) {
    c = peek(state);
    if (rb_isalnum(c) || c == '_') {
      advance_char(state, c);
    } else if (c == '!') {
      advance_char(state, c);
      tok = next_token(state, tBANGIDENT);
      break;
    } else if (c == '=') {
      advance_char(state, c);
      tok = next_token(state, tEQIDENT);
      break;
    } else {
      tok = next_token(state, default_type);
      break;
    }
  }

  if (tok.type == tLIDENT) {
    VALUE string = rb_enc_str_new(
      RSTRING_PTR(state->string) + tok.range.start.byte_pos,
      RANGE_BYTES(tok.range),
      rb_enc_get(state->string)
    );

    VALUE type = rb_hash_aref(RBS_Parser_KEYWORDS, string);
    if (FIXNUM_P(type)) {
      tok.type = FIX2INT(type);
    }
  }

  return tok;
}

static token lex_comment(lexstate *state, enum TokenType type) {
  unsigned int c;

  c = peek(state);
  if (c == ' ') {
    advance_char(state, c);
  }

  while (true) {
    c = peek(state);

    if (c == '\n' || c == '\0') {
      break;
    } else {
      advance_char(state, c);
    }
  }

  token tok = next_token(state, type);

  skip_char(state, c);

  return tok;
}

/*
   ... " ... " ...
      ^               start
        ^             current
              ^       current (after)
*/
static token lex_dqstring(lexstate *state) {
  unsigned int c;

  while (true) {
    c = peek(state);
    advance_char(state, c);

    if (c == '\\') {
      if (peek(state) == '"') {
        advance_char(state, c);
        c = peek(state);
      }
    } else if (c == '"') {
      break;
    }
  }

  return next_token(state, tDQSTRING);
}

/*
   ... @ foo ...
      ^             start
        ^           current
            ^       current (return)

   ... @ @ foo ...
      ^               start
        ^             current
              ^       current (return)
*/
static token lex_ivar(lexstate *state) {
  unsigned int c;

  enum TokenType type = tAIDENT;

  c = peek(state);

  if (c == '@') {
    type = tA2IDENT;
    advance_char(state, c);
    c = peek(state);
  }

  if (rb_isalpha(c) || c == '_') {
    advance_char(state, c);
    c = peek(state);
  } else {
    return next_token(state, ErrorToken);
  }

  while (rb_isalnum(c) || c == '_') {
    advance_char(state, c);
    c = peek(state);
  }

  return next_token(state, type);
}

/*
   ... ' ... ' ...
      ^               start
        ^             current
              ^       current (after)
*/
static token lex_sqstring(lexstate *state) {
  unsigned int c;

  c = peek(state);

  while (true) {
    c = peek(state);
    advance_char(state, c);

    if (c == '\\') {
      if (peek(state) == '\'') {
        advance_char(state, c);
        c = peek(state);
      }
    } else if (c == '\'') {
      break;
    }
  }

  return next_token(state, tSQSTRING);
}

#define EQPOINTS2(c0, c1, s) (c0 == s[0] && c1 == s[1])
#define EQPOINTS3(c0, c1, c2, s) (c0 == s[0] && c1 == s[1] && c2 == s[2])

/*
   ... : @ ...
      ^          start
        ^        current
          ^      current (return)
*/
static token lex_colon_symbol(lexstate *state) {
  unsigned int c[3];
  peekn(state, c, 3);

  switch (c[0]) {
    case '|':
    case '&':
    case '/':
    case '%':
    case '~':
    case '`':
    case '^':
      advance_char(state, c[0]);
      return next_token(state, tSYMBOL);
    case '=':
      if (EQPOINTS2(c[0], c[1], "=~")) {
        // :=~
        advance_char(state, c[0]);
        advance_char(state, c[1]);
        return next_token(state, tSYMBOL);
      } else if (EQPOINTS3(c[0], c[1], c[2], "===")) {
        // :===
        advance_char(state, c[0]);
        advance_char(state, c[1]);
        advance_char(state, c[2]);
        return next_token(state, tSYMBOL);
      } else if (EQPOINTS2(c[0], c[1], "==")) {
        // :==
        advance_char(state, c[0]);
        advance_char(state, c[1]);
        return next_token(state, tSYMBOL);
      }
      break;
    case '<':
      if (EQPOINTS3(c[0], c[1], c[2], "<=>")) {
        advance_char(state, c[0]);
        advance_char(state, c[1]);
        advance_char(state, c[2]);
      } else if (EQPOINTS2(c[0], c[1], "<=") || EQPOINTS2(c[0], c[1], "<<")) {
        advance_char(state, c[0]);
        advance_char(state, c[1]);
      } else {
        advance_char(state, c[0]);
      }
      return next_token(state, tSYMBOL);
    case '>':
      if (EQPOINTS2(c[0], c[1], ">=") || EQPOINTS2(c[0], c[1], ">>")) {
        advance_char(state, c[0]);
        advance_char(state, c[1]);
      } else {
        advance_char(state, c[0]);
      }
      return next_token(state, tSYMBOL);
    case '-':
    case '+':
      if (EQPOINTS2(c[0], c[1], "+@") || EQPOINTS2(c[0], c[1], "-@")) {
        advance_char(state, c[0]);
        advance_char(state, c[1]);
      } else {
        advance_char(state, c[0]);
      }
      return next_token(state, tSYMBOL);
    case '*':
      if (EQPOINTS2(c[0], c[1], "**")) {
        advance_char(state, c[0]);
        advance_char(state, c[1]);
      } else {
        advance_char(state, c[0]);
      }
      return next_token(state, tSYMBOL);
    case '[':
      if (EQPOINTS3(c[0], c[1], c[2], "[]=")) {
        advance_char(state, c[0]);
        advance_char(state, c[1]);
        advance_char(state, c[2]);
      } else if (EQPOINTS2(c[0], c[1], "[]")) {
        advance_char(state, c[0]);
        advance_char(state, c[1]);
      } else {
        break;
      }
      return next_token(state, tSYMBOL);
    case '!':
      if (EQPOINTS2(c[0], c[1], "!=") || EQPOINTS2(c[0], c[1], "!~")) {
        advance_char(state, c[0]);
        advance_char(state, c[1]);
      } else {
        advance_char(state, c[0]);
      }
      return next_token(state, tSYMBOL);
    case '@': {
      advance_char(state, '@');
      token tok = lex_ivar(state);
      if (tok.type != ErrorToken) {
        tok.type = tSYMBOL;
      }
      return tok;
    }
    case '$': {
      advance_char(state, '$');
      token tok = lex_global(state);
      if (tok.type != ErrorToken) {
        tok.type = tSYMBOL;
      }
      return tok;
    }
    case '\'': {
      position start = state->start;
      advance_char(state, '\'');
      token tok = lex_sqstring(state);
      tok.type = tSQSYMBOL;
      tok.range.start = start;
      return tok;
    }
    case '"': {
      position start = state->start;
      advance_char(state, '"');
      token tok = lex_dqstring(state);
      tok.type = tDQSYMBOL;
      tok.range.start = start;
      return tok;
    }
    default:
      if (rb_isalpha(c[0]) || c[0] == '_') {
        position start = state->start;
        token tok = lex_ident(state, NullType);
        tok.range.start = start;

        if (peek(state) == '?') {
          if (tok.type != tBANGIDENT && tok.type != tEQIDENT) {
            skip_char(state, '?');
            tok.range.end = state->current;
          }
        }

        tok.type = tSYMBOL;
        return tok;
      }
  }

  return next_token(state, pCOLON);
}

/*
   ... : : ...
      ^          start
        ^        current
          ^      current (return)

   ... :   ...
      ^          start
        ^        current (lex_colon_symbol)
*/
static token lex_colon(lexstate *state) {
  unsigned int c = peek(state);

  if (c == ':') {
    advance_char(state, c);
    return next_token(state, pCOLON2);
  } else {
    return lex_colon_symbol(state);
  }
}

/*
  lex_lt ::= <       (pLT)
           | < <     (tOPERATOR)
           | < =     (tOPERATOR)
           | < = >   (tOPERATOR)
*/
static token lex_lt(lexstate *state) {
  if (advance_next_character_if(state, '<')) {
    return next_token(state, tOPERATOR);
  } else if (advance_next_character_if(state, '=')) {
    advance_next_character_if(state, '>');
    return next_token(state, tOPERATOR);
  } else {
    return next_token(state, pLT);
  }
}

/*
  lex_gt ::= >
           | > =
           | > >
*/
static token lex_gt(lexstate *state) {
  advance_next_character_if(state, '=') || advance_next_character_if(state, '>');
  return next_token(state, tOPERATOR);
}

/*
    ... `%` `a` `{` ... `}` ...
       ^                         start
           ^                     current
                           ^     current (exit)
                    ---          token
*/
static token lex_percent(lexstate *state) {
  unsigned int cs[2];
  unsigned int end_char;

  peekn(state, cs, 2);

  if (cs[0] != 'a') {
    return next_token(state, tOPERATOR);
  }

  switch (cs[1])
  {
  case '{':
    end_char = '}';
    break;
  case '(':
    end_char = ')';
    break;
  case '[':
    end_char = ']';
    break;
  case '|':
    end_char = '|';
    break;
  case '<':
    end_char = '>';
    break;
  default:
    return next_token(state, tOPERATOR);
  }

  advance_char(state, cs[0]);
  advance_char(state, cs[1]);

  unsigned int c;

  while ((c = peek(state))) {
    if (c == end_char) {
      advance_char(state, c);
      return next_token(state, tANNOTATION);
    }
    advance_char(state, c);
  }

  return next_token(state, ErrorToken);
}

/*
  bracket ::= [       (pLBRACKET)
             * ^
            | [ ]     (tOPERATOR)
             * ^ $
            | [ ] =   (tOPERATOR)
             * ^   $
*/
static token lex_bracket(lexstate *state) {
  if (advance_next_character_if(state, ']')) {
    advance_next_character_if(state, '=');
    return next_token(state, tOPERATOR);
  } else {
    return next_token(state, pLBRACKET);
  }
}

/*
  bracket ::= *
            | * *
*/
static token lex_star(lexstate *state) {
  if (advance_next_character_if(state, '*')) {
    return next_token(state, pSTAR2);
  } else {
    return next_token(state, pSTAR);
  }
}

/*
  bang ::= !
         | ! =
         | ! ~
*/
static token lex_bang(lexstate *state) {
  advance_next_character_if(state, '=') || advance_next_character_if(state, '~');
  return next_token(state, tOPERATOR);
}

/*
  backquote ::= `            (tOPERATOR)
              | `[^ :][^`]`  (tQIDENT)
*/
static token lex_backquote(lexstate *state) {
  unsigned int c = peek(state);

  if (c == ' ' || c == ':') {
    return next_token(state, tOPERATOR);
  } else {
    while (true) {
      if (c == '`') {
        break;
      }

      c = peek(state);
      advance_char(state, c);
    }

    return next_token(state, tQIDENT);
  }
}

token rbsparser_next_token(lexstate *state) {
  token tok = NullToken;

  unsigned int c;
  bool skipping = true;

  while (skipping) {
    c = peek(state);

    switch (c) {
    case ' ':
    case '\t':
    case '\n':
      // nop
      skip_char(state, c);
      break;
    case '\0':
      return next_token(state, pEOF);
    default:
      advance_char(state, c);
      skipping = false;
      break;
    }
  }

  /* ... c d ..                */
  /*      ^     state->current */
  /*    ^       start          */
  switch (c) {
    case '\0': tok = next_token(state, pEOF);
    ONE_CHAR_PATTERN('(', pLPAREN);
    ONE_CHAR_PATTERN(')', pRPAREN);
    ONE_CHAR_PATTERN(']', pRBRACKET);
    ONE_CHAR_PATTERN('{', pLBRACE);
    ONE_CHAR_PATTERN('}', pRBRACE);
    ONE_CHAR_PATTERN(',', pCOMMA);
    ONE_CHAR_PATTERN('|', pBAR);
    ONE_CHAR_PATTERN('^', pHAT);
    ONE_CHAR_PATTERN('&', pAMP);
    ONE_CHAR_PATTERN('?', pQUESTION);
    ONE_CHAR_PATTERN('/', tOPERATOR);
    ONE_CHAR_PATTERN('~', tOPERATOR);
    case '[':
      tok = lex_bracket(state);
      break;
    case '-':
      tok = lex_hyphen(state);
      break;
    case '+':
      tok = lex_plus(state);
      break;
    case '*':
      tok = lex_star(state);
      break;
    case '<':
      tok = lex_lt(state);
      break;
    case '=':
      tok = lex_eq(state);
      break;
    case '>':
      tok = lex_gt(state);
      break;
    case '!':
      tok = lex_bang(state);
      break;
    case '#':
      if (state->first_token_of_line) {
        tok = lex_comment(state, tLINECOMMENT);
      } else {
        tok = lex_comment(state, tCOMMENT);
      }
      break;
    case ':':
      tok = lex_colon(state);
      break;
    case '.':
      tok = lex_dot(state);
      break;
    case '_':
      tok = lex_underscore(state);
      break;
    case '$':
      tok = lex_global(state);
      break;
    case '@':
      tok = lex_ivar(state);
      break;
    case '"':
      tok = lex_dqstring(state);
      break;
    case '\'':
      tok = lex_sqstring(state);
      break;
    case '%':
      tok = lex_percent(state);
      break;
    case '`':
      tok = lex_backquote(state);
      break;
    default:
      if (rb_isalpha(c) && rb_isupper(c)) {
        tok = lex_ident(state, tUIDENT);
      }
      if (rb_isalpha(c) && rb_islower(c)) {
        tok = lex_ident(state, tLIDENT);
      }
      if (rb_isdigit(c)) {
        tok = lex_number(state);
      }
  }

  if (tok.type == NullType) {
    tok = next_token(state, ErrorToken);
  }

  return tok;
}

char *peek_token(lexstate *state, token tok) {
  return RSTRING_PTR(state->string) + tok.range.start.byte_pos;
}