#ifndef RBS__LEXER_H
#define RBS__LEXER_H

enum TokenType {
  NullType,         /* (Nothing) */
  pEOF,             /* EOF */
  ErrorToken,       /* Error */

  pLPAREN,          /* ( */
  pRPAREN,          /* ) */
  pCOLON,           /* : */
  pCOLON2,          /* :: */
  pLBRACKET,        /* [ */
  pRBRACKET,        /* ] */
  pLBRACE,          /* { */
  pRBRACE,          /* } */
  pHAT,             /* ^ */
  pARROW,           /* -> */
  pFATARROW,        /* => */
  pCOMMA,           /* , */
  pBAR,             /* | */
  pAMP,             /* & */
  pSTAR,            /* * */
  pSTAR2,           /* ** */
  pDOT,             /* . */
  pDOT3,            /* ... */
  pBANG,            /* ! */
  pQUESTION,        /* ? */
  pLT,              /* < */
  pEQ,              /* = */

  kALIAS,           /* alias */
  kATTRACCESSOR,    /* attr_accessor */
  kATTRREADER,      /* attr_reader */
  kATTRWRITER,      /* attr_writer */
  kBOOL,            /* bool */
  kBOT,             /* bot */
  kCLASS,           /* class */
  kDEF,             /* def */
  kEND,             /* end */
  kEXTEND,          /* extend */
  kFALSE,           /* false */
  kIN,              /* in */
  kINCLUDE,         /* include */
  kINSTANCE,        /* instance */
  kINTERFACE,       /* interface */
  kMODULE,          /* module */
  kNIL,             /* nil */
  kOUT,             /* out */
  kPREPEND,         /* prepend */
  kPRIVATE,         /* private */
  kPUBLIC,          /* public */
  kSELF,            /* self */
  kSINGLETON,       /* singleton */
  kTOP,             /* top */
  kTRUE,            /* true */
  kTYPE,            /* type */
  kUNCHECKED,       /* unchecked */
  kUNTYPED,         /* untyped */
  kVOID,            /* void */

  tLIDENT,          /* Identifiers starting with lower case */
  tUIDENT,          /* Identifiers starting with upper case */
  tULIDENT,         /* Identifiers starting with `_` followed by upper case */
  tULLIDENT,        /* Identifiers starting with `_` followed by lower case */
  tGIDENT,          /* Identifiers starting with `$` */
  tAIDENT,          /* Identifiers starting with `@` */
  tA2IDENT,         /* Identifiers starting with `@@` */
  tBANGIDENT,       /* Identifiers ending with `!` */
  tEQIDENT,         /* Identifiers ending with `=` */
  tQIDENT,          /* Quoted identifier */
  pAREF_OPR,        /* [] */
  tOPERATOR,        /* Operator identifier */

  tCOMMENT,         /* Comment */
  tLINECOMMENT,     /* Comment of all line */

  tDQSTRING,        /* Double quoted string */
  tSQSTRING,        /* Single quoted string */
  tINTEGER,         /* Integer */
  tSYMBOL,          /* Symbol */
  tDQSYMBOL,        /* Double quoted symbol */
  tSQSYMBOL,        /* Single quoted symbol */
  tANNOTATION,      /* Annotation */
};

/**
 * The `byte_pos` (or `char_pos`) is the primary data.
 * The rest are cache.
 *
 * They can be computed from `byte_pos` (or `char_pos`), but it needs full scan from the beginning of the string (depending on the encoding).
 * */
typedef struct {
  int byte_pos;
  int char_pos;
  int line;
  int column;
} position;

typedef struct {
  position start;
  position end;
} range;

typedef struct {
  enum TokenType type;
  range range;
} token;

/**
 * The lexer state is the curren token.
 *
 * ```
 * ... "a string token"
 *    ^                      start position
 *          ^                current position
 *     ~~~~~~                Token => "a str
 * ```
 * */
typedef struct {
  VALUE string;
  int start_pos;                  /* The character position that defines the start of the input */
  int end_pos;                    /* The character position that defines the end of the input */
  position current;               /* The current position */
  position start;                 /* The start position of the current token */
  bool first_token_of_line;       /* This flag is used for tLINECOMMENT */
  unsigned int last_char;         /* Last peeked character */
} lexstate;

extern token NullToken;
extern position NullPosition;
extern range NULL_RANGE;

char *peek_token(lexstate *state, token tok);
int token_chars(token tok);
int token_bytes(token tok);

#define null_position_p(pos) (pos.byte_pos == -1)
#define null_range_p(range) (range.start.byte_pos == -1)
#define nonnull_pos_or(pos1, pos2) (null_position_p(pos1) ? pos2 : pos1)
#define RANGE_BYTES(range) (range.end.byte_pos - range.start.byte_pos)

const char *token_type_str(enum TokenType type);

/**
 * Read next character.
 * */
unsigned int peek(lexstate *state);

/**
 * Skip one character.
 * */
void skip(lexstate *state);

/**
 * Skip n characters.
 * */
void skipn(lexstate *state, size_t size);

/**
 * Return new token with given type.
 * */
token next_token(lexstate *state, enum TokenType type);

token rbsparser_next_token(lexstate *state);

void print_token(token tok);

#endif