/*
 * schemetok.l - a lex rule for Scheme.
 * 
 * Copyright (C) 2005 Kenichi Ishibashi <bashi at dream.ie.ariake-nct.ac.jp>
 *     All rights reserved.
 *     This is free software with ABSOLUTELY NO WARRANTY.
 * 
 * You can redistribute it and/or modify it under the terms of 
 * the GNU General Public License version 2.
 */

%option reentrant
%option prefix="langscan_scheme_lex_"
%option noyywrap
%option nodefault

space           [ \t]+
newline         \r\n|\r|\n
escseq          \\({newline}|.)
sp_initial      !|\$|%|&|\*|\/|:|\<|=|\>|\?|\^|_|~
initial         [0-9A-Za-z]|{sp_initial}
sp_subseq       \+|\-|\.|@
subseq          {initial}|[0-9]|{sp_subseq}
peculiar_id     \+|\-|\.\.\.
ident           {initial}{subseq}*|{peculiar_id}
boolean         #t|#f
radix           (#[bB]|#[oO]|#[dD])
exactness       (#i|#e)
suffix          ([eEsSfFdDlL][-+]?[0-9]+)
uinteger10      [0-9]+#*
uinteger16      [0-9a-fA-F]+#*
decimal         (\.?{uinteger10}|[0-9]+\.[0-9]*#*|[0-9]+#+\.#*){suffix}?
ureal10         {uinteger10}|{uinteger10}\/{uinteger10}|{decimal}
ureal16         {uinteger16}|{uinteger16}\/{uinteger16}
complex10       ([-+]?{ureal10}[-+]{ureal10}i|[-+]?{ureal10}[-+]i|[-+]?{ureal10}@{ureal10}|[-+]?{ureal10}i?)
complex16       ([-+]?{ureal16}[-+]{ureal16}i|[-+]?{ureal16}[-+]i|[-+]?{ureal16}@{ureal16}|[-+]?{ureal16}i?)
prefix          {radix}{exactness}|{exactness}{radix}
character       #\\.|#\\newline|#\\space
str_elem        [^\"]|\\\"|\\\\
reg_elem        [^/]|\\\/

%{

#include "scheme.h"

#define YY_EXTRA_TYPE langscan_scheme_lex_extra_t *

#if YY_NULL != 0
#error "YY_NULL is not 0."
#endif

#define YY_DECL langscan_scheme_token_t langscan_scheme_lex_lex(yyscan_t yyscanner)

#define YY_INPUT(buf,result,max_size) \
  if (!yyextra->eof) { \
    result = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
    if (result == 0) \
      yyextra->eof = 1; \
  }

#define UPD update_pos(yyextra, yytext, yyleng)
static void update_pos(langscan_scheme_lex_extra_t *, char *, int);

#define report(token) \
  do { \
    yyextra->text = yytext; \
    yyextra->leng = yyleng; \
    return langscan_scheme_##token; \
  } while (0)


%}

%%

{space}                              { UPD; report(space); }
{newline}                            { UPD; report(space); }
;.*                                  { UPD; report(comment); }
[,@`\']([,@`\']|{space})*            { UPD; report(quote_chars); }
\"{str_elem}*\"                      { UPD; report(string); }
\#\/{reg_elem}*\/                    { UPD; report(string); }
{exactness}?{complex10}              { UPD; report(number); }
{radix}{exactness}?{complex10}       { UPD; report(number); }
{exactness}?{radix}{complex10}       { UPD; report(number); }
{exactness}?\#x{complex16}           { UPD; report(number); }
\#x{exactness}?{complex16}           { UPD; report(number); }
(\#x|\#d|\#b|\#o)?[-+]i              { UPD; report(number); }
{ident}                              { UPD; report(ident); }
{boolean}                            { UPD; report(ident); }
{character}                          { UPD; report(punct); }
\#\(                                 { UPD; report(punct); }
.                                    { UPD; report(punct); }

%%

static void update_pos(
  langscan_scheme_lex_extra_t *extra,
  char *text,
  int leng)
{
  int i, j;
  extra->beg_byteno = extra->end_byteno;
  extra->beg_lineno = extra->end_lineno;
  extra->beg_columnno = extra->end_columnno;
  j = 0;
  for (i = 0; i < leng; i++) {
    if (text[i] == '\n') {
      extra->end_lineno++;
      j = i + 1;
      extra->end_columnno = 0;
    }
  }
  extra->end_columnno += leng - j;
  extra->end_byteno += leng;
}

langscan_scheme_tokenizer_t *langscan_scheme_make_tokenizer(
  size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
  void *user_data)
{
  langscan_scheme_tokenizer_t *tokenizer;
  langscan_scheme_lex_extra_t *extra;
  tokenizer = (langscan_scheme_tokenizer_t *)malloc(sizeof(langscan_scheme_tokenizer_t));
  if (tokenizer == NULL)
    return NULL;
  extra = (langscan_scheme_lex_extra_t *)malloc(sizeof(langscan_scheme_lex_extra_t));
  if (extra == NULL)
    return NULL;
  extra->user_read = user_read;
  extra->user_data = user_data;
  extra->beg_lineno = 1;
  extra->beg_columnno = 0;
  extra->beg_byteno = 0;
  extra->end_lineno = 1;
  extra->end_columnno = 0;
  extra->end_byteno = 0;
  extra->eof = 0;
  tokenizer->extra = extra;
  langscan_scheme_lex_lex_init(&tokenizer->scanner);
  langscan_scheme_lex_set_extra(extra, tokenizer->scanner);
  return tokenizer;
}

langscan_scheme_token_t langscan_scheme_get_token(langscan_scheme_tokenizer_t *tokenizer) 
{
  return langscan_scheme_lex_lex(tokenizer->scanner);
}

void langscan_scheme_free_tokenizer(langscan_scheme_tokenizer_t *tokenizer) 
{
  langscan_scheme_lex_extra_t *extra = langscan_scheme_lex_get_extra(tokenizer->scanner);
  free((void *)extra);
  langscan_scheme_lex_lex_destroy(tokenizer->scanner);
  free((void *)tokenizer);
}

user_read_t langscan_scheme_tokenizer_get_user_read(langscan_scheme_tokenizer_t *tokenizer)
{
  return tokenizer->extra->user_read;
}

void *langscan_scheme_tokenizer_get_user_data(langscan_scheme_tokenizer_t *tokenizer)
{
  return tokenizer->extra->user_data;
}

const char *langscan_scheme_token_name(langscan_scheme_token_t token)
{
  static char *token_names[] = {
    "*eof*",
#define LANGSCAN_SCHEME_TOKEN(name) #name,
    LANGSCAN_SCHEME_TOKEN_LIST
#undef LANGSCAN_SCHEME_TOKEN
  };

  return token_names[token];
}