require 'rdf/ll1/lexer'

module RDF::Turtle
  module Terminals
    # Definitions of token regular expressions used for lexical analysis
  
    # Pick the implementation of U_CHARS1, U_CHARS2 and IRI_RANGE that suits the
    # running interpreter. Note that RUBY_VERSION is compared as a String.
    # The three constants are interpolated into PN_CHARS_BASE, PN_CHARS and
    # IRI_REF respectively further down in this module.
    if RUBY_VERSION >= '1.9'
      ##
      # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
      #
      # The heredoc is passed through gsub(/\s+/, '') so the alternation can be
      # laid out over several lines; all whitespace is removed before compiling.
      U_CHARS1         = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                           [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
                           [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
                           [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
                           [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
                         EOS
      # Additional code points: U+00B7, U+0300-U+036F and U+203F-U+2040.
      U_CHARS2         = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
      # Intersection (&&) of two negated classes: any character that is neither
      # one of < > " { } | ^ ` \ nor in the range \x00-\x20.
      IRI_RANGE        = Regexp.compile("[[^<>\"{}|^`\\\\]&&[^\\x00-\\x20]]")   # [^<>\"{}|^`\\] - [#x00-#x20]
    else
      ##
      # UTF-8 regular expressions for Ruby 1.8.x.
      #
      # Each alternative spells out the UTF-8 byte sequences for one of the code
      # point ranges used in the 1.9 branch; the trailing (?# ...) groups are
      # regexp comments naming the range being encoded. As above, all
      # whitespace is stripped from the heredoc before it is compiled.
      U_CHARS1         = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                           \\xC3[\\x80-\\x96]|                                (?# [\\u00C0-\\u00D6]|)
                           \\xC3[\\x98-\\xB6]|                                (?# [\\u00D8-\\u00F6]|)
                           \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]|     (?# [\\u00F8-\\u02FF]|)
                           \\xCD[\\xB0-\\xBD]|                                (?# [\\u0370-\\u037D]|)
                           \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]|             (?# [\\u037F-\\u1FFF]|)
                           \\xE0[\\xA0-\\xBF][\\x80-\\xBF]|                   (?# ...)
                           \\xE1[\\x80-\\xBF][\\x80-\\xBF]|                   (?# ...)
                           \\xE2\\x80[\\x8C-\\x8D]|                           (?# [\\u200C-\\u200D]|)
                           \\xE2\\x81[\\xB0-\\xBF]|                           (?# [\\u2070-\\u218F]|)
                           \\xE2[\\x82-\\x85][\\x80-\\xBF]|                   (?# ...)
                           \\xE2\\x86[\\x80-\\x8F]|                           (?# ...)
                           \\xE2[\\xB0-\\xBE][\\x80-\\xBF]|                   (?# [\\u2C00-\\u2FEF]|)
                           \\xE2\\xBF[\\x80-\\xAF]|                           (?# ...)
                           \\xE3\\x80[\\x81-\\xBF]|                           (?# [\\u3001-\\uD7FF]|)
                           \\xE3[\\x81-\\xBF][\\x80-\\xBF]|                   (?# ...)
                           [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]|           (?# ...)
                           \\xED[\\x80-\\x9F][\\x80-\\xBF]|                   (?# ...)
                           \\xEF[\\xA4-\\xB6][\\x80-\\xBF]|                   (?# [\\uF900-\\uFDCF]|)
                           \\xEF\\xB7[\\x80-\\x8F]|                           (?# ...)
                           \\xEF\\xB7[\\xB0-\\xBF]|                           (?# [\\uFDF0-\\uFFFD]|)
                           \\xEF[\\xB8-\\xBE][\\x80-\\xBF]|                   (?# ...)
                           \\xEF\\xBF[\\x80-\\xBD]|                           (?# ...)
                           \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|      (?# [\\u{10000}-\\u{EFFFF}])
                           [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
                           \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF]       (?# ...)
                         EOS
      # Byte-sequence form of U+00B7, U+0300-U+036F and U+203F-U+2040.
      U_CHARS2         = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                           \\xC2\\xB7|                                        (?# \\u00B7|)
                           \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]|             (?# [\\u0300-\\u036F]|)
                           \\xE2\\x80\\xBF|\\xE2\\x81\\x80                    (?# [\\u203F-\\u2040])
                         EOS
      # Enumerates the allowed byte values from \x21 upwards; the gaps between
      # the listed ranges are the excluded characters named in the (?# ...)
      # comments (" < > \ ^ ` { | }).
      IRI_RANGE        = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                           \\x21|                                             (?# ")
                           [\\x23-\\x3b]|\\x3d|                               (?# < & >)
                           [\\x3f-\\x5b]|\\x5d|\\x5f|                         (?# \ ^ `)
                           [\\x61-\\x7a]|                                     (?# { } |)
                           [\\x7e-\\xff]
                         EOS
    end
    # Alias the lexer library's UCHAR pattern into this module so the terminals
    # below can interpolate it by its short name.
    # NOTE(review): presumably matches \uXXXX / \UXXXXXXXX escapes -- confirm in rdf/ll1/lexer.
    UCHAR                = RDF::LL1::Lexer::UCHAR

    # [93s] WS: exactly one whitespace character -- space, tab, carriage return
    # or line feed.
    #
    # Written as a character class rather than an alternation: in a regexp
    # literal without the /x flag every space is significant, and stray padding
    # before the closing slash previously turned the line-feed alternative into
    # "line feed followed by two spaces", so a bare "\n" was not recognised.
    WS                   = /[ \t\r\n]/
    # Building blocks for prefixed-name terminals. The bracketed numbers in the
    # trailing comments are grammar production numbers.

    # A percent sign followed by exactly two hexadecimal digits.
    PERCENT              = /%[0-9A-Fa-f]{2}/                                      # [162s]
    # A backslash followed by one of the listed punctuation characters.
    PN_LOCAL_ESC         = /\\[_~\.\-\!$\&'\(\)\*\+,;=:\/\?\#@%]/                 # [163s]
    PLX                  = /#{PERCENT}|#{PN_LOCAL_ESC}/                           # [160s]
    # ASCII letters, the U_CHARS1 code point ranges, or a UCHAR.
    PN_CHARS_BASE        = /[A-Z]|[a-z]|#{U_CHARS1}|#{UCHAR}/                     # [95s]
    PN_CHARS_U           = /_|#{PN_CHARS_BASE}/                                   # [96s]
    PN_CHARS             = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/                    # [98s]
    # Optional tail of a prefix: any mix of '.' and PN_CHARS, but the last
    # character must be a PN_CHARS (so the tail cannot end in '.').
    PN_CHARS_BODY        = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
    # Optional tail of a local name: like PN_CHARS_BODY, but PLX sequences are
    # also allowed, including in the final position.
    PN_LOCAL_BODY        = /(?:(?:\.|#{PN_CHARS}|#{PLX})*(?:#{PN_CHARS}|#{PLX}))?/
    # A local name starts with a digit, a PN_CHARS_U or a PLX, followed by the
    # optional PN_LOCAL_BODY tail.
    PN_LOCAL             = /(?:[0-9]|#{PN_CHARS_U}|#{PLX})#{PN_LOCAL_BODY}/       # [100s]

    # Top-level terminals. The bracketed numbers in the trailing comments are
    # grammar production numbers.
    # NOTE(review): DECIMAL_NEGATIVE and INTEGER_NEGATIVE are both tagged [83s],
    # and DOUBLE, DOUBLE_NEGATIVE and DOUBLE_POSITIVE are all tagged [79s]; some
    # of these numbers look like copy/paste slips -- confirm against the grammar.

    # 'e' or 'E', an optional sign, then one or more digits.
    EXPONENT             = /[eE][+-]?[0-9]+/                                      # [86s]

    # Square brackets enclosing zero or more WS characters.
    ANON                 = /\[#{WS}*\]/                                           # [94s]
    BLANK_NODE_LABEL     = /_:#{PN_LOCAL}/                                        # [73s]
    # Decimals require at least one digit after the point; digits before the
    # point are optional (".5" matches, "5." does not).
    DECIMAL              = /(?:[0-9]+\.[0-9]+|\.[0-9]+)/                          # [78s]
    DECIMAL_NEGATIVE     = /\-(?:[0-9]+\.[0-9]+|\.[0-9]+)/                        # [83s]
    DECIMAL_POSITIVE     = /\+(?:[0-9]+\.[0-9]+|\.[0-9]+)/                        # [81s]
    # Doubles are a decimal or integer mantissa followed by a mandatory EXPONENT.
    DOUBLE               = /(?:[0-9]+\.[0-9]+|\.[0-9]+|[0-9]+)#{EXPONENT}/        # [79s]
    DOUBLE_NEGATIVE      = /\-(?:[0-9]+\.[0-9]+|\.[0-9]+|[0-9]+)#{EXPONENT}/      # [79s]
    DOUBLE_POSITIVE      = /\+(?:[0-9]+\.[0-9]+|\.[0-9]+|[0-9]+)#{EXPONENT}/      # [79s]
    # A backslash followed by one of: t b n r f \ " '
    ECHAR                = /\\[tbnrf\\"']/                                        # [91s]
    INTEGER              = /[0-9]+/                                               # [77s]
    INTEGER_NEGATIVE     = /\-[0-9]+/                                             # [83s]
    INTEGER_POSITIVE     = /\+[0-9]+/                                             # [80s]
    # Angle brackets enclosing any number of IRI_RANGE characters or UCHARs.
    IRI_REF              = /<(?:#{IRI_RANGE}|#{UCHAR})*>/                         # [70s]
    # '@', one or more letters, then zero or more '-'-separated alphanumeric subtags.
    LANGTAG              = /@[a-zA-Z]+(?:-[a-zA-Z0-9]+)*/                         # [76s]
    PN_PREFIX            = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/                     # [99s]
    # The prefix is optional, so a bare ':' is a valid namespace name.
    PNAME_NS             = /#{PN_PREFIX}?:/                                       # [71s]
    PNAME_LN             = /#{PNAME_NS}#{PN_LOCAL}/                               # [72s]
    # Single-line strings: the delimiting quote, backslash, LF and CR may not
    # appear unescaped; ECHAR and UCHAR escapes are accepted.
    STRING_LITERAL1      = /'(?:[^\'\\\n\r]|#{ECHAR}|#{UCHAR})*'/                 # [87s]
    STRING_LITERAL2      = /"(?:[^\"\\\n\r]|#{ECHAR}|#{UCHAR})*"/                 # [88s]
    # Triple-quoted strings: one or two consecutive quote characters may appear
    # in the body as long as they are followed by a non-quote character or an
    # escape; line breaks are allowed.
    STRING_LITERAL_LONG1 = /'''(?:(?:'|'')?(?:[^'\\]|#{ECHAR}|#{UCHAR}))*'''/m    # [89s]
    STRING_LITERAL_LONG2 = /"""(?:(?:"|"")?(?:[^"\\]|#{ECHAR}|#{UCHAR}))*"""/m    # [90s]
  end
end