lib/shex/terminals.rb in shex-0.4.0 vs lib/shex/terminals.rb in shex-0.5.0
- old
+ new
@@ -1,5 +1,6 @@
+# -*- encoding: utf-8 -*-
require 'ebnf/ll1/lexer'
module ShEx
module Terminals
# Definitions of token regular expressions used for lexical analysis
@@ -13,12 +14,14 @@
[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
EOS
# U_CHARS2: matches U+00B7, or one character in U+0300..U+036F or U+203F..U+2040.
# Compiled from a double-quoted string, so "\\u" reaches the regexp engine as \u.
U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]").freeze
# IRI_RANGE: matches one character that is none of < > " { } | ^ ` \ and that
# is also outside \x00..\x20 (the two classes are intersected with &&).
IRI_RANGE = Regexp.compile("[[^<>\"{}|^`\\\\]&&[^\\x00-\\x20]]").freeze
- # 26t
- UCHAR = EBNF::LL1::Lexer::UCHAR
+ # 87 UCHAR: numeric character escape, built here from the two forms below
+ UCHAR4 = /\\u([0-9A-Fa-f]{4,4})/.freeze # backslash, lower-case u, exactly four hex digits (captured)
+ UCHAR8 = /\\U([0-9A-Fa-f]{8,8})/.freeze # backslash, upper-case U, exactly eight hex digits (captured)
+ UCHAR = Regexp.union(UCHAR4, UCHAR8).freeze # matches either escape form
# 171s PERCENT: a percent sign followed by exactly two hexadecimal digits
PERCENT = /%\h\h/.freeze
# 173s PN_LOCAL_ESC: a backslash followed by one of the punctuation characters
# listed in the character class
PN_LOCAL_ESC = /\\[_~\.\-\!$\&'\(\)\*\+,;=\/\?\#@%]/.freeze
# 170s
@@ -40,63 +43,75 @@
# 160s ECHAR: a backslash followed by one of t, b, n, r, f, backslash,
# double quote or single quote. Frozen like the other terminal constants.
ECHAR = /\\[tbnrf\\"']/.freeze
# WS: one or more whitespace characters and/or "#" comments; a comment runs
# up to, but does not include, the next LF or CR (the line break itself is
# then consumed as ordinary whitespace).
WS = /(?:\s|(?:#[^\n\r]*))+/m.freeze
- # 60
+ # 69 RDF_TYPE: the single lower-case letter a
RDF_TYPE = /a/.freeze
# 18t IRIREF: "<", then any number of IRI_RANGE characters or UCHAR escapes, then ">"
IRIREF = /<(?:#{IRI_RANGE}|#{UCHAR})*>/.freeze
- # 140s
+ # 73 PNAME_NS: an optional PN_PREFIX followed by ":"
PNAME_NS = /#{PN_PREFIX}?:/.freeze
- # 141s
+ # 74 PNAME_LN: PNAME_NS immediately followed by PN_LOCAL
PNAME_LN = /#{PNAME_NS}#{PN_LOCAL}/.freeze
- # 61
+ # 75 ATPNAME_NS: "@", optional WS, an optional PN_PREFIX, then ":"
ATPNAME_NS = /@#{WS}*#{PN_PREFIX}?:/m.freeze
- # 62
+ # 76 ATPNAME_LN: "@", optional WS, then PNAME_NS followed by PN_LOCAL
ATPNAME_LN = /@#{WS}*#{PNAME_NS}#{PN_LOCAL}/m.freeze
- # 142s
+ # 77 BLANK_NODE_LABEL: "_:", a digit or PN_CHARS_U, then optionally a run of PN_CHARS or "." that ends in PN_CHARS
BLANK_NODE_LABEL = /_:(?:\d|#{PN_CHARS_U})(?:(?:#{PN_CHARS}|\.)*#{PN_CHARS})?/.freeze
- # 145s
+ # 78 LANGTAG: "@", one or more ASCII letters, then zero or more "-"-prefixed ASCII alphanumeric subtags
LANGTAG = /@[a-zA-Z]+(?:-[a-zA-Z0-9]+)*/.freeze
- # 19t
+ # 79 INTEGER: optional sign followed by one or more digits
INTEGER = /[+-]?\d+/.freeze
- # 20t
+ # 80 DECIMAL: optional sign, zero or more digits, ".", then one or more digits
DECIMAL = /[+-]?(?:\d*\.\d+)/.freeze
- # 21t
+ # 81 DOUBLE: optional sign, then either digits "." optional-digits EXPONENT, or optional "." digits EXPONENT
DOUBLE = /[+-]?(?:\d+\.\d*#{EXPONENT}|\.?\d+#{EXPONENT})/.freeze
- # 156s
+ # 83 STRING_LITERAL1: single-quoted; body is any character except ' \ LF CR, or an ECHAR, or a UCHAR
STRING_LITERAL1 = /'(?:[^\'\\\n\r]|#{ECHAR}|#{UCHAR})*'/.freeze
- # 157s
+ # 84 STRING_LITERAL2: double-quoted; body is any character except " \ LF CR, or an ECHAR, or a UCHAR
STRING_LITERAL2 = /"(?:[^\"\\\n\r]|#{ECHAR}|#{UCHAR})*"/.freeze
- # 158s
+ # 85 STRING_LITERAL_LONG1: delimited by '''; each body unit may be preceded by one or two ' and is a character other than ' or \, an ECHAR, or a UCHAR
STRING_LITERAL_LONG1 = /'''(?:(?:'|'')?(?:[^'\\]|#{ECHAR}|#{UCHAR}))*'''/m.freeze
- # 159s
+ # 86 STRING_LITERAL_LONG2: delimited by """; each body unit may be preceded by one or two " and is a character other than " or \, an ECHAR, or a UCHAR
STRING_LITERAL_LONG2 = /"""(?:(?:"|"")?(?:[^"\\]|#{ECHAR}|#{UCHAR}))*"""/m.freeze
- # 29
+ # 83l LANG_STRING_LITERAL1: a single-quoted string immediately followed by a LANGTAG
+ LANG_STRING_LITERAL1 = /'(?:[^\'\\\n\r]|#{ECHAR}|#{UCHAR})*'#{LANGTAG}/.freeze
+ # 84l LANG_STRING_LITERAL2: a double-quoted string immediately followed by a LANGTAG
+ LANG_STRING_LITERAL2 = /"(?:[^\"\\\n\r]|#{ECHAR}|#{UCHAR})*"#{LANGTAG}/.freeze
+ # 85l LANG_STRING_LITERAL_LONG1: a '''-delimited string immediately followed by a LANGTAG
+ LANG_STRING_LITERAL_LONG1 = /'''(?:(?:'|'')?(?:[^'\\]|#{ECHAR}|#{UCHAR}))*'''#{LANGTAG}/m.freeze
+ # 86l LANG_STRING_LITERAL_LONG2: a """-delimited string immediately followed by a LANGTAG
+ LANG_STRING_LITERAL_LONG2 = /"""(?:(?:"|"")?(?:[^"\\]|#{ECHAR}|#{UCHAR}))*"""#{LANGTAG}/m.freeze
+
+ # XX REGEXP: "/", one or more body units, "/", then any number of the flag
+ # letters s, m, i, x. A body unit is a character other than / \ LF CR, a
+ # backslash escape of one of n r t \ | . ? * + ( ) { } $ - [ ] ^ /, or a UCHAR.
+ # The hyphen in the escape class is written as \- so that it is a literal;
+ # unescaped, "$-\[" would form a range from "$" to "[" and also admit
+ # digits, upper-case letters and other punctuation after the backslash.
+ REGEXP = %r(/(?:[^/\\\n\r]|\\[nrt\\|.?*+(){}$\-\[\]^/]|#{UCHAR})+/[smix]*).freeze
+
+ # 68 CODE: "{", any run of (character other than % or \, escaped % or \, or UCHAR), then "%", optional WS, "}"
CODE = /\{(?:[^%\\]|\\[%\\]|#{UCHAR})*%#{WS}*\}/m.freeze
- # 30
+ # 70 REPEAT_RANGE: "{", optional whitespace, INTEGER, optionally "," with optional WS and an optional INTEGER or "*", optional WS, "}"
REPEAT_RANGE = /\{\s*#{INTEGER}(?:,#{WS}*(?:#{INTEGER}|\*)?)?#{WS}*\}/.freeze
# String terminals, mixed case sensitivity.
# "true"/"false", the two-character operators "^^" and "//", the single
# punctuation characters and the letter "a" are matched case-sensitively;
# the keywords inside the (?i: ... ) group are matched case-insensitively.
# The pattern is compiled with the x flag, so the line breaks and
# indentation inside the literal are not part of the match.
STR_EXPR = %r(true|false
|\^\^|\/\/
- |[\(\)\{\}\[\],\.;\=\-\~!\|\&\@\$\?\+\*\%\^\/a]|
+ |[\(\)\{\}\[\],\.;\=\-\~!\|\&\@\$\?\+\*\%\^a]|
(?i:OR|AND|NOT
|BASE|PREFIX
- |IRI|BNODE|NONLITERAL|PATTERN
+ |IRI|BNODE|NONLITERAL
|MINLENGTH|MAXLENGTH|LENGTH
|MAXINCLUSIVE|MAXEXCLUSIVE
|MININCLUSIVE|MINEXCLUSIVE
|TOTALDIGITS|FRACTIONDIGITS
|START
|EXTERNAL|CLOSED|EXTRA|LITERAL
)
)x.freeze
# Map terminals to canonical form: a hash from each keyword's lower-case
# spelling to the upper-case spelling listed here (e.g. "or" => "OR"),
# built by folding the word list into an initially empty hash.
- STR_MAP = %w{OR AND NOT BASE PREFIX IRI BNODE NONLITERAL PATTERN
+ STR_MAP = %w{OR AND NOT BASE PREFIX IRI BNODE NONLITERAL
MINLENGTH MAXLENGTH LENGTH MININCLUSIVE MAXINCLUSIVE MINEXCLUSIVE MAXEXCLUSIVE
TOTALDIGITS FRACTIONDIGITS START EXTERNAL CLOSED EXTRA LITERAL}.
inject({}) do |memo, t|
memo.merge(t.downcase => t)
end