lib/minjs/lex.rb in minjs-0.2.2 vs lib/minjs/lex.rb in minjs-0.3.0
- old
+ new
@@ -1,94 +1,118 @@
+# coding: utf-8
require 'minjs/ctype'
module Minjs
class Lex
include Ctype
attr_reader :pos
- attr_reader :error_pos
attr_reader :codes
def initialize(str = "", options = {})
str = str.gsub(/\r\n/, "\n")
@codes = str.codepoints
+ if !str.match(/\n\z/)
+ @codes.push(10)
+ end
@pos = 0
@lit_cache = []
@lit_nextpos = []
@logger = options[:logger]
+
+ @eval_nest = 0
end
- def next_input_element(options = {})
- if @lit_cache[@pos]
- ret = @lit_cache[@pos]
+ def clear_cache
+ @lit_cache = []
+ @lit_nextpos = []
+ end
+
+ #
+ # Fetch next literal
+ #
+ # hint:
+ # :regexp
+ # :div
+ # nil
+ #
+ # ECMA262 says:
+ #
+ # There are no syntactic grammar contexts where both a leading division
+ # or division-assignment, and a leading RegularExpressionLiteral are permitted.
+ # This is not affected by semicolon insertion (see 7.9); in examples such as the following:
+ # To determine `/' is regular expression or not
+ #
+ def next_input_element(hint)
+ if ret = @lit_cache[@pos]
@pos = @lit_nextpos[@pos]
- @error_pos = @pos
+ @head_pos = @pos
return ret
end
pos0 = @pos
- if ret = (white_space || line_terminator || comment || token)
- if ret
- @lit_cache[pos0] = ret
- @lit_nextpos[pos0] = @pos
- end
- @error_pos = @pos
- return ret
- end
#
- # ECMA262 say:
+ # skip white space here, because ECMA262(5.1.2) says:
#
- # There are no syntactic grammar contexts where both a leading division
- # or division-assignment, and a leading RegularExpressionLiteral are permitted.
- # This is not affected by semicolon insertion (see 7.9); in examples such as the following:
- # To determine `/' is regular expression or not
+ # Simple white space and single-line comments are discarded and
+ # do not appear in the stream of input elements for the
+ # syntactic grammar.
#
- #
- if options[:hint] == :div
+ while white_space or single_line_comment
+ end
+
+ ret = line_terminator || multi_line_comment || token
+ if ret
+ @lit_cache[pos0] = ret
+ @lit_nextpos[pos0] = @pos
+ @head_pos = @pos
+ return ret
+ end
+
+ if @codes[@pos].nil?
+ return nil
+ end
+ if hint.nil?
+ ECMA262::LIT_DIV_OR_REGEXP_LITERAL
+ elsif hint == :div
ret = div_punctuator
if ret
@lit_cache[pos0] = ret
@lit_nextpos[pos0] = @pos
end
- @error_pos = @pos
+ @head_pos = @pos
return ret
- elsif options[:hint] == :regexp
+ elsif hint == :regexp
ret = regexp_literal
if ret
@lit_cache[pos0] = ret
@lit_nextpos[pos0] = @pos
end
- @error_pos = @pos
+ @head_pos = @pos
return ret
else
ECMA262::LIT_DIV_OR_REGEXP_LITERAL
end
end
# 7.2
def white_space
- code = @codes[@pos]
- if white_space?(code)
- while true
+ if white_space?(@codes[@pos])
+ begin
@pos += 1
- code = @codes[@pos]
- break unless white_space?(code)
- end
+ end until !white_space?(@codes[@pos])
return ECMA262::WhiteSpace.get
else
nil
end
end
#7.3
def line_terminator
- code = @codes[@pos]
- if line_terminator?(code)
- while true
+ if line_terminator?(@codes[@pos])
+ begin
@pos += 1
- code = @codes[@pos]
- break unless line_terminator?(code)
- end
+ end until !line_terminator?(@codes[@pos])
return ECMA262::LineFeed.get
else
nil
end
end
@@ -97,250 +121,260 @@
def comment
multi_line_comment || single_line_comment
end
def multi_line_comment
+ # /*
if @codes[@pos] == 0x2f and @codes[@pos + 1] == 0x2a
- @pos = @pos + 2
+ @pos += 2
pos0 = @pos
- lf = false
- while (@codes[@pos] != 0x2a or @codes[@pos + 1] != 0x2f)
- if @codes[@pos].nil?
- raise ParseError.new("no `*/' at end of comment", self)
- end
- if line_terminator?(@codes[@pos])
- lf = true
- end
- @pos = @pos + 1
+ # */
+ while (code = @codes[@pos] != 0x2a) or @codes[@pos + 1] != 0x2f
+ raise ParseError.new("no `*/' at end of comment", self) if code.nil?
+ @pos += 1
end
- @pos = @pos + 2
- return ECMA262::MultiLineComment.new(@codes[pos0...(@pos-2)].pack("U*"), lf)
+ @pos +=2
+ return ECMA262::MultiLineComment.new(@codes[pos0...(@pos-2)].pack("U*"))
else
nil
end
end
def single_line_comment
+ # //
if @codes[@pos] == 0x2f and @codes[@pos + 1] == 0x2f
- @pos = @pos + 2
+ @pos += 2
pos0 = @pos
- while !line_terminator?(@codes[@pos]) and @codes[@pos]
+ while (code = @codes[@pos]) and !line_terminator?(code)
@pos += 1
end
- if @codes[@pos].nil?
- return ECMA262::SingleLineComment.new(@codes[pos0...@pos].pack("U*") + "\n")
- else
- return ECMA262::SingleLineComment.new(@codes[pos0...@pos].pack("U*"))
- end
+ return ECMA262::SingleLineComment.new(@codes[pos0...@pos].pack("U*"))
else
nil
end
end
#
# 7.5 tokens
#
def token
- pos0 = @pos
- ret = (identifier_name || numeric_literal || punctuator || string_literal)
- if ret
- @lit_cache[pos0] = ret
- @lit_nextpos[pos0] = @pos
- end
- ret
+ identifier_name || numeric_literal || punctuator || string_literal
end
+ #
def unicode_escape?
- if @codes[@pos] == 0x5c and
- @codes[@pos+1] == 0x75 and
- hex_number?(@codes[@pos+2]) and
- hex_number?(@codes[@pos+3]) and
- hex_number?(@codes[@pos+4]) and
- hex_number?(@codes[@pos+5])
- [@codes[@pos+2],@codes[@pos+3],@codes[@pos+4],@codes[@pos+5]].pack("U*").to_i(16)
+ # @codes[@pos] == 0x5c
+ if @codes[@pos+1] == 0x75 #u
+ if hex_digit?(@codes[@pos+2]) and
+ hex_digit?(@codes[@pos+3]) and
+ hex_digit?(@codes[@pos+4]) and
+ hex_digit?(@codes[@pos+5])
+ @codes[(@pos+2)..(@pos+5)].pack("U*").to_i(16)
+ else
+ raise ParseError.new("bad unicode escpae sequence", self)
+ end
else
- false
+ nil
end
end
def identifier_name
- pos0 = @pos
- return nil if @codes[@pos].nil?
+ return nil if (code = @codes[@pos]).nil?
+ pos0 = @pos
chars = []
- if u=unicode_escape? and identifier_start?(u)
- chars.push(u)
+ if code == 0x5c and ucode = unicode_escape? and identifier_start?(ucode)
+ chars.push(ucode)
@pos += 6
- elsif identifier_start?(@codes[@pos])
- chars.push(@codes[@pos])
+ elsif identifier_start?(code)
+ chars.push(code)
@pos += 1
else
return nil
end
while true
code = @codes[@pos]
- if u=unicode_escape? and (identifier_start?(u) || identifier_part?(u))
- chars.push(u)
+ if code == 0x5c and ucode = unicode_escape? and identifier_part?(ucode)
+ chars.push(ucode)
@pos += 6
- elsif identifier_part?(@codes[@pos])
- chars.push(@codes[@pos])
+ elsif identifier_part?(code)
+ chars.push(code)
@pos += 1
else
name = chars.pack("U*").to_sym
- return ECMA262::IdentifierName.new(nil, name)
+ return ECMA262::IdentifierName.get(nil, name)
end
end
end
def punctuator
code0 = @codes[@pos]
code1 = @codes[@pos+1]
code2 = @codes[@pos+2]
code3 = @codes[@pos+3]
- if false
- elsif (code0 == 0x3e and code1 == 0x3e and code2 == 0x3e and code3 == 0x3d)
- @pos += 4
- return ECMA262::Punctuator.get('>>>=')
- elsif (code0 == 0x3d and code1 == 0x3d and code2 == 0x3d)
- @pos += 3
- return ECMA262::Punctuator.get('===')
- elsif (code0 == 0x21 and code1 == 0x3d and code2 == 0x3d)
- @pos += 3
- return ECMA262::Punctuator.get('!==')
- elsif (code0 == 0x3e and code1 == 0x3e and code2 == 0x3e)
- @pos += 3
- return ECMA262::Punctuator.get('>>>')
- elsif (code0 == 0x3c and code1 == 0x3c and code2 == 0x3d)
- @pos += 3
- return ECMA262::Punctuator.get('<<=')
- elsif (code0 == 0x3e and code1 == 0x3e and code2 == 0x3d)
- @pos += 3
- return ECMA262::Punctuator.get('>>=')
- elsif (code0 == 0x3e and code1 == 0x3e)
- @pos += 2
- return ECMA262::Punctuator.get('>>')
- elsif (code0 == 0x3c and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('<=')
- elsif (code0 == 0x3e and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('>=')
- elsif (code0 == 0x3d and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('==')
- elsif (code0 == 0x21 and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('!=')
- elsif (code0 == 0x2b and code1 == 0x2b)
- @pos += 2
- return ECMA262::Punctuator.get('++')
- elsif (code0 == 0x2d and code1 == 0x2d)
- @pos += 2
- return ECMA262::Punctuator.get('--')
- elsif (code0 == 0x3c and code1 == 0x3c)
- @pos += 2
- return ECMA262::Punctuator.get('<<')
- elsif (code0 == 0x3e and code1 == 0x3e)
- @pos += 2
- return ECMA262::Punctuator.get('>>')
- elsif (code0 == 0x26 and code1 == 0x26)
- @pos += 2
- return ECMA262::Punctuator.get('&&')
- elsif (code0 == 0x7c and code1 == 0x7c)
- @pos += 2
- return ECMA262::Punctuator.get('||')
- elsif (code0 == 0x2b and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('+=')
- elsif (code0 == 0x2d and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('-=')
- elsif (code0 == 0x2a and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('*=')
- elsif (code0 == 0x25 and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('%=')
- elsif (code0 == 0x26 and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('&=')
- elsif (code0 == 0x7c and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('|=')
- elsif (code0 == 0x5e and code1 == 0x3d)
- @pos += 2
- return ECMA262::Punctuator.get('^=')
- elsif (code0 == 0x7b)
- @pos += 1
- return ECMA262::Punctuator.get('{')
- elsif (code0 == 0x7d)
- @pos += 1
- return ECMA262::Punctuator.get('}')
- elsif (code0 == 0x28)
- @pos += 1
- return ECMA262::Punctuator.get('(')
- elsif (code0 == 0x29)
- @pos += 1
- return ECMA262::Punctuator.get(')')
- elsif (code0 == 0x5b)
- @pos += 1
- return ECMA262::Punctuator.get('[')
- elsif (code0 == 0x5d)
- @pos += 1
- return ECMA262::Punctuator.get(']')
- elsif (code0 == 0x2e)
- @pos += 1
- return ECMA262::Punctuator.get('.')
- elsif (code0 == 0x3b)
- @pos += 1
- return ECMA262::Punctuator.get(';')
- elsif (code0 == 0x2c)
- @pos += 1
- return ECMA262::Punctuator.get(',')
- elsif (code0 == 0x3c)
- @pos += 1
- return ECMA262::Punctuator.get('<')
- elsif (code0 == 0x3e)
- @pos += 1
- return ECMA262::Punctuator.get('>')
- elsif (code0 == 0x2b)
- @pos += 1
- return ECMA262::Punctuator.get('+')
- elsif (code0 == 0x2d)
- @pos += 1
- return ECMA262::Punctuator.get('-')
- elsif (code0 == 0x2a)
- @pos += 1
- return ECMA262::Punctuator.get('*')
- elsif (code0 == 0x25)
- @pos += 1
- return ECMA262::Punctuator.get('%')
- elsif (code0 == 0x26)
- @pos += 1
- return ECMA262::Punctuator.get('&')
- elsif (code0 == 0x7c)
- @pos += 1
- return ECMA262::Punctuator.get('|')
- elsif (code0 == 0x5e)
- @pos += 1
- return ECMA262::Punctuator.get('^')
- elsif (code0 == 0x21)
- @pos += 1
- return ECMA262::Punctuator.get('!')
- elsif (code0 == 0x7e)
- @pos += 1
- return ECMA262::Punctuator.get('~')
- elsif (code0 == 0x3f)
- @pos += 1
- return ECMA262::Punctuator.get('?')
- elsif (code0 == 0x3a)
- @pos += 1
- return ECMA262::Punctuator.get(':')
- elsif (code0 == 0x3d)
- @pos += 1
- return ECMA262::Punctuator.get('=')
+ if code0 == 0x21 # !
+ if code1 == 0x3d and code2 == 0x3d # !==
+ @pos += 3
+ return ECMA262::PUNC_SNEQ
+ end
+ if code1 == 0x3d # !=
+ @pos += 2
+ return ECMA262::PUNC_NEQ
+ end
+ @pos += 1 # !
+ return ECMA262::PUNC_LNOT
+ elsif code0 == 0x25 # %
+ if code1 == 0x3d # %=
+ @pos += 2
+ return ECMA262::PUNC_MODLET
+ end
+ @pos += 1 # %
+ return ECMA262::PUNC_MOD
+ elsif code0 == 0x26 # &
+ if code1 == 0x3d # &=
+ @pos += 2
+ return ECMA262::PUNC_ANDLET
+ end
+ if code1 == 0x26 # &&
+ @pos += 2
+ return ECMA262::PUNC_LAND
+ end
+ @pos += 1 # &
+ return ECMA262::PUNC_AND
+ elsif code0 == 0x28 # (
+ @pos += 1 # (
+ return ECMA262::PUNC_LPARENTHESIS
+ elsif code0 == 0x29 # )
+ @pos += 1 # )
+ return ECMA262::PUNC_RPARENTHESIS
+ elsif code0 == 0x2a # *
+ if code1 == 0x3d # *=
+ @pos += 2
+ return ECMA262::PUNC_MULLET
+ end
+ @pos += 1 # *
+ return ECMA262::PUNC_MUL
+ elsif code0 == 0x2b # +
+ if code1 == 0x3d # +=
+ @pos += 2
+ return ECMA262::PUNC_ADDLET
+ end
+ if code1 == 0x2b # ++
+ @pos += 2
+ return ECMA262::PUNC_INC
+ end
+ @pos += 1 # +
+ return ECMA262::PUNC_ADD
+ elsif code0 == 0x2c # ,
+ @pos += 1 # ,
+ return ECMA262::PUNC_COMMA
+ elsif code0 == 0x2d # -
+ if code1 == 0x3d # -=
+ @pos += 2
+ return ECMA262::PUNC_SUBLET
+ end
+ if code1 == 0x2d # --
+ @pos += 2
+ return ECMA262::PUNC_DEC
+ end
+ @pos += 1 # -
+ return ECMA262::PUNC_SUB
+ elsif code0 == 0x2e # .
+ @pos += 1 # .
+ return ECMA262::PUNC_PERIOD
+ elsif code0 == 0x3a # :
+ @pos += 1 # :
+ return ECMA262::PUNC_COLON
+ elsif code0 == 0x3b # ;
+ @pos += 1 # ;
+ return ECMA262::PUNC_SEMICOLON
+ elsif code0 == 0x3c # <
+ if code1 == 0x3d # <=
+ @pos += 2
+ return ECMA262::PUNC_LTEQ
+ end
+ if code1 == 0x3c and code2 == 0x3d # <<=
+ @pos += 3
+ return ECMA262::PUNC_LSHIFTLET
+ end
+ if code1 == 0x3c # <<
+ @pos += 2
+ return ECMA262::PUNC_LSHIFT
+ end
+ @pos += 1 # <
+ return ECMA262::PUNC_LT
+ elsif code0 == 0x3d # =
+ if code1 == 0x3d and code2 == 0x3d # ===
+ @pos += 3
+ return ECMA262::PUNC_SEQ
+ end
+ if code1 == 0x3d # ==
+ @pos += 2
+ return ECMA262::PUNC_EQ
+ end
+ @pos += 1 # =
+ return ECMA262::PUNC_LET
+ elsif code0 == 0x3e # >
+ if code1 == 0x3e and code2 == 0x3e and code3 == 0x3d # >>>=
+ @pos += 4
+ return ECMA262::PUNC_URSHIFTLET
+ end
+ if code1 == 0x3e and code2 == 0x3e # >>>
+ @pos += 3
+ return ECMA262::PUNC_URSHIFT
+ end
+ if code1 == 0x3e and code2 == 0x3d # >>=
+ @pos += 3
+ return ECMA262::PUNC_RSHIFTLET
+ end
+ if code1 == 0x3e # >>
+ @pos += 2
+ return ECMA262::PUNC_RSHIFT
+ end
+ if code1 == 0x3d # >=
+ @pos += 2
+ return ECMA262::PUNC_GTEQ
+ end
+ @pos += 1 # >
+ return ECMA262::PUNC_GT
+ elsif code0 == 0x3f # ?
+ @pos += 1 # ?
+ return ECMA262::PUNC_CONDIF
+ elsif code0 == 0x5b # [
+ @pos += 1 # [
+ return ECMA262::PUNC_LSQBRAC
+ elsif code0 == 0x5d # ]
+ @pos += 1 # ]
+ return ECMA262::PUNC_RSQBRAC
+ elsif code0 == 0x5e # ^
+ if code1 == 0x3d # ^=
+ @pos += 2
+ return ECMA262::PUNC_XORLET
+ end
+ @pos += 1 # ^
+ return ECMA262::PUNC_XOR
+ elsif code0 == 0x7b # {
+ @pos += 1 # {
+ return ECMA262::PUNC_LCURLYBRAC
+ elsif code0 == 0x7c # |
+ if code1 == 0x7c # ||
+ @pos += 2
+ return ECMA262::PUNC_LOR
+ end
+ if code1 == 0x3d # |=
+ @pos += 2
+ return ECMA262::PUNC_ORLET
+ end
+ @pos += 1 # |
+ return ECMA262::PUNC_OR
+ elsif code0 == 0x7d # }
+ @pos += 1 # }
+ return ECMA262::PUNC_RCURLYBRAC
+ elsif code0 == 0x7e # ~
+ @pos += 1 # ~
+ return ECMA262::PUNC_NOT
end
nil
end
def div_punctuator
@@ -432,72 +466,128 @@
end
return @codes[pos0...@pos].pack("U*")
end
#7.8.3
+ #B.1.1
def numeric_literal
- code = @codes[@pos]
- return nil if code.nil?
-
- hex_integer_literal || decimal_literal
+ hex_integer_literal || octal_integer_literal || decimal_literal
end
+ #7.8.3
+ #
+ # HexIntegerLiteral ::
+ # 0x HexDigit
+ # 0X HexDigit
+ # HexIntegerLiteral HexDigit
+ #
def hex_integer_literal
- pos0 = @pos
- # 0x.... or 0X....
code = @codes[@pos]
- if code == 0x30 and (@codes[@pos+1] == 0x78 || @codes[@pos+1] == 0x58) #hex integer
+ if code.nil?
+ return nil
+ #0x / 0X
+ elsif code == 0x30 and (@codes[@pos+1] == 0x78 || @codes[@pos+1] == 0x58)
@pos += 2
- while true
- code = @codes[@pos]
- if (code >= 0x30 and code <= 0x39) || (code >= 0x41 and code <= 0x4f) || (code >= 0x61 and code <= 0x6f)
- ;
- elsif identifier_start?(code)
- raise ParseError.new("The source character immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit", self)
- else
- return ECMA262::ECMA262Numeric.new(@codes[(pos0+2)...@pos].pack("U*").to_i(16))
- end
+ pos0 = @pos
+ while code = @codes[@pos] and hex_digit?(code)
+ @pos += 1;
+ end
+ if identifier_start?(code)
+ raise ParseError.new("The source character immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit", self)
+ else
+ return ECMA262::ECMA262Numeric.new(@codes[pos0...@pos].pack("U*").to_i(16))
+ end
+ else
+ nil
+ end
+ end
+
+ #B.1.1
+ # OctalIntegerLiteral ::
+ # 0 OctalDigit
+ # OctalIntegerLiteral OctalDigit
+ #
+ def octal_integer_literal
+ code = @codes[@pos]
+ if code.nil?
+ return nil
+ elsif code == 0x30 and (code1 = @codes[@pos + 1]) >= 0x30 and code1 <= 0x37
+ @pos += 1
+ pos0 = @pos
+ while code = @codes[@pos] and code >= 0x30 and code <= 0x37
@pos += 1
end
+ if identifier_start?(code)
+ raise ParseError.new("The source character immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit", self)
+ else
+ return ECMA262::ECMA262Numeric.new(@codes[pos0...@pos].pack("U*").to_i(8))
+ end
else
nil
end
end
+ # 7.8.3
+ #
+ # DecimalLiteral ::
+ # DecimalIntegerLiteral . DecimalDigitsopt ExponentPartopt
+ # . DecimalDigits ExponentPartopt
+ # DecimalIntegerLiteral ExponentPartopt
+ #
def decimal_literal
pos0 = @pos
code = @codes[@pos]
- if code == 0x2e #.
+
+ if code.nil?
+ return nil
+ elsif code == 0x2e #.
@pos += 1
f = decimal_digits
- if f.nil?
- @pos = pos0
- return nil
+ if f.nil? #=> this period is punctuator
+ @pos = pos0 + 1
+ return ECMA262::PUNC_PERIOD
end
- if @codes[@pos] == 0x65 || @codes[@pos] == 0x45
+ if (code = @codes[@pos]) == 0x65 || code == 0x45
@pos += 1
- e = exp_part
+ e = exponent_part
end
if identifier_start?(@codes[@pos])
raise ParseError.new("The source character immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit", self)
end
return ECMA262::ECMA262Numeric.new('0', f, e)
- end
+ elsif code == 0x30 # zero
+ i = "0"
+ @pos += 1
+ if @codes[@pos] == 0x2e #.
+ @pos += 1
+ f = decimal_digits
+ if (code = @codes[@pos]) == 0x65 || code == 0x45 #e or E
+ @pos += 1
+ e = exponent_part
+ end
+ elsif (code = @codes[@pos]) == 0x65 || code == 0x45 #e or E
+ @pos += 1
+ e = exponent_part
+ end
+ if identifier_start?(@codes[@pos])
+ raise ParseError.new("The source character immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit", self)
+ end
- if code >= 0x30 and code <= 0x39
+ return ECMA262::ECMA262Numeric.new(i, f, e)
+ elsif code >= 0x31 and code <= 0x39
i = decimal_digits
if @codes[@pos] == 0x2e #.
@pos += 1
f = decimal_digits
- if @codes[@pos] == 0x65 || @codes[@pos] == 0x45 #e or E
+ if (code = @codes[@pos]) == 0x65 || code == 0x45 #e or E
@pos += 1
- e = exp_part
+ e = exponent_part
end
- elsif @codes[@pos] == 0x65 || @codes[@pos] == 0x45 #e or E
+ elsif (code = @codes[@pos]) == 0x65 || code == 0x45 #e or E
@pos += 1
- e = exp_part
+ e = exponent_part
end
if identifier_start?(@codes[@pos])
raise ParseError.new("The source character immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit", self)
end
@@ -505,274 +595,366 @@
end
nil
end
- def exp_part
- if @codes[@pos] == 0x2b
+ # 7.8.3
+ #
+ # ExponentPart ::
+ # ExponentIndicator SignedInteger
+ #
+ def exponent_part
+ if (code = @codes[@pos]) == 0x2b
@pos += 1
- elsif @codes[@pos] == 0x2d
+ elsif code == 0x2d
@pos += 1
neg = true
end
+ d = decimal_digits
+ raise ParseError.new("unexpecting token", self) if d.nil?
if neg
- e = "-#{decimal_digits}"
+ e = "-#{d}"
else
- e = decimal_digits
+ e = d
end
e
end
+ #7.8.3
+ #
+ # DecimalDigit :: one of
+ # 0 1 2 3 4 5 6 7 8 9
+ #
def decimal_digits
pos0 = @pos
- code = @codes[@pos]
- return nil if code.nil?
- if code >= 0x30 and code <= 0x39
+ if (code = @codes[@pos]) >= 0x30 and code <= 0x39
@pos += 1
- while true
- code = @codes[@pos]
- if code >= 0x30 and code <= 0x39
- @pos += 1
- else
- return @codes[pos0...@pos].pack("U*")
- end
+ while code = @codes[@pos] and code >= 0x30 and code <= 0x39
+ @pos += 1
end
+ return @codes[pos0...@pos].pack("U*")
else
nil
end
end
#7.8.4
+ #
+ # StringLiteral ::
+ # " DoubleStringCharactersopt "
+ # ' SingleStringCharactersopt '
+ #
+ # DoubleStringCharacters ::
+ # DoubleStringCharacter DoubleStringCharactersopt
+ #
+ # SingleStringCharacters ::
+ # SingleStringCharacter SingleStringCharactersopt
+ #
+ # DoubleStringCharacter ::
+ # SourceCharacter but not one of " or \ or LineTerminator
+ # \ EscapeSequence
+ # LineContinuation
+ #
+ # SingleStringCharacter ::
+ # SourceCharacter but not one of ' or \ or LineTerminator
+ # \ EscapeSequence
+ # LineContinuation
+ #
def string_literal
- code = @codes[@pos]
- return nil if code.nil?
- pos0 = @pos
- if code == 0x27 #'
+ if (code = @codes[@pos]) == 0x27 #'
term = 0x27
elsif code == 0x22 #"
term = 0x22
else
return nil
end
+ @pos += 1
+ pos0 = @pos
- str = ''
- while @codes[@pos]
- @pos += 1
- code = @codes[@pos]
+ str = []
+ while (code = @codes[@pos])
if code.nil?
raise ParseError.new("no `#{term}' at end of string", self)
elsif line_terminator?(code)
raise ParseError.new("string has line terminator in body", self)
elsif code == 0x5c #\
@pos += 1
- str << esc_string
+ str.push(escape_sequence)
elsif code == term
- @pos += 1
- return ECMA262::ECMA262String.new(str)
+ @pos += 1
+ return ECMA262::ECMA262String.new(str.compact.pack("U*"))
else
- str << code
+ @pos += 1
+ str.push(code)
end
end
nil
end
- # Annex B
- def octal?(char)
- char >= 0x30 and char <= 0x39
- end
+ # 7.8.4
+ # B.1.2
+ #
+ # EscapeSequence ::
+ # CharacterEscapeSequence
+ # 0 [lookahead ∉ DecimalDigit]
+ # HexEscapeSequence
+ # UnicodeEscapeSequence
+ # OctalEscapeSequence
- def esc_string
- case @codes[@pos]
- # when 0x30
- # "\u{0}"
- when 0x27
- "\'"
- when 0x22
- "\""
- when 0x5c
- "\\"
+ def escape_sequence
+ case (code = @codes[@pos])
+# when 0x30
+# @pos += 1
+# 0
+ when 0x27 #'
+ @pos += 1
+ 0x27
+ when 0x22 #"
+ @pos += 1
+ 0x22
+ when 0x5c #\
+ @pos += 1
+ 0x5c
when 0x62 #b
- "\u{0008}"
+ @pos += 1
+ 0x08
when 0x74 #t
- "\u{0009}"
+ @pos += 1
+ 0x09
when 0x6e #n
- "\u{000a}"
+ @pos += 1
+ 0x0a
when 0x76 #v
- "\u{000b}"
+ @pos += 1
+ 0x0b
when 0x66 #f
- "\u{000c}"
+ @pos += 1
+ 0x0c
when 0x72 #r
- "\u{000d}"
+ @pos += 1
+ 0x0d
when 0x78 #x
- t = [[@codes[@pos+1], @codes[@pos+2]].pack("U*").to_i(16)].pack("U*")
- @pos += 2
+ #check
+ t = @codes[(@pos+1)..(@pos+2)].pack("U*").to_i(16)
+ @pos += 3
t
when 0x75 #u
- t = [[@codes[@pos+1], @codes[@pos+2], @codes[@pos+3], @codes[@pos+4]].pack("U*").to_i(16)].pack("U*")
- @pos += 4
+ #check
+ t = @codes[(@pos+1)..(@pos+4)].pack("U*").to_i(16)
+ @pos += 5
t
else
# line continuation
- if line_terminator?(@codes[@pos])
- ""
- # octal
- # Annex B
- elsif octal?(@codes[@pos])
- oct = (@codes[@pos] - 0x30)
- 2.times do
- break unless octal?(@codes[@pos+1])
- @pos += 1
- oct *= 8
- oct += (@codes[@pos] - 0x30)
+ if line_terminator?(code)
+ @pos += 1
+ nil
+ # Annex B.1.2
+ #
+ # OctalEscapeSequence ::
+ # OctalDigit [lookahead ∉ DecimalDigit]
+ # ZeroToThree OctalDigit [lookahead ∉ DecimalDigit]
+ # FourToSeven OctalDigit
+ # ZeroToThree OctalDigit OctalDigit
+ #
+ # Note:
+ #
+ # A string such as the following is invalid
+ # as a octal escape sequence.
+ #
+ # \19 or \319
+ #
+ # However, it is not to an error in most implementations.
+ # Therefore, minjs also intepret it such way.
+ #
+ elsif octal_digit?(code)
+ code1 = @codes[@pos+1]
+ code2 = @codes[@pos+2]
+ if code >= 0x30 and code <= 0x33
+ if octal_digit?(code1)
+ if octal_digit?(code2)
+ @pos += 3
+ (code - 0x30) * 64 + (code1 - 0x30) * 8 + (code2 - 0x30)
+ else
+ @pos += 2
+ (code - 0x30) * 8 + (code1 - 0x30)
+ end
+ else
+ @pos += 1
+ code - 0x30
+ end
+ else #if code >= 0x34 and code <= 0x37
+ if octal_digit?(code1)
+ @pos += 2
+ (code - 0x30) * 8 + (code1 - 0x30)
+ else
+ @pos += 1
+ code - 0x30
+ end
end
- [oct].pack("U*")
else
- [@codes[@pos]].pack("U*")
+ @pos += 1
+ code
end
end
end
- def eof?(pos = nil)
- if pos.nil?
- pos = @pos
- end
- @codes[pos].nil?
+ def eof?
+ peek_lit(nil).nil?
end
#
- # check next literal is 'l' or not
+ # check next literal is strictly equal to 'l' or not.
+ # white spaces and line terminators are skipped and ignored.
+ #
# if next literal is not 'l', position is not forwarded
# if next literal is 'l', position is forwarded
#
- def match_lit(l, options = {})
- eval_lit {
- t = fwd_lit(options)
- t == l ? t : nil
- }
+ def eql_lit?(l, hint = nil)
+ lit = peek_lit(hint)
+ if lit.eql? l
+ fwd_after_peek
+ lit
+ else
+ nil
+ end
end
- def next_lit(options = {})
- lit = nil
- pos0 = @pos
- return nil if eof?
- while lit = next_input_element(options)
- if lit and (lit.ws? or lit.lt?)
- ;
- else
- break
- end
+ #
+ # check next literal is strictly equal to 'l' or not.
+ # white spaces are skipped and ignored.
+ # line terminators are not ignored.
+ #
+ # if next literal is not 'l', position is not forwarded
+ # if next literal is 'l', position is forwarded
+ #
+ def eql_lit_nolt?(l, hint = nil)
+ lit = peek_lit_nolt(hint)
+ if lit.eql? l
+ fwd_after_peek
+ lit
+ else
+ nil
end
- @pos = pos0
- lit
end
- def fwd_lit(options = {})
- lit = nil
- return nil if eof?
- if options[:nolt]
- while lit = next_input_element(options)
- if lit and lit.ws?
- ;
- else
- break
- end
- end
+ #
+ # check next literal is equal to 'l' or not.
+ # white spaces and line terminators are skipped and ignored.
+ #
+ # if next literal is not 'l', position is not forwarded
+ # if next literal is 'l', position is forwarded
+ #
+ def match_lit?(l, hint = nil)
+ lit = peek_lit(hint)
+ if lit == l
+ fwd_after_peek
+ lit
else
- while lit = next_input_element(options)
- if lit and (lit.ws? or lit.lt?)
- ;
- else
- break
- end
- end
+ nil
end
- lit
end
- def ws_lit(options = {})
- ret = next_input_element(options)
- if ret and (ret.ws? or ret.lt?)
- ret
+ #
+ # check next literal is equal to 'l' or not.
+ # white spaces are skipped and ignored.
+ # line terminators are not ignored.
+ #
+ # if next literal is not 'l', position is not forwarded
+ # if next literal is 'l', position is forwarded
+ #
+ def match_lit_nolt?(l, hint = nil)
+ lit = peek_lit_nolt(hint)
+ if lit == l
+ fwd_after_peek
+ lit
else
nil
end
end
- def rewind_pos
- if @pos > 0
- @pos -= 1
+ #
+ # fetch next literal.
+ # position is not forwarded.
+ # white spaces and line terminators are skipped and ignored.
+ #
+ def peek_lit(hint)
+ pos0 = @pos
+ while lit = next_input_element(hint) and (lit.ws? or lit.lt?)
end
+ @pos = pos0
+ lit
end
- def debug_code(from, to = nil)
- if to.nil?
- to = (@error_pos || @pos)
+ #
+ # fetch next literal.
+ # position is not forwarded.
+ # white spaces are skipped and ignored.
+ # line terminators are not ignored.
+ #
+ def peek_lit_nolt(hint)
+ pos0 = @pos
+ while lit = next_input_element(hint) and lit.ws?
end
- @codes[from,to].pack("U*")
+ @pos = pos0
+ lit
end
- def debug_str(pos = nil, line = nil, col = nil)
- if pos.nil?
- pos = @error_pos
- if pos.nil?
- pos = @pos
- end
+ def fwd_after_peek
+ @pos = @head_pos
+ end
+
+ #
+ # fetch next literal.
+ # position is forwarded.
+ # white spaces and line terminators are skipped and ignored.
+ #
+ def fwd_lit(hint)
+ while lit = next_input_element(hint) and (lit.ws? or lit.lt?)
end
- if pos > 20
- pos -= 20
- pos0 = 20
- elsif pos >= 0
- pos0 = pos
- pos = 0
- end
- if col and col >= 1
- pos0 = col - 1;
- end
- t = ''
- t << @codes[pos..(pos+80)].pack("U*")
- t << "\n"
- t << (' ' * pos0) + "^"
- t
+ lit
end
- def debug_lit(pos = nil)
- if pos.nil?
- pos = @error_pos
- if pos.nil?
- pos = @pos
- end
+ #
+ # fetch next literal.
+ # position is forwarded.
+ # white spaces are skipped and ignored.
+ # line terminators are not ignored.
+ #
+ def fwd_lit_nolt(hint)
+ while lit = next_input_element(hint) and lit.ws?
end
- if pos > 20
- pos -= 20
- pos0 = 20
- elsif pos >= 0
- pos0 = pos
- pos = 0
- end
- #STDERR.puts pos0
- STDERR.puts @codes[pos..(pos+80)].collect{|u| u == 10 ? 0x20 : u}.pack("U*")
- STDERR.puts (' ' * pos0) + "^"
+ lit
end
+
#
# break <val> => position is rewind, then break with <val>
# return <val> => position is rewind, then return <val>
# next <val> => position is not rewind, then break with <val>
#
def eval_lit(&block)
begin
saved_pos = @pos
+ @eval_nest += 1
ret = yield
ensure
+ @eval_nest -= 1
if ret.nil?
- #@error_pos = @pos
@pos = saved_pos
nil
+ else
+ if @eval_nest == 0
+ #STDERR.puts "clear_cache [#{saved_pos}..#{@pos}]"
+ clear_cache
+ end
end
end
end
- def line_col(pos)
+ #
+ # position to [row, col]
+ #
+ def row_col(pos)
_pos = 0
row = 0
col = 1
@codes.each do |code|
break if _pos >= pos
@@ -783,8 +965,48 @@
col += 1
end
_pos += 1
end
return [row+1, col+1]
+ end
+
+ #
+ # position to line
+ #
+ def line(pos)
+ pos0 = pos1 = pos
+ while true
+ pos0 -= 1
+ break if line_terminator?(@codes[pos0])
+ end
+ pos0 += 1
+
+ while true
+ break if line_terminator?(@codes[pos1])
+ pos1 += 1
+ end
+
+ @codes[pos0..pos1].pack("U*")
+ end
+
+ def debug_str(pos = nil, row = nil, col = nil)
+ if pos.nil?
+ pos = @head_pos or @pos
+ end
+
+ t = ''
+ if col >= 80
+ t << @codes[(pos-80)..(pos+80)].pack("U*")
+ col = 81
+ else
+ t << line(pos)
+ end
+
+ if col and col >= 1
+ col = col - 1;
+ end
+ t << "\n"
+ t << (' ' * col) + "^"
+ t
end
end
end