lib/rubylexer.rb in rubylexer-0.7.6 vs lib/rubylexer.rb in rubylexer-0.7.7
- old
+ new
@@ -107,21 +107,26 @@
?: => :symbol_or_op,
?\n => :newline, #implicitly escaped after op
#?\r => :newline, #implicitly escaped after op
?\\ => :escnewline,
- ?\x00 => :eof,
- ?\x04 => :eof,
- ?\x1a => :eof,
"[({" => :open_brace,
"])}" => :close_brace,
?# => :comment,
- NONASCII => :identifier,
+ ?\x00 => :eof,
+ ?\x04 => :eof,
+ ?\x1a => :eof,
+
+ ?\x01..?\x03 => :illegal_char,
+ ?\x05..?\x08 => :illegal_char,
+ ?\x0E..?\x19 => :illegal_char,
+ ?\x1b..?\x1F => :illegal_char,
+ ?\x7F => :illegal_char,
}
attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
UCLETTER=@@UCLETTER="[A-Z]"
@@ -136,28 +141,32 @@
LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]"
#for each listed constant name, builds and evals source text defining
#an instance method and a class method of that name; the instance
#method returns the constant, the class method the matching @@ variable.
#the old line glued the generated snippets together with Array#to_s, the new one with Array#join
#(presumably because Array#to_s no longer concatenates elements on ruby 1.9 -- confirm)
eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| "
def #{n}; #{n}; end
def self.#{n}; @@#{n}; end
"
- }.to_s
+ }.join
#matches, anchored at the start of the string, one of the OPORBEGINWORDS,
#INNERBOUNDINGWORDS or BINOPWORDS alternatives or the word 'end', followed
#by either a char that is not a LETTER_DIGIT or the end of the string.
NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
#NEVERSTARTPARAMLISTFIRST: the old line always built a CharSet. the new lines
#use a Regexp character class when ?A is a String (marked 'ruby >= 1.9' below)
#and keep the CharSet otherwise.
- NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
+ if ?A.is_a? String #ruby >= 1.9
+ NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
+ else
+ NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
+ end
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
=begin
require 'jcode'
utf8=String::PATTERN_UTF8 #or euc, or sjis...
LCLETTER_U="(?>[a-z_]|#{utf8})"
LETTER_U="(?>[A-Za-z_]|#{utf8})"
- IDENTCHAR_U="(?>[A-Za-z_0-9]|#{utf8})"
+ LETTER_DIGIT_U="(?>[A-Za-z_0-9]|#{utf8})"
=end
#-----------------------------------
- def initialize(filename,file,linenum=1,offset_adjust=0,options={:rubyversion=>1.8})
+ def initialize(filename,file,linenum=1,offset_adjust=0,options={})
@offset_adjust=0 #set again in next line
- super(filename,file, linenum,offset_adjust)
+ rulexer_initialize(filename,file, linenum,offset_adjust)
@start_linenum=linenum
@parsestack=[TopLevelContext.new]
@incomplete_here_tokens=[] #not used anymore
@pending_here_bodies=[]
@localvars_stack=[SymbolTable.new]
@@ -166,20 +175,21 @@
@last_operative_token=nil
@last_token_maybe_implicit=nil
@enable_macro=nil
@base_file=nil
@progress_thread=nil
- @rubyversion=options[:rubyversion]
+ @rubyversion=options[:rubyversion]||1.8
@encoding=options[:encoding]||:detect
@method_operators=if @rubyversion>=1.9
- /#{RUBYSYMOPERATORREX}|\A![=~]?\Z/o
+ /#{RUBYSYMOPERATORREX}|\A![=~@]?/o
else
RUBYSYMOPERATORREX
end
- @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
+ @toptable=CharHandler.new(self, :identifier, CHARMAPPINGS)
+ extend RubyLexer1_9 if @rubyversion>=1.9
read_leading_encoding
start_of_line_directives
progress_printer
end
@@ -201,15 +211,15 @@
}
ENCODINGS=%w[ascii binary utf8 euc sjis]
def read_leading_encoding
return unless @encoding==:detect
@encoding=:ascii
- @encoding=:utf8 if @file.skip( /\xEF\xBB\xBF/ ) #bom
+ @encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" ) #bom
if @file.skip( /\A#!/ )
loop do
til_charset( /[\s\v]/ )
- break if @file.match( / ([^-\s\v]|--[\s\v])/,4 )
+ break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 )
if @file.skip( /.-K(.)/ )
case $1
when 'u'; @encoding=:utf8
when 'e'; @encoding=:euc
when 's'; @encoding=:sjis
@@ -241,12 +251,13 @@
#returns the last (most recently pushed) entry of @localvars_stack.
def localvars;
@localvars_stack.last
end
+ attr_accessor :localvars_stack
+
attr_accessor :in_def
- attr :localvars_stack
attr :offset_adjust
attr_writer :pending_here_bodies
attr :rubyversion
#-----------------------------------
@@ -254,11 +265,11 @@
@last_operative_token=@last_token_maybe_implicit=tok
end
#-----------------------------------
def get1token
- result=super #most of the action's here
+ result=rulexer_get1token #most of the action's here
if ENV['PROGRESS']
@last_cp_pos||=0
@start_time||=Time.now
if result.offset-@last_cp_pos>100000
@@ -298,16 +309,16 @@
end
end
#-----------------------------------
#truthy if the base implementation says so (super on the old line,
#rulexer_eof? on the new line), or if @last_operative_token is an EoiToken.
def eof?
- super or EoiToken===@last_operative_token
+ rulexer_eof? or EoiToken===@last_operative_token
end
#-----------------------------------
#the base input position (super on the old line, rulexer_input_position
#on the new line) plus @offset_adjust.
def input_position
- super+@offset_adjust
+ rulexer_input_position+@offset_adjust
end
#-----------------------------------
def input_position_raw
@file.pos
@@ -349,10 +360,11 @@
#-----------------------------------
#returns true when @in_def is defined and truthy, or when walking
#@parsestack from its last entry backwards reaches a context whose starter
#is 'def' and whose state is not :saw_def.
#with the added line, reaching a 'class' or 'module' context first
#ends the walk with false. returns false if nothing matched.
def inside_method_def?
return true if (defined? @in_def) and @in_def
@parsestack.reverse_each{|ctx|
ctx.starter=='def' and ctx.state!=:saw_def and return true
+ ctx.starter=='class' || ctx.starter=='module' and return false
}
return false
end
#-----------------------------------
@@ -387,11 +399,11 @@
@moretokens.empty? or return result
loop do
unless @moretokens.empty?
case @moretokens.first
when StillIgnoreToken
- when NewlineToken: allow_eol or break
+ when NewlineToken; allow_eol or break
else break
end
else
break unless ch=nextchar
@@ -465,16 +477,13 @@
assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
@moretokens.push SymbolToken.new(str,oldpos), KeywordToken.new("=>",input_position-1)
else
- @moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
- #if not a keyword, decide if it should be var or method
- case str
- when FUNCLIKE_KEYWORDS; except=tok
- when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
- end
+ @moretokens.unshift(*parse_keywords(str,oldpos) do |tok,except|
+ #most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val
+
was_last=@last_operative_token
@last_operative_token=tok if tok
normally=safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
(Array===normally ? normally[0]=except : normally=except) if except
normally
@@ -545,10 +554,11 @@
#-----------------------------------
#truthy if @defining_lvar is set; otherwise the answer depends on the class
#of the last @parsestack entry:
# UnparenedParamListLhsContext (added line): lasttok.ident is exactly '->', ',' or ';'
# RescueSMContext: lasttok.ident is '=>' and the upcoming input is optional
#  whitespace followed by ':', ';', '#', newline, or the word 'then'
#for any other context the case has no matching branch, so the result is nil.
#lasttok defaults to @last_operative_token.
def in_lvar_define_state lasttok=@last_operative_token
#@defining_lvar is a hack
@defining_lvar or case ctx=@parsestack.last
#when ForSMContext; ctx.state==:for
+ when UnparenedParamListLhsContext; /^(->|,|;)$/===lasttok.ident
when RescueSMContext
lasttok.ident=="=>" and @file.match?( /\A[\s\v]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
#when BlockParamListLhsContext; true
end
end
@@ -565,10 +575,11 @@
#in general, operators in ruby are disambiguated by the before-but-not-after rule.
#an otherwise ambiguous operator is disambiguated by the surrounding whitespace:
#whitespace before but not after the 'operator' indicates it is to be considered a
#value token instead. otherwise it is a binary operator. (unary (prefix) ops count
#as 'values' here.)
+ #this is by far the ugliest method in RubyLexer.
def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
#look for call site if not a keyword or keyword is function-like
#look for and ignore local variable names
assert String===name
@@ -577,11 +588,11 @@
#maybe_local really means 'maybe local or constant'
maybe_local=case name
when /(?!#@@LETTER_DIGIT).$/o #do nothing
when /^#@@LCLETTER/o
(localvars===name or
- VARLIKE_KEYWORDS===name or
+ #VARLIKE_KEYWORDS===name or
was_in_lvar_define_state
) and not lasttok===/^(\.|::)$/
when /^#@@UCLETTER/o
is_const=true
not lasttok==='.' #this is the right algorithm for constants...
@@ -615,12 +626,13 @@
end
#if next op is assignment (or comma in lvalue list)
#then omit implicit parens
assignment_coming=case nc=nextchar
- when ?=; not /^=[>=~]$/===readahead(2)
+ when ?=; not( /^=[>=~]$/===readahead(2) )
when ?,; comma_in_lvalue_list?
+ when (?; if @rubyversion>=1.9); ParenedParamListLhsContext===@parsestack.last
when ?); last_context_not_implicit.lhs
when ?i; /^in(?!#@@LETTER_DIGIT)/o===readahead(3) and
ForSMContext===last_context_not_implicit
when ?>,?<; /^(.)\1=$/===readahead(3)
when ?*,?&; /^(.)\1?=/===readahead(3)
@@ -643,11 +655,11 @@
if assignment_coming
@parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
else
case nc
- when nil: 2
+ when nil; 2
when ?!; /^![=~]$/===readahead(2) ? 2 : 1
when ?d;
if /^do((?!#@@LETTER_DIGIT)|$)/o===readahead(3)
if maybe_local and expecting_do?
ty=VarNameToken
@@ -759,11 +771,11 @@
when 1,3;
if /^(break|next|return)$/===name and
!(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident)
#only 1 param in list
result.unshift ImplicitParamListStartToken.new(oldpos)
- @parsestack.push ParamListContextNoParen.new(@linenum)
+ @parsestack.push KWParamListContextNoParen.new(@linenum)
else
arr,pass=*param_list_coming_with_2_or_more_params?
result.push( *arr )
unless pass
#only 1 param in list
@@ -844,18 +856,18 @@
def abort_noparens_for_rescue!(str='')
#assert @moretokens.empty?
result=[]
ctx=@parsestack.last
while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
- break if AssignmentRhsContext===ctx && !ctx.multi_assign?
- if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
- result.push ImplicitParamListEndToken.new(input_position-str.length),
- AssignmentRhsListEndToken.new(input_position-str.length)
- @parsestack.pop
- @parsestack.pop
- break
- end
+# break if AssignmentRhsContext===ctx && !ctx.multi_assign?
+# if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
+# result.push ImplicitParamListEndToken.new(input_position-str.length),
+# AssignmentRhsListEndToken.new(input_position-str.length)
+# @parsestack.pop
+# @parsestack.pop
+# break
+# end
result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
break if RescueSMContext===ctx #why is this here?
@parsestack.pop
ctx=@parsestack.last
end
@@ -864,18 +876,30 @@
#-----------------------------------
#lookup table used by abort_noparens_for_do!: keyed by the class of the
#context on top of @parsestack, giving the token class to emit when that
#context is popped. the value 1 is a sentinel; abort_noparens_for_do!
#stops (without emitting or popping) when it sees it.
#the added line registers UnparenedParamListLhsContext.
CONTEXT2ENDTOK_FOR_DO={
AssignmentRhsContext=>AssignmentRhsListEndToken,
ParamListContextNoParen=>ImplicitParamListEndToken,
+ UnparenedParamListLhsContext=>KwParamListEndToken,
ExpectDoOrNlContext=>1,
#WhenParamListContext=>KwParamListEndToken,
#RescueSMContext=>KwParamListEndToken
}
def abort_noparens_for_do!(str='')
#assert @moretokens.empty?
result=[]
while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
+ if klass==AssignmentRhsListEndToken
+ i=@parsestack.size
+ end_the_assign=false
+ while AssignmentRhsContext===@parsestack[i-=1]
+ if CONTEXT2ENDTOK_FOR_DO[@parsestack[i-1].class] and
+ @parsestack[i-1].class!=AssignmentRhsContext
+ break end_the_assign=true
+ end
+ end
+ break unless end_the_assign
+ end
break if klass==1
result << klass.new(input_position-str.length)
@parsestack.pop
end
return result
@@ -915,44 +939,67 @@
public :enable_macros!
#-----------------------------------
#@@SPACES matches a single space, tab, vertical tab or form feed.
#@@WSTOK matches one whitespace-like unit: a newline (optionally preceded by \r),
#a run of @@SPACES (stray \r not followed by \n is allowed in it), a '#' comment
#through its newline, a backslash-escaped newline, or an =begin ... =end block.
#the new definition states the same alternatives using atomic groups (?>...)
#and requires @@SPACES (rather than any \s) after =begin / =end.
#@@WSTOKS matches one or more @@WSTOK, but not where the text starts with '=begin'.
@@SPACES=/[\ \t\v\f\v]/
- @@WSTOK=/\r?\n|\r*#@@SPACES+(?:#@@SPACES|\r(?!\n))*|\#[^\n]*\n|\\\r?\n|
- ^=begin(?:[\s].*)?\n(?:(?!=end).*\n)*=end[\s\n].*\n/x
- @@WSTOKS=/(?!=begin)#@@WSTOK+/o
- def divide_ws(ws,offset)
+ @@WSTOK=/(?>
+ (?>\r?)\n|
+ (?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)|
+ \#(?>[^\n]*)\n|
+ \\(?>\r?)\n|
+ ^=begin(?>(?>#@@SPACES.*)?)\n
+ (?>(?:(?!=end)(?>.*)\n))*
+ =end(?>(?>#@@SPACES.*)?)\n
+ )/x
+ @@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o
#divide_ws: splits a whitespace string into tokens, one per consecutive @@WSTOK match.
#each token gets the position offset + (start of the match within the string).
# pieces starting with '#' or '=' -> CommentToken (old) / IgnoreToken (new)
# other pieces ending in newline  -> EscNlToken (new version also passes @filename,@linenum)
# everything else                 -> WsToken
#the new version also adds the number of newlines in each piece to @linenum,
#and renames the parameter to ws0 so the block variable ws no longer reuses its name.
#afterwards, runs of adjacent WsTokens are merged by appending the later
#token's ident onto the earlier one. returns the array of tokens.
#NOTE(review): the merge loop calls delete_at on result while each_with_index is iterating it.
+ def divide_ws(ws0,offset)
result=[]
- ws.scan(/\G#@@WSTOK/o){|ws|
+ ws0.scan(/\G#@@WSTOK/o){|ws|
incr= $~.begin(0)
- klass=case ws
- when /\A[\#=]/; CommentToken
- when /\n\Z/; EscNlToken
- else WsToken
+ tok=case ws
+ when /\A[\#=]/; IgnoreToken.new(ws,offset+incr)
+ when /\n\Z/; EscNlToken.new(ws,offset+incr,@filename,@linenum)
+ else WsToken.new(ws,offset+incr)
end
- result << klass.new(ws,offset+incr)
+ result << tok
+ @linenum+=ws.count "\n"
}
result.each_with_index{|ws,i|
if WsToken===ws
ws.ident << result.delete_at(i+1).ident while WsToken===result[i+1]
end
}
return result
end
+ #-----------------------------------
+ #lex tokens until a predefined end token is found.
+ #returns a list of tokens seen.
#the end condition block is called before each token is read, with
# 1) the token read most recently (nil before the first one), and
# 2) the slice @parsestack[oldsize+1..-1] (or [] when that slice is nil),
#    where oldsize is the stack size on entry.
#reading stops once the block returns a truthy value and @parsestack is back to
#its entry size. each token comes from get1token and is appended to the result;
#on an EoiToken, lexerror is called with "unexpected eof" and the loop is left.
#the whole loop runs inside safe_recurse.
#NOTE(review): the slice starts at oldsize+1, so the first context pushed above
#the entry depth is not passed to the block -- confirm this is intended.
+ def read_arbitrary_expression(&endcondition)
+ result=[]
+ oldsize=@parsestack.size
+ safe_recurse{
+ tok=nil
+ until endcondition[tok,@parsestack[oldsize+1..-1]||[]] and @parsestack.size==oldsize
+ tok=get1token
+ result<<tok
+ EoiToken===tok and break lexerror( tok, "unexpected eof" )
+ end
+ }
+ result
+ end
-
#-----------------------------------
#parse keywords now, to prevent confusion over bare symbols
#and match end with corresponding preceding def or class or whatever.
#if arg is not a keyword, the block is called
#str:    the word just read
#offset: position given to the KeywordToken built for it
#block:  called with a MethNameToken when there is no keyword_<str> handler
#starts a result array holding one KeywordToken, then dispatches: if self
#responds to keyword_<str>, that method is sent (str,offset,result,&block)
#and its return value is returned; otherwise the block's value is returned.
#the handler name is a String on the old line and a Symbol on the new line.
#NOTE(review): the regex in the second assert begins with a bare 'A', not '\A';
#this looks like a missing backslash (as written it only matches idents
#that contain a literal 'A') -- confirm before relying on that assert.
def parse_keywords(str,offset,&block)
assert @moretokens.empty?
assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident)
result=[KeywordToken.new(str,offset)]
- m="keyword_#{str}"
+ m=:"keyword_#{str}"
respond_to?(m) ? (send m,str,offset,result,&block) : block[MethNameToken.new(str)]
end
public #these have to be public so respond_to? can see them (sigh)
def keyword_end(str,offset,result)
result.unshift(*abort_noparens!(str))
@@ -975,33 +1022,43 @@
end
#handler for the 'module' keyword (dispatched by name from parse_keywords).
#both versions: mark the first token in result with has_end!, push a
#WantsEndContext, queue tokens for the module name into @moretokens,
#push a fresh SymbolTable onto @localvars_stack, queue an EndHeaderToken
#and return result.
#old version: scanned an optional leading '::' and then looped over
# Constant(::Constant)* pieces, computing token positions from offset.
#new version: first tries to scan one plain constant name (only when it is
# followed by whitespace and a non-'(' char, by '#', ';', newline, or by '::');
# if no name was scanned this way it reads tokens with read_arbitrary_expression
# until a newline, ';' or 'end' is next (or '::' when only
# ImplicitParamListContexts were pushed in the meantime). those tokens are inserted at the
# front of @moretokens. then, while '::' is next, further ::Constant
# pieces are scanned, taking token positions from md.begin(n).
#NOTE(review): in the new lines `offset` (reassigned below) and `incr` are
#assigned but not read anywhere in this method -- probably leftovers.
def keyword_module(str,offset,result)
result.first.has_end!
@parsestack.push WantsEndContext.new(str,@linenum)
- @localvars_stack.push SymbolTable.new
offset=input_position
- @file.scan(/\A(#@@WSTOKS)?(::)?/o)
- md=@file.last_match
- all,ws,dc=*md
- fail if all.empty?
- @moretokens.concat divide_ws(ws,offset) if ws
- @moretokens.push KeywordToken.new('::',offset+md.end(0)-2) if dc
- loop do
- offset=input_position
- @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(::)?/o)
+ assert @moretokens.empty?
+ tokens=[]
+ if @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(?=[#{WHSP}]+(?:[^(])|[#;\n]|::)/o)
md=@file.last_match
- all,ws,name,dc=*md
- if ws
- @moretokens.concat divide_ws(ws,offset)
- incr=ws.size
+ all,ws,name=*md
+ tokens.concat divide_ws(ws,md.begin(1)) if ws
+ tokens.push VarNameToken.new(name,md.begin(2))
+ end
+ tokens.push( *read_arbitrary_expression{|tok,extra_contexts|
+ #@file.check /\A(\n|;|::|end(?!#@@LETTER_DIGIT)|(#@@UCLETTER#@@LETTER_DIGIT*)(?!(#@@WSTOKS)?::))/o
+ @file.check( /\A(\n|;|end(?!#@@LETTER_DIGIT))/o ) or
+ @file.check("::") && extra_contexts.all?{|ctx| ImplicitParamListContext===ctx } &&
+ @moretokens.push(*abort_noparens!)
+ } ) if !name #or @file.check /#@@WSTOKS?::/o
+ @moretokens[0,0]=tokens
+ @localvars_stack.push SymbolTable.new
+ while @file.check( /\A::/ )
+ #VarNameToken===@moretokens.last or
+ #KeywordToken===@moretokens.last && @moretokens.last.ident=="::"
+ @file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break
+ md=@file.last_match
+ all,ws1,dc,ws2,name=*md
+ if ws1
+ @moretokens.concat divide_ws(ws1,md.begin(1))
+ incr=ws1.size
else
incr=0
end
- @moretokens.push VarNameToken.new(name,offset+incr)
- break unless dc
- @moretokens.push NoWsToken.new(offset+md.end(0)-2)
- @moretokens.push KeywordToken.new('::',offset+md.end(0)-2)
+ @moretokens.push NoWsToken.new(md.begin(2)) if dc
+ @moretokens.push KeywordToken.new('::',md.begin(2)) if dc
+ @moretokens.concat divide_ws(ws2,md.begin(3)) if ws2
+ @moretokens.push VarNameToken.new(name,md.begin(4))
end
@moretokens.push EndHeaderToken.new(input_position)
return result
end
@@ -1069,12 +1126,11 @@
assert WantsEndContext===@parsestack.last
result.last.as=";"
else
result.last.has_end!
if BlockContext===ctx and ctx.wanting_stabby_block_body
- ctx.wanting_stabby_block_body=false
- ctx.starter,ctx.ender="do","end"
+ @parsestack[-1]= WantsEndContext.new(str,@linenum)
else
@parsestack.push WantsEndContext.new(str,@linenum)
localvars.start_block
block_param_list_lookahead
end
@@ -1105,22 +1161,22 @@
end
EoiToken===tok and lexerror tok, "eof in def header"
result << tok
end until parencount==0 #@parsestack.size==old_size
@localvars_stack.push SymbolTable.new
- else #no parentheses, all tail
- set_last_token KeywordToken.new(".") #hack hack
+ else #no parentheses, all tail
+ set_last_token KeywordToken.new(".") #hack hack
tokindex=result.size
result << tok=symbol(false,false)
name=tok.to_s
assert !in_lvar_define_state
#maybe_local really means 'maybe local or constant'
maybe_local=case name
when /(?!#@@LETTER_DIGIT).$/o; #do nothing
when /^[@$]/; true
- when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
+ when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
when /^#@@LCLETTER/o; localvars===name
when /^#@@UCLETTER/o; is_const=true #this is the right algorithm for constants...
end
result.push( *ignored_tokens(false,false) )
nc=nextchar
@@ -1162,10 +1218,11 @@
#look for start of parameter list
nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
if state==:expect_op and /^(?:#@@LETTER|[(&*])/o===nc
ctx.state=:def_param_list
+ ctx.has_parens= '('==nc
list,listend=def_param_list
result.concat list
end_index=result.index(listend)
ofs=listend.offset
if endofs
@@ -1269,14 +1326,23 @@
return result
end
def keyword_rescue(str,offset,result)
unless after_nonid_op? {false}
+ result.replace []
#rescue needs to be treated differently when in operator context...
#i think no RescueSMContext should be pushed on the stack...
- result.first.set_infix! #plus, the rescue token should be marked as infix
- result.unshift(*abort_noparens_for_rescue!(str))
+ tok=OperatorToken.new(str,offset)
+ tok.unary=false #plus, the rescue token should be marked as infix
+ if AssignmentRhsContext===@parsestack.last
+ tok.as="rescue3"
+ @parsestack.pop #end rhs context
+ result.push AssignmentRhsListEndToken.new(offset) #end rhs token
+ else
+ result.concat abort_noparens_for_rescue!(str)
+ end
+ result.push tok
else
result.push KwParamListStartToken.new(offset+str.length)
#corresponding EndToken emitted by abort_noparens! on leaving rescue context
@parsestack.push RescueSMContext.new(@linenum)
# result.unshift(*abort_noparens!(str))
@@ -1347,16 +1413,35 @@
}
end
return result
end
#handler for __FILE__: stores @filename as the value of the last token
#in result, then returns result. str and offset are not used.
+ def keyword___FILE__(str,offset,result)
+ result.last.value=@filename
+ return result
+ end
#handler for __LINE__: stores the current @linenum as the value of the
#last token in result, then returns result. str and offset are not used.
+ def keyword___LINE__(str,offset,result)
+ result.last.value=@linenum
+ return result
+ end
+
#keyword handlers that apply only to ruby 1.9 input. initialize extends
#the lexer instance with this module when @rubyversion>=1.9.
# keyword___ENCODING__ returns result unchanged (setting a value is still commented out).
# keyword_not forwards all arguments and the block to _keyword_funclike.
+ module RubyLexer1_9
+ def keyword___ENCODING__(str,offset,result)
+ #result.last.value=huh
+ return result
+ end
+
+ def keyword_not(*args,&block) _keyword_funclike(*args,&block) end
+ end
+
#shared handler for function-like keywords. the incoming result and offset
#are ignored; the return value is whatever the caller's block returns.
#if @last_operative_token is '.' or '::', the block is given a MethNameToken.
#otherwise it is given a KeywordToken: once on the old line, and on the
#new lines twice (the same token as both block arguments).
def _keyword_funclike(str,offset,result)
if @last_operative_token===/^(\.|::)$/
result=yield MethNameToken.new(str) #should pass a methname token here
else
- result=yield KeywordToken.new(str)
+ tok=KeywordToken.new(str)
+ result=yield tok,tok
end
return result
end
for kw in FUNCLIKE_KEYWORDLIST-["END","return","break","next"] do
alias_method "keyword_#{kw}".to_sym, :_keyword_funclike
@@ -1364,14 +1449,16 @@
#shared handler for variable-like keywords: returns result untouched.
#str and offset are accepted but not used.
def _keyword_varlike(str,offset,result)
#do nothing
return result
end
#defines keyword_<kw> as an alias of _keyword_varlike for every entry of
#VARLIKE_KEYWORDLIST plus "defined?" and "not".
#the new line first removes "__FILE__" and "__LINE__" from the list, so
#they are not aliased here.
- for kw in VARLIKE_KEYWORDLIST+["defined?", "not"] do
+ for kw in VARLIKE_KEYWORDLIST-["__FILE__","__LINE__"]+["defined?", "not"] do
alias_method "keyword_#{kw}".to_sym, :_keyword_varlike
end
+
+
private
#-----------------------------------
def parsestack_lastnonassign_is?(obj)
@parsestack.reverse_each{|ctx|
@@ -1451,10 +1538,11 @@
end
end
elsif starter==?(
ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
@parsestack.push ctx_type.new(@linenum)
+ a<<KwParamListStartToken.new( input_position )
end
set_last_token KeywordToken.new( ';' )
#a.concat ignored_tokens
@@ -1491,20 +1579,49 @@
#parsestack was changed by get1token above...
normal_comma_level+=1
assert(normal_comma_level==@parsestack.size)
- endingblock=proc{|tok| tok===')' }
+ endingblock=proc{|tok2| tok2===')' }
else
- endingblock=proc{|tok| tok===';' or NewlineToken===tok}
+ endingblock=proc{|tok2| tok2===';' or NewlineToken===tok2}
end
class << endingblock
alias === call
end
+ listend=method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
+
+ @defining_lvar=false
+ @parsestack.last.see self,:semi
+
+ assert(@parsestack.size <= old_parsestack_size)
+
+ #hack: force next token to look like start of a
+ #new stmt, if the last ignored_tokens
+ #call above did not find a newline
+ #(just in case the next token parsed
+ #happens to call quote_expected? or after_nonid_op)
+ result.concat ignored_tokens
+# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
+# !(NewlineToken===@last_operative_token) and
+# !(/^(end|;)$/===@last_operative_token)
+ #result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
+ set_last_token KeywordToken.new( ';' )
+ result<< get1token
+# end
+ }
+
+ return result,listend
+ end
+
+
+ #-----------------------------------
+ #read local parameter names in method definition
+ def method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
+ listend=nil
set_last_token KeywordToken.new( ',' )#hack
- #read local parameter names
nextvar=nil
loop do
expect_name=(@last_operative_token===',' and
normal_comma_level==@parsestack.size)
expect_name and @defining_lvar||=true
@@ -1531,11 +1648,11 @@
@defining_lvar=false
assert((not @last_operative_token===','))
# assert !nextvar
nextvar=tok.ident
localvars[nextvar]=false #remove nextvar from list of local vars for now
- when /^[&*]$/.token_pat #unary form...
+ when /^[&*(]$/.token_pat #unary form...
#a NoWsToken is also expected... read it now
result.concat maybe_no_ws_token #not needed?
set_last_token KeywordToken.new( ',' )
else
lexerror tok,"unfamiliar var name '#{tok}'"
@@ -1551,36 +1668,13 @@
localvars[nextvar]=true #now, finally add nextvar back to local vars
nextvar
end
end
end
-
- @defining_lvar=false
- @parsestack.last.see self,:semi
-
- assert(@parsestack.size <= old_parsestack_size)
- assert(endingblock[tok] || ErrorToken===tok)
-
- #hack: force next token to look like start of a
- #new stmt, if the last ignored_tokens
- #call above did not find a newline
- #(just in case the next token parsed
- #happens to call quote_expected? or after_nonid_op)
- result.concat ignored_tokens
-# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
-# !(NewlineToken===@last_operative_token) and
-# !(/^(end|;)$/===@last_operative_token)
- #result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
- set_last_token KeywordToken.new( ';' )
- result<< get1token
-# end
- }
-
- return result,listend
+ return listend
end
-
#-----------------------------------
#handle % in ruby code. is it part of fancy quote or a modulo operator?
def percent(ch)
if AssignmentContext===@parsestack.last
@parsestack.pop
@@ -1628,11 +1722,17 @@
#-----------------------------------
#handle ? in ruby code. is it part of ?..: or a character literal?
#if colon_quote_expected?(ch) is truthy, one char is consumed with getchar
#and the result of getchar_maybe_escape becomes a literal token:
# old line: always a NumberToken built from that result.
# new lines: a StringToken when @rubyversion>=1.9; otherwise element [0] of
#  the result, passed through ord when it responds to it, wrapped in a NumberToken.
#otherwise a TernaryContext is pushed onto @parsestack and a KeywordToken
#for the next char is returned.
def char_literal_or_op(ch)
if colon_quote_expected? ch
getchar
- NumberToken.new getchar_maybe_escape
+ if @rubyversion >= 1.9
+ StringToken.new getchar_maybe_escape
+ else
+ ch=getchar_maybe_escape[0]
+ ch=ch.ord if ch.respond_to? :ord
+ NumberToken.new ch
+ end
else
@parsestack.push TernaryContext.new(@linenum)
KeywordToken.new getchar #operator
end
end
@@ -1823,11 +1923,11 @@
return [read(1),start]
when ?_,?a..?z,?A..?Z,NONASCII
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
return [identifier_as_string(context), start]
when ?(
- return [nil,start] if @enable_macro
+ return [nil,start] if @enable_macro or @rubyversion>=1.9
end
set_last_token KeywordToken.new(';')
lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
return [nil, start]
@@ -1851,11 +1951,11 @@
return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
end
res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
if true
- res.open=["<<",dash,quote,ender,quote].to_s
+ res.open=["<<",dash,quote,ender,quote].join
procrastinated=til_charset(/[\n]/)#+readnl
unless @base_file
@base_file=@file
@file=Sequence::List.new([@file])
@file.pos=@base_file.pos
@@ -1977,11 +2077,11 @@
end
@offset_adjust=@min_offset_adjust
@moretokens.push( *optional_here_bodies )
ln=@linenum
- @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
+ @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln-1), error),
FileAndLineToken.new(@filename,ln,input_position)
start_of_line_directives
return @moretokens.shift
@@ -1993,11 +2093,11 @@
if true
#handle here bodies queued up by previous line
pos=input_position
while body=@pending_here_bodies.shift
#body.offset=pos
- result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
+ result.push EscNlToken.new("\n",body.offset-1,@filename,nil)
result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
result.push body
#result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
#result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
body.headtok.line=@linenum-1
@@ -2144,29 +2244,29 @@
(KeywordToken===@last_operative_token and
@last_operative_token.ident=="rescue" and
!@last_operative_token.infix?) ||
!after_nonid_op?{false}
- hard=false if @rubyversion>=1.9 and @file.check /\A\n(?:#@@WSTOKS)?\.[^.]/o
+ hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
if hard
@offset_adjust=@min_offset_adjust
a= abort_noparens!
case @parsestack.last #these should be in the see:semi handler
- when ExpectDoOrNlContext: @parsestack.pop
- when ExpectThenOrNlContext: @parsestack.pop
+ when ExpectDoOrNlContext; @parsestack.pop
+ when ExpectThenOrNlContext; @parsestack.pop
end
assert !@parsestack.empty?
@parsestack.last.see self,:semi
- a << super(ch)
+ a << rulexer_newline(ch)
@moretokens.replace a+@moretokens
else
@offset_adjust=@min_offset_adjust
offset= input_position
nl=readnl
- @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
+ @moretokens.push EscNlToken.new(nl,offset,@filename,@linenum-1),
FileAndLineToken.new(@filename,@linenum,input_position)
end
#optimization: when thru with regurgitated text from a here document,
#revert back to original unadorned Sequence instead of staying in the list.
@@ -2220,19 +2320,19 @@
startpos= input_position
more= read(EQBEGINLENGTH-1) #get =begin
begin
eof? and raise "eof before =end"
- more<< til_charset(/[\r\n]/)
+ more<< til_charset(/\n/)
eof? and raise "eof before =end"
more<< readnl
end until readahead(EQENDLENGTH)==EQEND
#read rest of line after =end
- more << til_charset(/[\r\n]/)
- assert((eof? or ?\r===nextchar or ?\n===nextchar))
- assert !(/[\r\n]/===more[-1,1])
+ more << til_charset(/\n/)
+ assert((eof? or ?\n===nextchar))
+ assert !(/\n/===more[-1,1])
more<< readnl unless eof?
# newls= more.scan(/\r\n?|\n\r?/)
# @linenum+= newls.size
@@ -2309,12 +2409,12 @@
#but {false} in the block
#(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
return yield
when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
%r{^(
- end|self|true|false|nil|
- __FILE__|__LINE__|[\})\]]
+ end|self|true|false|nil|->|
+ __FILE__|__LINE__|__ENCODING__|[\})\]]
)$}x.token_pat
#dunno about def/undef
#maybe class/module shouldn't he here either?
#for is also in NewlineToken branch, below.
#what about rescue?
@@ -2397,11 +2497,11 @@
result << ?=
end
result= operator_or_methname_token( result)
result.offset=oldpos
return result
- end
+ end
#-----------------------------------
def tilde(ch) #match ~
assert(ch=='~')
result=getchar
@@ -2424,24 +2524,26 @@
#-----------------------------------
#match /[+\-]=?/ (+ or +=)
#could be beginning of number, too
#fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
def plusminus(ch)
+ pos=input_position
assert(/^[+\-]$/===ch)
if unary_op_expected?(ch) or
KeywordToken===@last_operative_token &&
/^(return|break|next)$/===@last_operative_token.ident
if (?0..?9)===readahead(2)[1]
- return number(ch)
+ result= number(ch)
elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
+ @file.pos+=2
#push down block context
localvars.start_block
@parsestack.push ctx=BlockContext.new(@linenum)
ctx.wanting_stabby_block_body=true
#read optional proc params
block_param_list_lookahead ?(, ParenedParamListLhsContext
-
+ result=KeywordToken.new('->',pos)
else #unary operator
result=getchar
WHSPLF[nextchar.chr] or
@moretokens << NoWsToken.new(input_position)
@@ -2454,10 +2556,11 @@
if eat_next_if(?=)
result << ?=
end
result=(operator_or_methname_token result)
end
+ result.offset=pos
return result
end
#-----------------------------------
def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
@@ -2483,27 +2586,28 @@
if @rubyversion>=1.9 and StringToken===last and last.lvars
#ruby delays adding lvars from regexps to known lvars table
#for several tokens in some cases. not sure why or if on purpose
#i'm just going to add them right away
- localvars.concat last.lvars
+ last.lvars.each{|lvar| localvars[lvar]=true }
end
when '' #plain assignment: record local variable definitions
last_context_not_implicit.lhs=false
+ @last_operative_token=result
@moretokens.push( *ignored_tokens(true).map{|x|
- NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x
+ NewlineToken===x ? EscNlToken.new(x.ident,x.offset,@filename,@linenum) : x
} )
@parsestack.push AssignmentRhsContext.new(@linenum)
+ @moretokens.push AssignmentRhsListStartToken.new( input_position)
if eat_next_if ?*
tok=OperatorToken.new('*', input_position-1)
tok.tag=:unary
@moretokens.push tok
WHSPLF[nextchar.chr] or
@moretokens << NoWsToken.new(input_position)
comma_in_lvalue_list? #is this needed?
end
- @moretokens.push AssignmentRhsListStartToken.new( input_position)
end
return result
end
#-----------------------------------
@@ -2511,17 +2615,19 @@
assert nextchar==?!
result=getchar
k=eat_next_if(/[~=]/)
if k
result+=k
- elsif eof?: #do nothing
+ elsif eof? or WHSPLF[nextchar.chr] #do nothing
else
- WHSPLF[nextchar.chr] or
- @moretokens << NoWsToken.new(input_position)
+ @moretokens << NoWsToken.new(input_position)
end
- return KeywordToken.new(result, input_position-result.size)
- #result should distinguish unary !
+ ty= @rubyversion>=1.9 ? OperatorToken : KeywordToken
+ result=ty.new(result, input_position-result.size)
+ result.unary=!k #result should distinguish unary !
+
+ return result
end
#-----------------------------------
def dot(ch)
@@ -2563,11 +2669,11 @@
if false
#-----------------------------------
def comment(str)
result=""
#loop{
- result<< super(nil).to_s
+ result<< rulexer_comment(nil).to_s
if /^\#.*\#$/===result #if comment was ended by a crunch
#that's not a legal comment end in ruby, so just keep reading
assert(result.to_s[-1]==?#)
@@ -2643,24 +2749,45 @@
end
when '{'
#check if we are in a hash literal or string inclusion (#{}),
#in which case below would be bad.
- if after_nonid_op?{false} or @last_operative_token.has_no_block?
+ if !(UnparenedParamListLhsContext===@parsestack.last) and
+ after_nonid_op?{false} || @last_operative_token.has_no_block?
@parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
else
#abort_noparens!
tokch.set_infix!
tokch.as="do"
-#=begin not needed now, i think
+
+ #if (perhaps deep) inside a stabby block param list context, end it
+ if @rubyversion>=1.9
+ stabby_params_just_ended=false
+ (@parsestack.size-1).downto(1){|i|
+ case @parsestack[i]
+ when ParamListContextNoParen,AssignmentRhsContext
+ #do nothing yet... see if inside a UnparenedParamListLhsContext
+ when UnparenedParamListLhsContext #stabby proc
+ @moretokens<<tokch
+ (@parsestack.size-1).downto(i){|j|
+ @moretokens.unshift @parsestack[j].endtoken(input_position-1)
+ }
+ @parsestack[i..-1]=[]
+ tokch=@moretokens.shift
+ stabby_params_just_ended=true
+ break
+ else break
+ end
+ }
+ end
+
# 'need to find matching callsite context and end it if implicit'
lasttok=last_operative_token
- if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
+ if !(lasttok===')' and lasttok.callsite?) and !stabby_params_just_ended #or ParamListContextNoParen===parsestack.last
@moretokens.push( *(abort_1_noparen!(1).push tokch) )
tokch=@moretokens.shift
end
-#=end
if BlockContext===@parsestack.last and @parsestack.last.wanting_stabby_block_body
@parsestack.last.wanting_stabby_block_body=false
else
localvars.start_block
@@ -2717,11 +2844,11 @@
return(endoffile_detected result)
end
#-----------------------------------
def endoffile_detected(s='')
- @moretokens.push( *(abort_noparens!.push super(s)))
+ @moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s)))
if @progress_thread
@progress_thread.kill
@progress_thread=nil
end
result= @moretokens.shift
@@ -2729,36 +2856,41 @@
result
end
#-----------------------------------
#wraps the base implementation's result (super on the old line,
#rulexer_single_char_token on the new line) in a KeywordToken whose
#position is input_position-1.
def single_char_token(ch)
- KeywordToken.new super(ch), input_position-1
+ KeywordToken.new rulexer_single_char_token(ch), input_position-1
end
#-----------------------------------
def comma(ch)
@moretokens.push token=single_char_token(ch)
- #if assignment rhs seen inside method param list, when param list, array or hash literal,
- # rescue where comma is expected, or method def param list
- # then end the assignment rhs now
- #+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext|
+ case @parsestack[-1]
+ when AssignmentRhsContext;
+ token.tag=:rhs
+ #if assignment rhs seen inside method param list, when param list,
+ # array or hash literal, rescue where comma is expected, method def param list,
+ # or another right hand side
+ # then end the assignment rhs now
+ #+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext|AssignmentRhsContext|
# (RescueSMContext&-{:state=>:rescue})|(DefContext&-{:in_body=>FalseClass|nil}),
# AssignmentRhsContext
#]===@parsestack
- if AssignmentRhsContext===@parsestack[-1] and
- ParamListContext===@parsestack[-2] ||
- ParamListContextNoParen===@parsestack[-2] ||
- WhenParamListContext===@parsestack[-2] ||
- ListImmedContext===@parsestack[-2] ||
- (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
- (DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
+ while AssignmentRhsContext===@parsestack[-1]
+ pop=
+ case @parsestack[-2]
+ when ParamListContext,ParamListContextNoParen,WhenParamListContext,
+ ListImmedContext,AssignmentRhsContext; true
+ when RescueSMContext; @parsestack[-2].state==:rescue
+ when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens?
+ else false
+ end
+ break unless pop
@parsestack.pop
- @moretokens.unshift AssignmentRhsListEndToken.new(input_position)
- end
- case @parsestack[-1]
- when AssignmentRhsContext; token.tag=:rhs
+ @moretokens.unshift AssignmentRhsListEndToken.new(input_position-1)
+ end
when ParamListContext,ParamListContextNoParen; #:call
when ListImmedContext; #:array
when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc
when ParenedParamListLhsContext; #stabby proc or method def'n?
when KnownNestedLhsParenContext; #:nested
@@ -2798,10 +2930,10 @@
end
#-----------------------------------
#tokenify_results_of :identifier
#passes every distinct handler name found in CHARMAPPINGS.values to
#save_offsets_in, except the names listed in the array below.
#the new line adds :plusminus to those exceptions.
save_offsets_in(*CHARMAPPINGS.values.uniq-[
- :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret,:plusminus
])
#save_offsets_in :symbol
end