lib/rubylexer.rb in rubylexer-0.7.6 vs lib/rubylexer.rb in rubylexer-0.7.7

- old
+ new

@@ -107,21 +107,26 @@ ?: => :symbol_or_op, ?\n => :newline, #implicitly escaped after op #?\r => :newline, #implicitly escaped after op ?\\ => :escnewline, - ?\x00 => :eof, - ?\x04 => :eof, - ?\x1a => :eof, "[({" => :open_brace, "])}" => :close_brace, ?# => :comment, - NONASCII => :identifier, + ?\x00 => :eof, + ?\x04 => :eof, + ?\x1a => :eof, + + ?\x01..?\x03 => :illegal_char, + ?\x05..?\x08 => :illegal_char, + ?\x0E..?\x19 => :illegal_char, + ?\x1b..?\x1F => :illegal_char, + ?\x7F => :illegal_char, } attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit UCLETTER=@@UCLETTER="[A-Z]" @@ -136,28 +141,32 @@ LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]" eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| " def #{n}; #{n}; end def self.#{n}; @@#{n}; end " - }.to_s + }.join NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om - NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST + if ?A.is_a? String #ruby >= 1.9 + NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/ + else + NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST + end NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST =begin require 'jcode' utf8=String::PATTERN_UTF8 #or euc, or sjis... LCLETTER_U="(?>[a-z_]|#{utf8})" LETTER_U="(?>[A-Za-z_]|#{utf8})" - IDENTCHAR_U="(?>[A-Za-z_0-9]|#{utf8})" + LETTER_DIGIT_U="(?>[A-Za-z_0-9]|#{utf8})" =end #----------------------------------- - def initialize(filename,file,linenum=1,offset_adjust=0,options={:rubyversion=>1.8}) + def initialize(filename,file,linenum=1,offset_adjust=0,options={}) @offset_adjust=0 #set again in next line - super(filename,file, linenum,offset_adjust) + rulexer_initialize(filename,file, linenum,offset_adjust) @start_linenum=linenum @parsestack=[TopLevelContext.new] @incomplete_here_tokens=[] #not used anymore @pending_here_bodies=[] @localvars_stack=[SymbolTable.new] @@ -166,20 +175,21 @@ @last_operative_token=nil @last_token_maybe_implicit=nil @enable_macro=nil @base_file=nil @progress_thread=nil - @rubyversion=options[:rubyversion] + @rubyversion=options[:rubyversion]||1.8 @encoding=options[:encoding]||:detect @method_operators=if @rubyversion>=1.9 - /#{RUBYSYMOPERATORREX}|\A![=~]?\Z/o + /#{RUBYSYMOPERATORREX}|\A![=~@]?/o else RUBYSYMOPERATORREX end - @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS) + @toptable=CharHandler.new(self, :identifier, CHARMAPPINGS) + extend RubyLexer1_9 if @rubyversion>=1.9 read_leading_encoding start_of_line_directives progress_printer end @@ -201,15 +211,15 @@ } ENCODINGS=%w[ascii binary utf8 euc sjis] def read_leading_encoding return unless @encoding==:detect @encoding=:ascii - @encoding=:utf8 if @file.skip( /\xEF\xBB\xBF/ ) #bom + @encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" ) #bom if @file.skip( /\A#!/ ) loop do til_charset( /[\s\v]/ ) - break if @file.match( / ([^-\s\v]|--[\s\v])/,4 ) + break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 ) if @file.skip( /.-K(.)/ ) case $1 when 'u'; @encoding=:utf8 when 'e'; @encoding=:euc when 's'; @encoding=:sjis @@ -241,12 +251,13 @@ def localvars; @localvars_stack.last end + attr_accessor :localvars_stack + attr_accessor :in_def - attr :localvars_stack attr :offset_adjust attr_writer :pending_here_bodies attr :rubyversion #----------------------------------- @@ -254,11 +265,11 @@ @last_operative_token=@last_token_maybe_implicit=tok end #----------------------------------- def get1token - result=super #most of the action's here + result=rulexer_get1token #most of the action's here if ENV['PROGRESS'] @last_cp_pos||=0 @start_time||=Time.now if result.offset-@last_cp_pos>100000 @@ -298,16 +309,16 @@ end end #----------------------------------- def eof? - super or EoiToken===@last_operative_token + rulexer_eof? or EoiToken===@last_operative_token end #----------------------------------- def input_position - super+@offset_adjust + rulexer_input_position+@offset_adjust end #----------------------------------- def input_position_raw @file.pos @@ -349,10 +360,11 @@ #----------------------------------- def inside_method_def? return true if (defined? @in_def) and @in_def @parsestack.reverse_each{|ctx| ctx.starter=='def' and ctx.state!=:saw_def and return true + ctx.starter=='class' || ctx.starter=='module' and return false } return false end #----------------------------------- @@ -387,11 +399,11 @@ @moretokens.empty? or return result loop do unless @moretokens.empty? case @moretokens.first when StillIgnoreToken - when NewlineToken: allow_eol or break + when NewlineToken; allow_eol or break else break end else break unless ch=nextchar @@ -465,16 +477,13 @@ assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/) if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":" @moretokens.push SymbolToken.new(str,oldpos), KeywordToken.new("=>",input_position-1) else - @moretokens.unshift(*parse_keywords(str,oldpos) do |tok| - #if not a keyword, decide if it should be var or method - case str - when FUNCLIKE_KEYWORDS; except=tok - when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now" - end + @moretokens.unshift(*parse_keywords(str,oldpos) do |tok,except| + #most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val + was_last=@last_operative_token @last_operative_token=tok if tok normally=safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) } (Array===normally ? normally[0]=except : normally=except) if except normally @@ -545,10 +554,11 @@ #----------------------------------- def in_lvar_define_state lasttok=@last_operative_token #@defining_lvar is a hack @defining_lvar or case ctx=@parsestack.last #when ForSMContext; ctx.state==:for + when UnparenedParamListLhsContext; /^(->|,|;)$/===lasttok.ident when RescueSMContext lasttok.ident=="=>" and @file.match?( /\A[\s\v]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om ) #when BlockParamListLhsContext; true end end @@ -565,10 +575,11 @@ #in general, operators in ruby are disambuated by the before-but-not-after rule. #an otherwise ambiguous operator is disambiguated by the surrounding whitespace: #whitespace before but not after the 'operator' indicates it is to be considered a #value token instead. otherwise it is a binary operator. (unary (prefix) ops count #as 'values' here.) + #this is by far the ugliest method in RubyLexer. def var_or_meth_name(name,lasttok,pos,was_after_nonid_op) #look for call site if not a keyword or keyword is function-like #look for and ignore local variable names assert String===name @@ -577,11 +588,11 @@ #maybe_local really means 'maybe local or constant' maybe_local=case name when /(?!#@@LETTER_DIGIT).$/o #do nothing when /^#@@LCLETTER/o (localvars===name or - VARLIKE_KEYWORDS===name or + #VARLIKE_KEYWORDS===name or was_in_lvar_define_state ) and not lasttok===/^(\.|::)$/ when /^#@@UCLETTER/o is_const=true not lasttok==='.' #this is the right algorithm for constants... @@ -615,12 +626,13 @@ end #if next op is assignment (or comma in lvalue list) #then omit implicit parens assignment_coming=case nc=nextchar - when ?=; not /^=[>=~]$/===readahead(2) + when ?=; not( /^=[>=~]$/===readahead(2) ) when ?,; comma_in_lvalue_list? + when (?; if @rubyversion>=1.9); ParenedParamListLhsContext===@parsestack.last when ?); last_context_not_implicit.lhs when ?i; /^in(?!#@@LETTER_DIGIT)/o===readahead(3) and ForSMContext===last_context_not_implicit when ?>,?<; /^(.)\1=$/===readahead(3) when ?*,?&; /^(.)\1?=/===readahead(3) @@ -643,11 +655,11 @@ if assignment_coming @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT else case nc - when nil: 2 + when nil; 2 when ?!; /^![=~]$/===readahead(2) ? 2 : 1 when ?d; if /^do((?!#@@LETTER_DIGIT)|$)/o===readahead(3) if maybe_local and expecting_do? ty=VarNameToken @@ -759,11 +771,11 @@ when 1,3; if /^(break|next|return)$/===name and !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident) #only 1 param in list result.unshift ImplicitParamListStartToken.new(oldpos) - @parsestack.push ParamListContextNoParen.new(@linenum) + @parsestack.push KWParamListContextNoParen.new(@linenum) else arr,pass=*param_list_coming_with_2_or_more_params? result.push( *arr ) unless pass #only 1 param in list @@ -844,18 +856,18 @@ def abort_noparens_for_rescue!(str='') #assert @moretokens.empty? result=[] ctx=@parsestack.last while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class] - break if AssignmentRhsContext===ctx && !ctx.multi_assign? - if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2] - result.push ImplicitParamListEndToken.new(input_position-str.length), - AssignmentRhsListEndToken.new(input_position-str.length) - @parsestack.pop - @parsestack.pop - break - end +# break if AssignmentRhsContext===ctx && !ctx.multi_assign? +# if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2] +# result.push ImplicitParamListEndToken.new(input_position-str.length), +# AssignmentRhsListEndToken.new(input_position-str.length) +# @parsestack.pop +# @parsestack.pop +# break +# end result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign? break if RescueSMContext===ctx #why is this here? @parsestack.pop ctx=@parsestack.last end @@ -864,18 +876,30 @@ #----------------------------------- CONTEXT2ENDTOK_FOR_DO={ AssignmentRhsContext=>AssignmentRhsListEndToken, ParamListContextNoParen=>ImplicitParamListEndToken, + UnparenedParamListLhsContext=>KwParamListEndToken, ExpectDoOrNlContext=>1, #WhenParamListContext=>KwParamListEndToken, #RescueSMContext=>KwParamListEndToken } def abort_noparens_for_do!(str='') #assert @moretokens.empty? result=[] while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class] + if klass==AssignmentRhsListEndToken + i=@parsestack.size + end_the_assign=false + while AssignmentRhsContext===@parsestack[i-=1] + if CONTEXT2ENDTOK_FOR_DO[@parsestack[i-1].class] and + @parsestack[i-1].class!=AssignmentRhsContext + break end_the_assign=true + end + end + break unless end_the_assign + end break if klass==1 result << klass.new(input_position-str.length) @parsestack.pop end return result @@ -915,44 +939,67 @@ public :enable_macros! #----------------------------------- @@SPACES=/[\ \t\v\f\v]/ - @@WSTOK=/\r?\n|\r*#@@SPACES+(?:#@@SPACES|\r(?!\n))*|\#[^\n]*\n|\\\r?\n| - ^=begin(?:[\s].*)?\n(?:(?!=end).*\n)*=end[\s\n].*\n/x - @@WSTOKS=/(?!=begin)#@@WSTOK+/o - def divide_ws(ws,offset) + @@WSTOK=/(?> + (?>\r?)\n| + (?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)| + \#(?>[^\n]*)\n| + \\(?>\r?)\n| + ^=begin(?>(?>#@@SPACES.*)?)\n + (?>(?:(?!=end)(?>.*)\n))* + =end(?>(?>#@@SPACES.*)?)\n + )/x + @@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o + def divide_ws(ws0,offset) result=[] - ws.scan(/\G#@@WSTOK/o){|ws| + ws0.scan(/\G#@@WSTOK/o){|ws| incr= $~.begin(0) - klass=case ws - when /\A[\#=]/; CommentToken - when /\n\Z/; EscNlToken - else WsToken + tok=case ws + when /\A[\#=]/; IgnoreToken.new(ws,offset+incr) + when /\n\Z/; EscNlToken.new(ws,offset+incr,@filename,@linenum) + else WsToken.new(ws,offset+incr) end - result << klass.new(ws,offset+incr) + result << tok + @linenum+=ws.count "\n" } result.each_with_index{|ws,i| if WsToken===ws ws.ident << result.delete_at(i+1).ident while WsToken===result[i+1] end } return result end + #----------------------------------- + #lex tokens until a predefined end token is found. + #returns a list of tokens seen. + def read_arbitrary_expression(&endcondition) + result=[] + oldsize=@parsestack.size + safe_recurse{ + tok=nil + until endcondition[tok,@parsestack[oldsize+1..-1]||[]] and @parsestack.size==oldsize + tok=get1token + result<<tok + EoiToken===tok and break lexerror( tok, "unexpected eof" ) + end + } + result + end - #----------------------------------- #parse keywords now, to prevent confusion over bare symbols #and match end with corresponding preceding def or class or whatever. #if arg is not a keyword, the block is called def parse_keywords(str,offset,&block) assert @moretokens.empty? assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident) result=[KeywordToken.new(str,offset)] - m="keyword_#{str}" + m=:"keyword_#{str}" respond_to?(m) ? (send m,str,offset,result,&block) : block[MethNameToken.new(str)] end public #these have to be public so respond_to? can see them (sigh) def keyword_end(str,offset,result) result.unshift(*abort_noparens!(str)) @@ -975,33 +1022,43 @@ end def keyword_module(str,offset,result) result.first.has_end! @parsestack.push WantsEndContext.new(str,@linenum) - @localvars_stack.push SymbolTable.new offset=input_position - @file.scan(/\A(#@@WSTOKS)?(::)?/o) - md=@file.last_match - all,ws,dc=*md - fail if all.empty? - @moretokens.concat divide_ws(ws,offset) if ws - @moretokens.push KeywordToken.new('::',offset+md.end(0)-2) if dc - loop do - offset=input_position - @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(::)?/o) + assert @moretokens.empty? + tokens=[] + if @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(?=[#{WHSP}]+(?:[^(])|[#;\n]|::)/o) md=@file.last_match - all,ws,name,dc=*md - if ws - @moretokens.concat divide_ws(ws,offset) - incr=ws.size + all,ws,name=*md + tokens.concat divide_ws(ws,md.begin(1)) if ws + tokens.push VarNameToken.new(name,md.begin(2)) + end + tokens.push( *read_arbitrary_expression{|tok,extra_contexts| + #@file.check /\A(\n|;|::|end(?!#@@LETTER_DIGIT)|(#@@UCLETTER#@@LETTER_DIGIT*)(?!(#@@WSTOKS)?::))/o + @file.check( /\A(\n|;|end(?!#@@LETTER_DIGIT))/o ) or + @file.check("::") && extra_contexts.all?{|ctx| ImplicitParamListContext===ctx } && + @moretokens.push(*abort_noparens!) + } ) if !name #or @file.check /#@@WSTOKS?::/o + @moretokens[0,0]=tokens + @localvars_stack.push SymbolTable.new + while @file.check( /\A::/ ) + #VarNameToken===@moretokens.last or + #KeywordToken===@moretokens.last && @moretokens.last.ident=="::" + @file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break + md=@file.last_match + all,ws1,dc,ws2,name=*md + if ws1 + @moretokens.concat divide_ws(ws1,md.begin(1)) + incr=ws1.size else incr=0 end - @moretokens.push VarNameToken.new(name,offset+incr) - break unless dc - @moretokens.push NoWsToken.new(offset+md.end(0)-2) - @moretokens.push KeywordToken.new('::',offset+md.end(0)-2) + @moretokens.push NoWsToken.new(md.begin(2)) if dc + @moretokens.push KeywordToken.new('::',md.begin(2)) if dc + @moretokens.concat divide_ws(ws2,md.begin(3)) if ws2 + @moretokens.push VarNameToken.new(name,md.begin(4)) end @moretokens.push EndHeaderToken.new(input_position) return result end @@ -1069,12 +1126,11 @@ assert WantsEndContext===@parsestack.last result.last.as=";" else result.last.has_end! if BlockContext===ctx and ctx.wanting_stabby_block_body - ctx.wanting_stabby_block_body=false - ctx.starter,ctx.ender="do","end" + @parsestack[-1]= WantsEndContext.new(str,@linenum) else @parsestack.push WantsEndContext.new(str,@linenum) localvars.start_block block_param_list_lookahead end @@ -1105,22 +1161,22 @@ end EoiToken===tok and lexerror tok, "eof in def header" result << tok end until parencount==0 #@parsestack.size==old_size @localvars_stack.push SymbolTable.new - else #no parentheses, all tail - set_last_token KeywordToken.new(".") #hack hack + else #no parentheses, all tail + set_last_token KeywordToken.new(".") #hack hack tokindex=result.size result << tok=symbol(false,false) name=tok.to_s assert !in_lvar_define_state #maybe_local really means 'maybe local or constant' maybe_local=case name when /(?!#@@LETTER_DIGIT).$/o; #do nothing when /^[@$]/; true - when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken + when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken when /^#@@LCLETTER/o; localvars===name when /^#@@UCLETTER/o; is_const=true #this is the right algorithm for constants... end result.push( *ignored_tokens(false,false) ) nc=nextchar @@ -1162,10 +1218,11 @@ #look for start of parameter list nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1]) if state==:expect_op and /^(?:#@@LETTER|[(&*])/o===nc ctx.state=:def_param_list + ctx.has_parens= '('==nc list,listend=def_param_list result.concat list end_index=result.index(listend) ofs=listend.offset if endofs @@ -1269,14 +1326,23 @@ return result end def keyword_rescue(str,offset,result) unless after_nonid_op? {false} + result.replace [] #rescue needs to be treated differently when in operator context... #i think no RescueSMContext should be pushed on the stack... - result.first.set_infix! #plus, the rescue token should be marked as infix - result.unshift(*abort_noparens_for_rescue!(str)) + tok=OperatorToken.new(str,offset) + tok.unary=false #plus, the rescue token should be marked as infix + if AssignmentRhsContext===@parsestack.last + tok.as="rescue3" + @parsestack.pop #end rhs context + result.push AssignmentRhsListEndToken.new(offset) #end rhs token + else + result.concat abort_noparens_for_rescue!(str) + end + result.push tok else result.push KwParamListStartToken.new(offset+str.length) #corresponding EndToken emitted by abort_noparens! on leaving rescue context @parsestack.push RescueSMContext.new(@linenum) # result.unshift(*abort_noparens!(str)) @@ -1347,16 +1413,35 @@ } end return result end + def keyword___FILE__(str,offset,result) + result.last.value=@filename + return result + end + def keyword___LINE__(str,offset,result) + result.last.value=@linenum + return result + end + + module RubyLexer1_9 + def keyword___ENCODING__(str,offset,result) + #result.last.value=huh + return result + end + + def keyword_not(*args,&block) _keyword_funclike(*args,&block) end + end + def _keyword_funclike(str,offset,result) if @last_operative_token===/^(\.|::)$/ result=yield MethNameToken.new(str) #should pass a methname token here else - result=yield KeywordToken.new(str) + tok=KeywordToken.new(str) + result=yield tok,tok end return result end for kw in FUNCLIKE_KEYWORDLIST-["END","return","break","next"] do alias_method "keyword_#{kw}".to_sym, :_keyword_funclike @@ -1364,14 +1449,16 @@ def _keyword_varlike(str,offset,result) #do nothing return result end - for kw in VARLIKE_KEYWORDLIST+["defined?", "not"] do + for kw in VARLIKE_KEYWORDLIST-["__FILE__","__LINE__"]+["defined?", "not"] do alias_method "keyword_#{kw}".to_sym, :_keyword_varlike end + + private #----------------------------------- def parsestack_lastnonassign_is?(obj) @parsestack.reverse_each{|ctx| @@ -1451,10 +1538,11 @@ end end elsif starter==?( ctx_type=UnparenedParamListLhsContext #hacky... should be a param? @parsestack.push ctx_type.new(@linenum) + a<<KwParamListStartToken.new( input_position ) end set_last_token KeywordToken.new( ';' ) #a.concat ignored_tokens @@ -1491,20 +1579,49 @@ #parsestack was changed by get1token above... normal_comma_level+=1 assert(normal_comma_level==@parsestack.size) - endingblock=proc{|tok| tok===')' } + endingblock=proc{|tok2| tok2===')' } else - endingblock=proc{|tok| tok===';' or NewlineToken===tok} + endingblock=proc{|tok2| tok2===';' or NewlineToken===tok2} end class << endingblock alias === call end + listend=method_parameters(result,normal_comma_level,endingblock,old_parsestack_size) + + @defining_lvar=false + @parsestack.last.see self,:semi + + assert(@parsestack.size <= old_parsestack_size) + + #hack: force next token to look like start of a + #new stmt, if the last ignored_tokens + #call above did not find a newline + #(just in case the next token parsed + #happens to call quote_expected? or after_nonid_op) + result.concat ignored_tokens +# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and +# !(NewlineToken===@last_operative_token) and +# !(/^(end|;)$/===@last_operative_token) + #result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size) + set_last_token KeywordToken.new( ';' ) + result<< get1token +# end + } + + return result,listend + end + + + #----------------------------------- + #read local parameter names in method definition + def method_parameters(result,normal_comma_level,endingblock,old_parsestack_size) + listend=nil set_last_token KeywordToken.new( ',' )#hack - #read local parameter names nextvar=nil loop do expect_name=(@last_operative_token===',' and normal_comma_level==@parsestack.size) expect_name and @defining_lvar||=true @@ -1531,11 +1648,11 @@ @defining_lvar=false assert((not @last_operative_token===',')) # assert !nextvar nextvar=tok.ident localvars[nextvar]=false #remove nextvar from list of local vars for now - when /^[&*]$/.token_pat #unary form... + when /^[&*(]$/.token_pat #unary form... #a NoWsToken is also expected... read it now result.concat maybe_no_ws_token #not needed? set_last_token KeywordToken.new( ',' ) else lexerror tok,"unfamiliar var name '#{tok}'" @@ -1551,36 +1668,13 @@ localvars[nextvar]=true #now, finally add nextvar back to local vars nextvar end end end - - @defining_lvar=false - @parsestack.last.see self,:semi - - assert(@parsestack.size <= old_parsestack_size) - assert(endingblock[tok] || ErrorToken===tok) - - #hack: force next token to look like start of a - #new stmt, if the last ignored_tokens - #call above did not find a newline - #(just in case the next token parsed - #happens to call quote_expected? or after_nonid_op) - result.concat ignored_tokens -# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and -# !(NewlineToken===@last_operative_token) and -# !(/^(end|;)$/===@last_operative_token) - #result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size) - set_last_token KeywordToken.new( ';' ) - result<< get1token -# end - } - - return result,listend + return listend end - #----------------------------------- #handle % in ruby code. is it part of fancy quote or a modulo operator? def percent(ch) if AssignmentContext===@parsestack.last @parsestack.pop @@ -1628,11 +1722,17 @@ #----------------------------------- #handle ? in ruby code. is it part of ?..: or a character literal? def char_literal_or_op(ch) if colon_quote_expected? ch getchar - NumberToken.new getchar_maybe_escape + if @rubyversion >= 1.9 + StringToken.new getchar_maybe_escape + else + ch=getchar_maybe_escape[0] + ch=ch.ord if ch.respond_to? :ord + NumberToken.new ch + end else @parsestack.push TernaryContext.new(@linenum) KeywordToken.new getchar #operator end end @@ -1823,11 +1923,11 @@ return [read(1),start] when ?_,?a..?z,?A..?Z,NONASCII context=merge_assignment_op_in_setter_callsites? ? ?: : nc return [identifier_as_string(context), start] when ?( - return [nil,start] if @enable_macro + return [nil,start] if @enable_macro or @rubyversion>=1.9 end set_last_token KeywordToken.new(';') lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}") return [nil, start] @@ -1851,11 +1951,11 @@ return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header") end res= HerePlaceholderToken.new( dash, quote, ender, quote_real ) if true - res.open=["<<",dash,quote,ender,quote].to_s + res.open=["<<",dash,quote,ender,quote].join procrastinated=til_charset(/[\n]/)#+readnl unless @base_file @base_file=@file @file=Sequence::List.new([@file]) @file.pos=@base_file.pos @@ -1977,11 +2077,11 @@ end @offset_adjust=@min_offset_adjust @moretokens.push( *optional_here_bodies ) ln=@linenum - @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error), + @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln-1), error), FileAndLineToken.new(@filename,ln,input_position) start_of_line_directives return @moretokens.shift @@ -1993,11 +2093,11 @@ if true #handle here bodies queued up by previous line pos=input_position while body=@pending_here_bodies.shift #body.offset=pos - result.push EscNlToken.new(@filename,nil,"\n",body.offset-1) + result.push EscNlToken.new("\n",body.offset-1,@filename,nil) result.push FileAndLineToken.new(@filename,body.ident.line,body.offset) result.push body #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first #result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off body.headtok.line=@linenum-1 @@ -2144,29 +2244,29 @@ (KeywordToken===@last_operative_token and @last_operative_token.ident=="rescue" and !@last_operative_token.infix?) || !after_nonid_op?{false} - hard=false if @rubyversion>=1.9 and @file.check /\A\n(?:#@@WSTOKS)?\.[^.]/o + hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o ) if hard @offset_adjust=@min_offset_adjust a= abort_noparens! case @parsestack.last #these should be in the see:semi handler - when ExpectDoOrNlContext: @parsestack.pop - when ExpectThenOrNlContext: @parsestack.pop + when ExpectDoOrNlContext; @parsestack.pop + when ExpectThenOrNlContext; @parsestack.pop end assert !@parsestack.empty? @parsestack.last.see self,:semi - a << super(ch) + a << rulexer_newline(ch) @moretokens.replace a+@moretokens else @offset_adjust=@min_offset_adjust offset= input_position nl=readnl - @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset), + @moretokens.push EscNlToken.new(nl,offset,@filename,@linenum-1), FileAndLineToken.new(@filename,@linenum,input_position) end #optimization: when thru with regurgitated text from a here document, #revert back to original unadorned Sequence instead of staying in the list. @@ -2220,19 +2320,19 @@ startpos= input_position more= read(EQBEGINLENGTH-1) #get =begin begin eof? and raise "eof before =end" - more<< til_charset(/[\r\n]/) + more<< til_charset(/\n/) eof? and raise "eof before =end" more<< readnl end until readahead(EQENDLENGTH)==EQEND #read rest of line after =end - more << til_charset(/[\r\n]/) - assert((eof? or ?\r===nextchar or ?\n===nextchar)) - assert !(/[\r\n]/===more[-1,1]) + more << til_charset(/\n/) + assert((eof? or ?\n===nextchar)) + assert !(/\n/===more[-1,1]) more<< readnl unless eof? # newls= more.scan(/\r\n?|\n\r?/) # @linenum+= newls.size @@ -2309,12 +2409,12 @@ #but {false} in the block #(hmmm... some now have true or other non-varname checks in them... could these be bugs?) return yield when StringToken, SymbolToken, NumberToken, HerePlaceholderToken, %r{^( - end|self|true|false|nil| - __FILE__|__LINE__|[\})\]] + end|self|true|false|nil|->| + __FILE__|__LINE__|__ENCODING__|[\})\]] )$}x.token_pat #dunno about def/undef #maybe class/module shouldn't he here either? #for is also in NewlineToken branch, below. #what about rescue? @@ -2397,11 +2497,11 @@ result << ?= end result= operator_or_methname_token( result) result.offset=oldpos return result - end + end #----------------------------------- def tilde(ch) #match ~ assert(ch=='~') result=getchar @@ -2424,24 +2524,26 @@ #----------------------------------- #match /[+\-]=?/ (+ or +=) #could be beginning of number, too #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?) def plusminus(ch) + pos=input_position assert(/^[+\-]$/===ch) if unary_op_expected?(ch) or KeywordToken===@last_operative_token && /^(return|break|next)$/===@last_operative_token.ident if (?0..?9)===readahead(2)[1] - return number(ch) + result= number(ch) elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc + @file.pos+=2 #push down block context localvars.start_block @parsestack.push ctx=BlockContext.new(@linenum) ctx.wanting_stabby_block_body=true #read optional proc params block_param_list_lookahead ?(, ParenedParamListLhsContext - + result=KeywordToken.new('->',pos) else #unary operator result=getchar WHSPLF[nextchar.chr] or @moretokens << NoWsToken.new(input_position) @@ -2454,10 +2556,11 @@ if eat_next_if(?=) result << ?= end result=(operator_or_methname_token result) end + result.offset=pos return result end #----------------------------------- def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>) @@ -2483,27 +2586,28 @@ if @rubyversion>=1.9 and StringToken===last and last.lvars #ruby delays adding lvars from regexps to known lvars table #for several tokens in some cases. not sure why or if on purpose #i'm just going to add them right away - localvars.concat last.lvars + last.lvars.each{|lvar| localvars[lvar]=true } end when '' #plain assignment: record local variable definitions last_context_not_implicit.lhs=false + @last_operative_token=result @moretokens.push( *ignored_tokens(true).map{|x| - NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x + NewlineToken===x ? EscNlToken.new(x.ident,x.offset,@filename,@linenum) : x } ) @parsestack.push AssignmentRhsContext.new(@linenum) + @moretokens.push AssignmentRhsListStartToken.new( input_position) if eat_next_if ?* tok=OperatorToken.new('*', input_position-1) tok.tag=:unary @moretokens.push tok WHSPLF[nextchar.chr] or @moretokens << NoWsToken.new(input_position) comma_in_lvalue_list? #is this needed? end - @moretokens.push AssignmentRhsListStartToken.new( input_position) end return result end #----------------------------------- @@ -2511,17 +2615,19 @@ assert nextchar==?! result=getchar k=eat_next_if(/[~=]/) if k result+=k - elsif eof?: #do nothing + elsif eof? or WHSPLF[nextchar.chr] #do nothing else - WHSPLF[nextchar.chr] or - @moretokens << NoWsToken.new(input_position) + @moretokens << NoWsToken.new(input_position) end - return KeywordToken.new(result, input_position-result.size) - #result should distinguish unary ! + ty= @rubyversion>=1.9 ? OperatorToken : KeywordToken + result=ty.new(result, input_position-result.size) + result.unary=!k #result should distinguish unary ! + + return result end #----------------------------------- def dot(ch) @@ -2563,11 +2669,11 @@ if false #----------------------------------- def comment(str) result="" #loop{ - result<< super(nil).to_s + result<< rulexer_comment(nil).to_s if /^\#.*\#$/===result #if comment was ended by a crunch #that's not a legal comment end in ruby, so just keep reading assert(result.to_s[-1]==?#) @@ -2643,24 +2749,45 @@ end when '{' #check if we are in a hash literal or string inclusion (#{}), #in which case below would be bad. - if after_nonid_op?{false} or @last_operative_token.has_no_block? + if !(UnparenedParamListLhsContext===@parsestack.last) and + after_nonid_op?{false} || @last_operative_token.has_no_block? @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash else #abort_noparens! tokch.set_infix! tokch.as="do" -#=begin not needed now, i think + + #if (perhaps deep) inside a stabby block param list context, end it + if @rubyversion>=1.9 + stabby_params_just_ended=false + (@parsestack.size-1).downto(1){|i| + case @parsestack[i] + when ParamListContextNoParen,AssignmentRhsContext + #do nothing yet... see if inside a UnparenedParamListLhsContext + when UnparenedParamListLhsContext #stabby proc + @moretokens<<tokch + (@parsestack.size-1).downto(i){|j| + @moretokens.unshift @parsestack[j].endtoken(input_position-1) + } + @parsestack[i..-1]=[] + tokch=@moretokens.shift + stabby_params_just_ended=true + break + else break + end + } + end + # 'need to find matching callsite context and end it if implicit' lasttok=last_operative_token - if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last + if !(lasttok===')' and lasttok.callsite?) and !stabby_params_just_ended #or ParamListContextNoParen===parsestack.last @moretokens.push( *(abort_1_noparen!(1).push tokch) ) tokch=@moretokens.shift end -#=end if BlockContext===@parsestack.last and @parsestack.last.wanting_stabby_block_body @parsestack.last.wanting_stabby_block_body=false else localvars.start_block @@ -2717,11 +2844,11 @@ return(endoffile_detected result) end #----------------------------------- def endoffile_detected(s='') - @moretokens.push( *(abort_noparens!.push super(s))) + @moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s))) if @progress_thread @progress_thread.kill @progress_thread=nil end result= @moretokens.shift @@ -2729,36 +2856,41 @@ result end #----------------------------------- def single_char_token(ch) - KeywordToken.new super(ch), input_position-1 + KeywordToken.new rulexer_single_char_token(ch), input_position-1 end #----------------------------------- def comma(ch) @moretokens.push token=single_char_token(ch) - #if assignment rhs seen inside method param list, when param list, array or hash literal, - # rescue where comma is expected, or method def param list - # then end the assignment rhs now - #+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext| + case @parsestack[-1] + when AssignmentRhsContext; + token.tag=:rhs + #if assignment rhs seen inside method param list, when param list, + # array or hash literal, rescue where comma is expected, method def param list, + # or another right hand side + # then end the assignment rhs now + #+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext|AssignmentRhsContext| # (RescueSMContext&-{:state=>:rescue})|(DefContext&-{:in_body=>FalseClass|nil}), # AssignmentRhsContext #]===@parsestack - if AssignmentRhsContext===@parsestack[-1] and - ParamListContext===@parsestack[-2] || - ParamListContextNoParen===@parsestack[-2] || - WhenParamListContext===@parsestack[-2] || - ListImmedContext===@parsestack[-2] || - (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) || - (DefContext===@parsestack[-2] && !@parsestack[-2].in_body) + while AssignmentRhsContext===@parsestack[-1] + pop= + case @parsestack[-2] + when ParamListContext,ParamListContextNoParen,WhenParamListContext, + ListImmedContext,AssignmentRhsContext; true + when RescueSMContext; @parsestack[-2].state==:rescue + when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens? + else false + end + break unless pop @parsestack.pop - @moretokens.unshift AssignmentRhsListEndToken.new(input_position) - end - case @parsestack[-1] - when AssignmentRhsContext; token.tag=:rhs + @moretokens.unshift AssignmentRhsListEndToken.new(input_position-1) + end when ParamListContext,ParamListContextNoParen; #:call when ListImmedContext; #:array when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc when ParenedParamListLhsContext; #stabby proc or method def'n? when KnownNestedLhsParenContext; #:nested @@ -2798,10 +2930,10 @@ end #----------------------------------- #tokenify_results_of :identifier save_offsets_in(*CHARMAPPINGS.values.uniq-[ - :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret + :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret,:plusminus ]) #save_offsets_in :symbol end