lib/rubylexer.rb in rubylexer-0.7.5 vs lib/rubylexer.rb in rubylexer-0.7.6

- old
+ new

@@ -168,10 +168,15 @@ @enable_macro=nil @base_file=nil @progress_thread=nil @rubyversion=options[:rubyversion] @encoding=options[:encoding]||:detect + @method_operators=if @rubyversion>=1.9 + /#{RUBYSYMOPERATORREX}|\A![=~]?\Z/o + else + RUBYSYMOPERATORREX + end @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS) read_leading_encoding start_of_line_directives @@ -668,13 +673,13 @@ =end when ?( maybe_local=false lastid=lasttok&&lasttok.ident case lastid - when /\A[;(]|do\Z/: was_after_nonid_op=false - when '|': was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last - when '{': was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last + when /\A[;(]|do\Z/; was_after_nonid_op=false + when '|'; was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last + when '{'; was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last end if KeywordToken===lasttok was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil? want_parens=!(ws_toks.empty? or was_after_nonid_op) #or # /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or # MethNameToken===lasttok or @@ -750,24 +755,25 @@ case implicit_parens_to_emit when 2; result.unshift ImplicitParamListStartToken.new(oldpos), ImplicitParamListEndToken.new(oldpos) when 1,3; - arr,pass=*param_list_coming_with_2_or_more_params? - result.push( *arr ) - unless pass + if /^(break|next|return)$/===name and + !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident) #only 1 param in list result.unshift ImplicitParamListStartToken.new(oldpos) - last=result.last - last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')' - if /^(break|next|return)$/===name and - !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident) - ty=KWParamListContextNoParen - else - ty=ParamListContextNoParen + @parsestack.push ParamListContextNoParen.new(@linenum) + else + arr,pass=*param_list_coming_with_2_or_more_params? + result.push( *arr ) + unless pass + #only 1 param in list + result.unshift ImplicitParamListStartToken.new(oldpos) + last=result.last + last.set_callsite! false if last.respond_to? :callsite? and last.callsite? + @parsestack.push ParamListContextNoParen.new(@linenum) end - @parsestack.push ty.new(@linenum) end when 0; #do nothing else raise 'invalid value of implicit_parens_to_emit' end return result.unshift(tok) @@ -780,12 +786,15 @@ # 'not ok:' # 'not (but should it be?)' end #----------------------------------- + #read ahead to see if there's method param list (with real parentheses) + #and 2 or more parameters (and hence a comma to separate them) + #ugly, lexer recursion def param_list_coming_with_2_or_more_params? - WHSPCHARS[prevchar] && (?(==nextchar) or return [[],false] + return [[],false] unless WHSPCHARS[prevchar] && (?(==nextchar) basesize=@parsestack.size result=[get1token] pass=loop{ tok=get1token result << tok @@ -797,10 +806,12 @@ break true elsif EoiToken===tok lexerror tok, "unexpected eof in parameter list" end } + result.concat @moretokens + @moretokens.replace [] return [result,pass] end #----------------------------------- CONTEXT2ENDTOK={ @@ -905,26 +916,26 @@ #----------------------------------- @@SPACES=/[\ \t\v\f\v]/ @@WSTOK=/\r?\n|\r*#@@SPACES+(?:#@@SPACES|\r(?!\n))*|\#[^\n]*\n|\\\r?\n| - ^=begin[\s\n](?:(?!=end).*\n)*=end[\s\n].*\n/x + ^=begin(?:[\s].*)?\n(?:(?!=end).*\n)*=end[\s\n].*\n/x @@WSTOKS=/(?!=begin)#@@WSTOK+/o def divide_ws(ws,offset) result=[] ws.scan(/\G#@@WSTOK/o){|ws| incr= $~.begin(0) klass=case ws - when /\A[\#=]/: CommentToken - when /\n\Z/: EscNlToken + when /\A[\#=]/; CommentToken + when /\n\Z/; EscNlToken else WsToken end result << klass.new(ws,offset+incr) } result.each_with_index{|ws,i| if WsToken===ws - ws.ident << result.delete(i+1).ident while WsToken===result[i+1] + ws.ident << result.delete_at(i+1).ident while WsToken===result[i+1] end } return result end @@ -1050,19 +1061,25 @@ return result end def keyword_do(str,offset,result) result.unshift(*abort_noparens_for_do!(str)) - if ExpectDoOrNlContext===@parsestack.last + ctx=@parsestack.last + if ExpectDoOrNlContext===ctx @parsestack.pop assert WantsEndContext===@parsestack.last result.last.as=";" else result.last.has_end! - @parsestack.push WantsEndContext.new(str,@linenum) - localvars.start_block - block_param_list_lookahead + if BlockContext===ctx and ctx.wanting_stabby_block_body + ctx.wanting_stabby_block_body=false + ctx.starter,ctx.ender="do","end" + else + @parsestack.push WantsEndContext.new(str,@linenum) + localvars.start_block + block_param_list_lookahead + end end return result end def keyword_def(str,offset,result) #macros too, if enabled result.first.has_end! @@ -1260,11 +1277,11 @@ result.unshift(*abort_noparens_for_rescue!(str)) else result.push KwParamListStartToken.new(offset+str.length) #corresponding EndToken emitted by abort_noparens! on leaving rescue context @parsestack.push RescueSMContext.new(@linenum) - result.unshift(*abort_noparens!(str)) +# result.unshift(*abort_noparens!(str)) end return result end def keyword_then(str,offset,result) @@ -1384,20 +1401,21 @@ # (foo,)=[1] #----------------------------------- - def block_param_list_lookahead + def block_param_list_lookahead starter=?|, ctx_type=BlockParamListLhsContext safe_recurse{ |la| set_last_token KeywordToken.new( ';' ) a=ignored_tokens - if eat_next_if(?|) - a<< KeywordToken.new("|", input_position-1) + if eat_next_if(starter) + mycontext=ctx_type.new(@linenum) + a<< KeywordToken.new(mycontext.starter, input_position-1) if true - @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum) - nextchar==?| and a.push NoWsToken.new(input_position) + @parsestack.push mycontext + nextchar==mycontext.ender[0] and a.push NoWsToken.new(input_position) else if eat_next_if(?|) a.concat [NoWsToken.new(input_position-1), KeywordToken.new('|', input_position-1)] else @@ -1428,12 +1446,15 @@ @parsestack.pop a<< KeywordToken.new('|',tok.offset) @moretokens.empty? or fixme %#moretokens might be set from get1token call above...might be bad# -end end +end + elsif starter==?( + ctx_type=UnparenedParamListLhsContext #hacky... should be a param? + @parsestack.push ctx_type.new(@linenum) end set_last_token KeywordToken.new( ';' ) #a.concat ignored_tokens @@ -1592,11 +1613,12 @@ @parsestack.last.see self, :splat case @parsestack[-1] when AssignmentRhsContext; result.tag= :rhs when ParamListContext,ParamListContextNoParen; #:call when ListImmedContext; #:array - when BlockParamListLhsContext; #:block + when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc + when ParenedParamListLhsContext; #:stabby proc or method def'n? when KnownNestedLhsParenContext; #:nested else result.tag= :lhs if cill end end end @@ -1645,11 +1667,12 @@ def is_var_name? (tok=@last_operative_token) s=tok.to_s case s - when /^[@$<]/; true + when /^[@$]/; true + when /^<</; HerePlaceholderToken===tok when /(?!#@@LETTER_DIGIT).$/o; false # when /^#@@LCLETTER/o; localvars===s or VARLIKE_KEYWORDS===s when /^#@@LETTER/o; VarNameToken===tok else raise "not var or method name: #{s}" end @@ -1712,12 +1735,13 @@ @parsestack.pop tok.as="then" end when RescueSMContext tok.as=";" - end or + end or fail ": not expected in #{@parsestack.last.class}->#{@parsestack.last.starter}" + #end ternary context, if any @parsestack.last.see self,:colon return @moretokens.shift @@ -1746,11 +1770,11 @@ start= input_position notbare and start-=1 klass=(notbare ? SymbolToken : MethNameToken) #look for operators - opmatches=readahead(3)[RUBYSYMOPERATORREX] + opmatches=readahead(3)[@method_operators] result= opmatches ? read(opmatches.size) : case nc=nextchar when ?" #" assert notbare open=':"'; close='"' @@ -1780,31 +1804,35 @@ result.close=close end return result end + #----------------------------------- def merge_assignment_op_in_setter_callsites? false end + #----------------------------------- def callsite_symbol(tok_to_errify) start= input_position #look for operators - opmatches=readahead(3)[RUBYSYMOPERATORREX] - return [opmatches ? read(opmatches.size) : - case nc=nextchar - when ?` then read(1) #` - when ?_,?a..?z,?A..?Z,NONASCII then + opmatches=readahead(3)[@method_operators] + return [read(opmatches.size), start] if opmatches + case nc=nextchar + when ?` #` + return [read(1),start] + when ?_,?a..?z,?A..?Z,NONASCII context=merge_assignment_op_in_setter_callsites? ? ?: : nc - identifier_as_string(context) - else - set_last_token KeywordToken.new(';') - lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}") - nil - end, start - ] + return [identifier_as_string(context), start] + when ?( + return [nil,start] if @enable_macro + end + + set_last_token KeywordToken.new(';') + lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}") + return [nil, start] end #----------------------------------- def here_header read(2)=='<<' or raise "parser insanity" @@ -2110,17 +2138,19 @@ assert @moretokens.empty? pre=FileAndLineToken.new(@filename,@linenum+1,input_position) pre.allow_ooo_offset=true - if NewlineToken===@last_operative_token or #hack - (KeywordToken===@last_operative_token and - @last_operative_token.ident=="rescue" and - !@last_operative_token.infix?) or - #/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack - !after_nonid_op?{false} - then #hack-o-rama: probly cases left out above + hard=NewlineToken===@last_operative_token || #hack + (KeywordToken===@last_operative_token and + @last_operative_token.ident=="rescue" and + !@last_operative_token.infix?) || + !after_nonid_op?{false} + + hard=false if @rubyversion>=1.9 and @file.check /\A\n(?:#@@WSTOKS)?\.[^.]/o + + if hard @offset_adjust=@min_offset_adjust a= abort_noparens! case @parsestack.last #these should be in the see:semi handler when ExpectDoOrNlContext: @parsestack.pop when ExpectThenOrNlContext: @parsestack.pop @@ -2400,10 +2430,19 @@ if unary_op_expected?(ch) or KeywordToken===@last_operative_token && /^(return|break|next)$/===@last_operative_token.ident if (?0..?9)===readahead(2)[1] return number(ch) + elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc + #push down block context + localvars.start_block + @parsestack.push ctx=BlockContext.new(@linenum) + ctx.wanting_stabby_block_body=true + #read optional proc params + block_param_list_lookahead ?(, ParenedParamListLhsContext + + else #unary operator result=getchar WHSPLF[nextchar.chr] or @moretokens << NoWsToken.new(input_position) result=(operator_or_methname_token result) @@ -2573,12 +2612,17 @@ tokch= NoWsToken.new(input_position-1) end when '(' lasttok=last_token_maybe_implicit #last_operative_token #could be: lasttok===/^#@@LETTER/o - if (VarNameToken===lasttok or MethNameToken===lasttok or - lasttok===FUNCLIKE_KEYWORDS) + method_params= ( + VarNameToken===lasttok or + MethNameToken===lasttok or + lasttok===FUNCLIKE_KEYWORDS or + (@enable_macro and lasttok and lasttok.ident==')') + ) + if method_params unless WHSPCHARS[lastchar] @moretokens << tokch tokch= NoWsToken.new(input_position-1) end @parsestack.push ParamListContext.new(@linenum) @@ -2587,10 +2631,12 @@ lasttok=last_operative_token maybe_def=DefContext===ctx && !ctx.in_body && !(KeywordToken===lasttok && lasttok.ident=="def") if maybe_def or BlockParamListLhsContext===ctx or + ParenedParamListLhsContext===ctx or + UnparenedParamListLhsContext===ctx or ParenContext===ctx && ctx.lhs @parsestack.push KnownNestedLhsParenContext.new(@linenum) else @parsestack.push ParenContext.new(@linenum) end @@ -2612,13 +2658,17 @@ @moretokens.push( *(abort_1_noparen!(1).push tokch) ) tokch=@moretokens.shift end #=end - localvars.start_block - @parsestack.push BlockContext.new(@linenum) - block_param_list_lookahead + if BlockContext===@parsestack.last and @parsestack.last.wanting_stabby_block_body + @parsestack.last.wanting_stabby_block_body=false + else + localvars.start_block + @parsestack.push BlockContext.new(@linenum) + block_param_list_lookahead + end end end return (tokch) end @@ -2643,10 +2693,14 @@ @moretokens.last.as="end" if BlockContext===ctx or BeginEndContext===ctx if ParamListContext==ctx.class assert ch==')' kw.set_callsite! #not needed? end + if ParenedParamListLhsContext===ctx + assert @parsestack.last.wanting_stabby_block_body + assert ch==')' + end return @moretokens.shift end #----------------------------------- def eof(ch=nil) @@ -2703,10 +2757,11 @@ end case @parsestack[-1] when AssignmentRhsContext; token.tag=:rhs when ParamListContext,ParamListContextNoParen; #:call when ListImmedContext; #:array - when BlockParamListLhsContext; #:block + when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc + when ParenedParamListLhsContext; #stabby proc or method def'n? when KnownNestedLhsParenContext; #:nested else token.tag=:lhs if comma_in_lvalue_list? end @parsestack.last.see self,:comma