lib/rubylexer.rb in rubylexer-0.7.5 vs lib/rubylexer.rb in rubylexer-0.7.6
- old
+ new
@@ -168,10 +168,15 @@
@enable_macro=nil
@base_file=nil
@progress_thread=nil
@rubyversion=options[:rubyversion]
@encoding=options[:encoding]||:detect
+ @method_operators=if @rubyversion>=1.9
+ /#{RUBYSYMOPERATORREX}|\A![=~]?\Z/o
+ else
+ RUBYSYMOPERATORREX
+ end
@toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
read_leading_encoding
start_of_line_directives
@@ -668,13 +673,13 @@
=end
when ?(
maybe_local=false
lastid=lasttok&&lasttok.ident
case lastid
- when /\A[;(]|do\Z/: was_after_nonid_op=false
- when '|': was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
- when '{': was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
+ when /\A[;(]|do\Z/; was_after_nonid_op=false
+ when '|'; was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
+ when '{'; was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
end if KeywordToken===lasttok
was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
# /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
# MethNameToken===lasttok or
@@ -750,24 +755,25 @@
case implicit_parens_to_emit
when 2;
result.unshift ImplicitParamListStartToken.new(oldpos),
ImplicitParamListEndToken.new(oldpos)
when 1,3;
- arr,pass=*param_list_coming_with_2_or_more_params?
- result.push( *arr )
- unless pass
+ if /^(break|next|return)$/===name and
+ !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident)
#only 1 param in list
result.unshift ImplicitParamListStartToken.new(oldpos)
- last=result.last
- last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')'
- if /^(break|next|return)$/===name and
- !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident)
- ty=KWParamListContextNoParen
- else
- ty=ParamListContextNoParen
+ @parsestack.push ParamListContextNoParen.new(@linenum)
+ else
+ arr,pass=*param_list_coming_with_2_or_more_params?
+ result.push( *arr )
+ unless pass
+ #only 1 param in list
+ result.unshift ImplicitParamListStartToken.new(oldpos)
+ last=result.last
+ last.set_callsite! false if last.respond_to? :callsite? and last.callsite?
+ @parsestack.push ParamListContextNoParen.new(@linenum)
end
- @parsestack.push ty.new(@linenum)
end
when 0; #do nothing
else raise 'invalid value of implicit_parens_to_emit'
end
return result.unshift(tok)
@@ -780,12 +786,15 @@
# 'not ok:'
# 'not (but should it be?)'
end
#-----------------------------------
+ #read ahead to see if there's method param list (with real parentheses)
+ #and 2 or more parameters (and hence a comma to separate them)
+ #ugly, lexer recursion
def param_list_coming_with_2_or_more_params?
- WHSPCHARS[prevchar] && (?(==nextchar) or return [[],false]
+ return [[],false] unless WHSPCHARS[prevchar] && (?(==nextchar)
basesize=@parsestack.size
result=[get1token]
pass=loop{
tok=get1token
result << tok
@@ -797,10 +806,12 @@
break true
elsif EoiToken===tok
lexerror tok, "unexpected eof in parameter list"
end
}
+ result.concat @moretokens
+ @moretokens.replace []
return [result,pass]
end
#-----------------------------------
CONTEXT2ENDTOK={
@@ -905,26 +916,26 @@
#-----------------------------------
@@SPACES=/[\ \t\v\f\v]/
@@WSTOK=/\r?\n|\r*#@@SPACES+(?:#@@SPACES|\r(?!\n))*|\#[^\n]*\n|\\\r?\n|
- ^=begin[\s\n](?:(?!=end).*\n)*=end[\s\n].*\n/x
+ ^=begin(?:[\s].*)?\n(?:(?!=end).*\n)*=end[\s\n].*\n/x
@@WSTOKS=/(?!=begin)#@@WSTOK+/o
# Split a run of whitespace/comment text into individual tokens.
#
# ws     - the string to split; it is scanned with @@WSTOK anchored at \G,
#          so scanning stops at the first position where @@WSTOK fails.
# offset - added to each match's start position within ws to form the
#          position passed to the token constructor.
#
# Returns an Array of CommentToken / EscNlToken / WsToken instances in which
# runs of adjacent WsTokens have been merged into one.
def divide_ws(ws,offset)
result=[]
# NOTE: the block parameter below shadows the method parameter +ws+;
# inside the block, +ws+ is the text of the current match.
ws.scan(/\G#@@WSTOK/o){|ws|
# start index of the current match within the scanned string
incr= $~.begin(0)
# Classify by content: text starting with '#' or '=' is a comment,
# other text ending in a newline becomes EscNlToken, the rest is WsToken.
# (diff: old side used the `when cond: expr` form; new side uses `;`)
klass=case ws
- when /\A[\#=]/: CommentToken
- when /\n\Z/: EscNlToken
+ when /\A[\#=]/; CommentToken
+ when /\n\Z/; EscNlToken
else WsToken
end
result << klass.new(ws,offset+incr)
}
# Merge consecutive WsTokens: append the following token's ident to the
# current one and drop the following token, repeating while the next
# element is still a WsToken.
result.each_with_index{|ws,i|
if WsToken===ws
# (diff: old side called Array#delete, which removes elements equal to
# its argument; new side calls Array#delete_at, which removes by index)
- ws.ident << result.delete(i+1).ident while WsToken===result[i+1]
+ ws.ident << result.delete_at(i+1).ident while WsToken===result[i+1]
end
}
return result
end
@@ -1050,19 +1061,25 @@
return result
end
# Handle the `do` keyword.
#
# str    - the keyword text; passed to abort_noparens_for_do! and to
#          WantsEndContext.new.
# offset - not referenced in the body shown here.
# result - token array; tokens returned by abort_noparens_for_do! are
#          prepended to it, and its last element is annotated.
#
# Returns result.
def keyword_do(str,offset,result)
result.unshift(*abort_noparens_for_do!(str))
- if ExpectDoOrNlContext===@parsestack.last
+ ctx=@parsestack.last
+ if ExpectDoOrNlContext===ctx
# A `do` was being waited for: pop that context, check that the context
# underneath wants an `end`, and mark this token as acting like ";".
@parsestack.pop
assert WantsEndContext===@parsestack.last
result.last.as=";"
else
# Otherwise this `do` opens something that needs its own `end`.
result.last.has_end!
- @parsestack.push WantsEndContext.new(str,@linenum)
- localvars.start_block
- block_param_list_lookahead
# (new side) If the top context is a BlockContext still flagged as
# wanting a stabby-block body, reuse it: clear the flag and record
# "do"/"end" as its delimiters, rather than pushing a new context.
+ if BlockContext===ctx and ctx.wanting_stabby_block_body
+ ctx.wanting_stabby_block_body=false
+ ctx.starter,ctx.ender="do","end"
+ else
# Ordinary do-block: push a context that expects `end`, open a new
# local-variable block, and look ahead for a block parameter list.
+ @parsestack.push WantsEndContext.new(str,@linenum)
+ localvars.start_block
+ block_param_list_lookahead
+ end
end
return result
end
def keyword_def(str,offset,result) #macros too, if enabled
result.first.has_end!
@@ -1260,11 +1277,11 @@
result.unshift(*abort_noparens_for_rescue!(str))
else
result.push KwParamListStartToken.new(offset+str.length)
#corresponding EndToken emitted by abort_noparens! on leaving rescue context
@parsestack.push RescueSMContext.new(@linenum)
- result.unshift(*abort_noparens!(str))
+# result.unshift(*abort_noparens!(str))
end
return result
end
def keyword_then(str,offset,result)
@@ -1384,20 +1401,21 @@
# (foo,)=[1]
#-----------------------------------
- def block_param_list_lookahead
+ def block_param_list_lookahead starter=?|, ctx_type=BlockParamListLhsContext
safe_recurse{ |la|
set_last_token KeywordToken.new( ';' )
a=ignored_tokens
- if eat_next_if(?|)
- a<< KeywordToken.new("|", input_position-1)
+ if eat_next_if(starter)
+ mycontext=ctx_type.new(@linenum)
+ a<< KeywordToken.new(mycontext.starter, input_position-1)
if true
- @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
- nextchar==?| and a.push NoWsToken.new(input_position)
+ @parsestack.push mycontext
+ nextchar==mycontext.ender[0] and a.push NoWsToken.new(input_position)
else
if eat_next_if(?|)
a.concat [NoWsToken.new(input_position-1),
KeywordToken.new('|', input_position-1)]
else
@@ -1428,12 +1446,15 @@
@parsestack.pop
a<< KeywordToken.new('|',tok.offset)
@moretokens.empty? or
fixme %#moretokens might be set from get1token call above...might be bad#
-end
end
+end
+ elsif starter==?(
+ ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
+ @parsestack.push ctx_type.new(@linenum)
end
set_last_token KeywordToken.new( ';' )
#a.concat ignored_tokens
@@ -1592,11 +1613,12 @@
@parsestack.last.see self, :splat
case @parsestack[-1]
when AssignmentRhsContext; result.tag= :rhs
when ParamListContext,ParamListContextNoParen; #:call
when ListImmedContext; #:array
- when BlockParamListLhsContext; #:block
+ when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc
+ when ParenedParamListLhsContext; #:stabby proc or method def'n?
when KnownNestedLhsParenContext; #:nested
else result.tag= :lhs if cill
end
end
end
@@ -1645,11 +1667,12 @@
def is_var_name?
(tok=@last_operative_token)
s=tok.to_s
case s
- when /^[@$<]/; true
+ when /^[@$]/; true
+ when /^<</; HerePlaceholderToken===tok
when /(?!#@@LETTER_DIGIT).$/o; false
# when /^#@@LCLETTER/o; localvars===s or VARLIKE_KEYWORDS===s
when /^#@@LETTER/o; VarNameToken===tok
else raise "not var or method name: #{s}"
end
@@ -1712,12 +1735,13 @@
@parsestack.pop
tok.as="then"
end
when RescueSMContext
tok.as=";"
- end or
+ end or
fail ": not expected in #{@parsestack.last.class}->#{@parsestack.last.starter}"
+
#end ternary context, if any
@parsestack.last.see self,:colon
return @moretokens.shift
@@ -1746,11 +1770,11 @@
start= input_position
notbare and start-=1
klass=(notbare ? SymbolToken : MethNameToken)
#look for operators
- opmatches=readahead(3)[RUBYSYMOPERATORREX]
+ opmatches=readahead(3)[@method_operators]
result= opmatches ? read(opmatches.size) :
case nc=nextchar
when ?" #"
assert notbare
open=':"'; close='"'
@@ -1780,31 +1804,35 @@
result.close=close
end
return result
end
+ #-----------------------------------
# Always returns false here. callsite_symbol consults this predicate to
# choose the context argument it passes to identifier_as_string.
# NOTE(review): presumably meant to be overridden elsewhere -- confirm.
def merge_assignment_op_in_setter_callsites?
false
end
+
#-----------------------------------
# Read the name at the current input position of a call site.
#
# tok_to_errify - token handed to lexerror when the next character cannot
#                 start a callsite symbol.
#
# Returns a two-element Array [name, start]: name is the text that was read
# (nil in the error case and, on the new side, for "(" when @enable_macro is
# set); start is the input position recorded on entry.
def callsite_symbol(tok_to_errify)
start= input_position
#look for operators
# Peek at up to 3 characters and take the operator match, if any.
# (diff: old side matched RUBYSYMOPERATORREX directly; new side uses
# the @method_operators instance variable)
- opmatches=readahead(3)[RUBYSYMOPERATORREX]
- return [opmatches ? read(opmatches.size) :
- case nc=nextchar
- when ?` then read(1) #`
- when ?_,?a..?z,?A..?Z,NONASCII then
+ opmatches=readahead(3)[@method_operators]
+ return [read(opmatches.size), start] if opmatches
+ case nc=nextchar
+ when ?` #`
+ return [read(1),start]
+ when ?_,?a..?z,?A..?Z,NONASCII
# context is ?: when merge_assignment_op_in_setter_callsites? is true,
# otherwise the character that starts the identifier
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
- identifier_as_string(context)
- else
- set_last_token KeywordToken.new(';')
- lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
- nil
- end, start
- ]
+ return [identifier_as_string(context), start]
+ when ?(
+ return [nil,start] if @enable_macro
+ end
+
# Reached when no branch above returned: record a ';' keyword as the last
# token, report the error against tok_to_errify, and return a nil name.
+ set_last_token KeywordToken.new(';')
+ lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
+ return [nil, start]
end
#-----------------------------------
def here_header
read(2)=='<<' or raise "parser insanity"
@@ -2110,17 +2138,19 @@
assert @moretokens.empty?
pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
pre.allow_ooo_offset=true
- if NewlineToken===@last_operative_token or #hack
- (KeywordToken===@last_operative_token and
- @last_operative_token.ident=="rescue" and
- !@last_operative_token.infix?) or
- #/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
- !after_nonid_op?{false}
- then #hack-o-rama: probly cases left out above
+ hard=NewlineToken===@last_operative_token || #hack
+ (KeywordToken===@last_operative_token and
+ @last_operative_token.ident=="rescue" and
+ !@last_operative_token.infix?) ||
+ !after_nonid_op?{false}
+
+ hard=false if @rubyversion>=1.9 and @file.check /\A\n(?:#@@WSTOKS)?\.[^.]/o
+
+ if hard
@offset_adjust=@min_offset_adjust
a= abort_noparens!
case @parsestack.last #these should be in the see:semi handler
when ExpectDoOrNlContext: @parsestack.pop
when ExpectThenOrNlContext: @parsestack.pop
@@ -2400,10 +2430,19 @@
if unary_op_expected?(ch) or
KeywordToken===@last_operative_token &&
/^(return|break|next)$/===@last_operative_token.ident
if (?0..?9)===readahead(2)[1]
return number(ch)
+ elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
+ #push down block context
+ localvars.start_block
+ @parsestack.push ctx=BlockContext.new(@linenum)
+ ctx.wanting_stabby_block_body=true
+ #read optional proc params
+ block_param_list_lookahead ?(, ParenedParamListLhsContext
+
+
else #unary operator
result=getchar
WHSPLF[nextchar.chr] or
@moretokens << NoWsToken.new(input_position)
result=(operator_or_methname_token result)
@@ -2573,12 +2612,17 @@
tokch= NoWsToken.new(input_position-1)
end
when '('
lasttok=last_token_maybe_implicit #last_operative_token
#could be: lasttok===/^#@@LETTER/o
- if (VarNameToken===lasttok or MethNameToken===lasttok or
- lasttok===FUNCLIKE_KEYWORDS)
+ method_params= (
+ VarNameToken===lasttok or
+ MethNameToken===lasttok or
+ lasttok===FUNCLIKE_KEYWORDS or
+ (@enable_macro and lasttok and lasttok.ident==')')
+ )
+ if method_params
unless WHSPCHARS[lastchar]
@moretokens << tokch
tokch= NoWsToken.new(input_position-1)
end
@parsestack.push ParamListContext.new(@linenum)
@@ -2587,10 +2631,12 @@
lasttok=last_operative_token
maybe_def=DefContext===ctx && !ctx.in_body &&
!(KeywordToken===lasttok && lasttok.ident=="def")
if maybe_def or
BlockParamListLhsContext===ctx or
+ ParenedParamListLhsContext===ctx or
+ UnparenedParamListLhsContext===ctx or
ParenContext===ctx && ctx.lhs
@parsestack.push KnownNestedLhsParenContext.new(@linenum)
else
@parsestack.push ParenContext.new(@linenum)
end
@@ -2612,13 +2658,17 @@
@moretokens.push( *(abort_1_noparen!(1).push tokch) )
tokch=@moretokens.shift
end
#=end
- localvars.start_block
- @parsestack.push BlockContext.new(@linenum)
- block_param_list_lookahead
+ if BlockContext===@parsestack.last and @parsestack.last.wanting_stabby_block_body
+ @parsestack.last.wanting_stabby_block_body=false
+ else
+ localvars.start_block
+ @parsestack.push BlockContext.new(@linenum)
+ block_param_list_lookahead
+ end
end
end
return (tokch)
end
@@ -2643,10 +2693,14 @@
@moretokens.last.as="end" if BlockContext===ctx or BeginEndContext===ctx
if ParamListContext==ctx.class
assert ch==')'
kw.set_callsite! #not needed?
end
+ if ParenedParamListLhsContext===ctx
+ assert @parsestack.last.wanting_stabby_block_body
+ assert ch==')'
+ end
return @moretokens.shift
end
#-----------------------------------
def eof(ch=nil)
@@ -2703,10 +2757,11 @@
end
case @parsestack[-1]
when AssignmentRhsContext; token.tag=:rhs
when ParamListContext,ParamListContextNoParen; #:call
when ListImmedContext; #:array
- when BlockParamListLhsContext; #:block
+ when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc
+ when ParenedParamListLhsContext; #stabby proc or method def'n?
when KnownNestedLhsParenContext; #:nested
else
token.tag=:lhs if comma_in_lvalue_list?
end
@parsestack.last.see self,:comma