lib/rubylexer.rb in rubylexer-0.7.0 vs lib/rubylexer.rb in rubylexer-0.7.1
- old
+ new
@@ -1,8 +1,8 @@
-=begin copyright
+=begin legal crap
rubylexer - a ruby lexer written in ruby
- Copyright (C) 2004,2005 Caleb Clausen
+ Copyright (C) 2004,2005,2008 Caleb Clausen
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
@@ -16,11 +16,10 @@
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
=end
-
require 'rubylexer/rulexer' #must be 1st!!!
require 'rubylexer/version'
require 'rubylexer/token'
require 'rubylexer/charhandler'
require 'rubylexer/symboltable'
@@ -30,37 +29,41 @@
#-----------------------------------
class RubyLexer
include NestedContexts
+
+
RUBYSYMOPERATORREX=
- %r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}
+ %r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
# (nasty beastie, eh?)
#these are the overridable operators
#does not match flow-control operators like: || && ! or and if not
#or op= ops like: += -= ||=
#or .. ... ?:
#for that use:
RUBYNONSYMOPERATORREX=
- %r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
+ %r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
UNSYMOPS=/^[~!]$/ #always unary
UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
WHSPCHARS=WHSPLF+"\\#"
- OPORBEGINWORDS="(if|unless|while|until)"
- BEGINWORDS=/^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
- FUNCLIKE_KEYWORDS=/^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
+ OPORBEGINWORDLIST=%w(if unless while until)
+ BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
+ OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})"
+ BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o
+ FUNCLIKE_KEYWORDS=/^(break|next|redo|return|yield|retry|super|BEGIN|END)$/
VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
BINOPWORDS="(and|or)"
- NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
+ NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
RUBYKEYWORDS=%r{
- ^(alias|#{BINOPWORDS}|not|undef|end|
+ ^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
#{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
#{INNERBOUNDINGWORDS}|#{BEGINWORDS}
)$
}xo
#__END__ should not be in this set... its handled in start_of_line_directives
@@ -70,12 +73,13 @@
?@ => :at_identifier,
?a..?z => :identifier,
?A..?Z => :identifier,
?_ => :identifier,
?0..?9 => :number,
- %{"'} => :double_quote,
- ?` => :back_quote,
+ ?" => :double_quote, #"
+ ?' => :single_quote, #'
+ ?` => :back_quote, #`
WHSP => :whitespace, #includes \r
?, => :comma,
?; => :semicolon,
@@ -97,63 +101,128 @@
?: => :symbol_or_op,
?\n => :newline, #implicitly escaped after op
#?\r => :newline, #implicitly escaped after op
?\\ => :escnewline,
- ?\0 => :eof,
+ ?\x00 => :eof,
+ ?\x04 => :eof,
+ ?\x1a => :eof,
"[({" => :open_brace,
"])}" => :close_brace,
?# => :comment
}
- attr_reader :incomplete_here_tokens, :parsestack
+ attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
#-----------------------------------
- def initialize(filename,file,linenum=1)
- super(filename,file, linenum)
+ def initialize(filename,file,linenum=1,offset_adjust=0)
+ @offset_adjust=0 #set again in next line
+ super(filename,file, linenum,offset_adjust)
@start_linenum=linenum
@parsestack=[TopLevelContext.new]
- @incomplete_here_tokens=[]
+ @incomplete_here_tokens=[] #not used anymore
+ @pending_here_bodies=[]
@localvars_stack=[SymbolTable.new]
@defining_lvar=nil
@in_def_name=false
+ @last_operative_token=nil
+ @last_token_maybe_implicit=nil
@toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
start_of_line_directives
+ progress_printer
end
+
+ def progress_printer
+ return unless ENV['RL_PROGRESS']
+ $stderr.puts 'printing progresses'
+ @progress_thread=Thread.new do
+ until EoiToken===@last_operative_token
+ sleep 10
+ $stderr.puts @file.pos
+ end
+ end
+ end
def localvars;
@localvars_stack.last
end
+ attr :localvars_stack
+ attr :offset_adjust
+ attr_writer :pending_here_bodies
+
#-----------------------------------
+ def set_last_token(tok)
+ @last_operative_token=@last_token_maybe_implicit=tok
+ end
+
+ #-----------------------------------
def get1token
result=super #most of the action's here
+ if ENV['PROGRESS']
+ @last_cp_pos||=0
+ @start_time||=Time.now
+ if result.offset-@last_cp_pos>100000
+ $stderr.puts "#{result.offset} #{Time.now-@start_time}"
+ @last_cp_pos=result.offset
+ end
+ end
+
#now cleanup and housekeeping
#check for bizarre token types
case result
+ when ImplicitParamListStartToken, ImplicitParamListEndToken
+ @last_token_maybe_implicit=result
+ result
when StillIgnoreToken#,nil
result
+ when StringToken
+ set_last_token result
+ assert !(IgnoreToken===@last_operative_token)
+ result.elems.map!{|frag|
+ if String===frag
+ result.translate_escapes(frag)
+ else
+ frag
+ end
+ } if AUTO_UNESCAPE_STRINGS
+ result
+
when Token#,String
- @last_operative_token=result
+ set_last_token result
assert !(IgnoreToken===@last_operative_token)
result
else
raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
end
end
+ #-----------------------------------
+ def eof?
+ super or EoiToken===@last_operative_token
+ end
#-----------------------------------
+ def input_position
+ super+@offset_adjust
+ end
+
+ #-----------------------------------
+ def input_position_raw
+ @file.pos
+ end
+
+ #-----------------------------------
def balanced_braces?
#@parsestack.empty?
@parsestack.size==1 and TopLevelContext===@parsestack.first
end
@@ -161,31 +230,41 @@
#-----------------------------------
def dollar_identifier(ch=nil)
s=eat_next_if(?$) or return nil
if t=((identifier_as_string(?$) or special_global))
- s<<t
+ s << t
else error= "missing $id name"
end
return lexerror(VarNameToken.new(s),error)
end
#-----------------------------------
def at_identifier(ch=nil)
result = (eat_next_if(?@) or return nil)
- result << (eat_next_if(?@)or'')
+ result << (eat_next_if(?@) or '')
if t=identifier_as_string(?@)
- result<<t
+ result << t
else error= "missing @id name"
end
- return lexerror(VarNameToken.new(result),error)
+ result=VarNameToken.new(result)
+ result.in_def=true if inside_method_def?
+ return lexerror(result,error)
end
private
#-----------------------------------
- def here_spread_over_ruby_code(rl,tok)
+ def inside_method_def?
+ @parsestack.reverse_each{|ctx|
+ ctx.starter=='def' and ctx.state!=:saw_def and return true
+ }
+ return false
+ end
+
+ #-----------------------------------
+ def here_spread_over_ruby_code(rl,tok) #not used anymore
assert(!rl.incomplete_here_tokens.empty?)
@incomplete_here_tokens += rl.incomplete_here_tokens
end
#-----------------------------------
@@ -205,14 +284,14 @@
@moretokens.unshift tok
return result
end
#-----------------------------------
- WSCHARSET=/[#\\\n\s\t\v\r\f]/
+ WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/
def ignored_tokens(allow_eof=false,allow_eol=true)
result=[]
- result<<@moretokens.shift while StillIgnoreToken===@moretokens.first
+ result << @moretokens.shift while StillIgnoreToken===@moretokens.first
@moretokens.empty? or return result
loop do
unless @moretokens.empty?
case @moretokens.first
when StillIgnoreToken
@@ -271,12 +350,12 @@
def special_global #handle $-a and friends
assert prevchar=='$'
result = ((
#order matters here, but it shouldn't
#(but til_charset must be last)
- eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
- (eat_next_if('-') and ("-"+getchar)) or
+ eat_if(/-[a-z0-9_]/i,2) or
+ eat_next_if(/[!@&+`'=~\-\/\\,.;<>*"$?:]/) or
(?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
))
end
#-----------------------------------
@@ -287,75 +366,49 @@
#skip keyword processing if 'escaped' as it were, by def, . or ::
#or if in a non-bare context
#just asserts because those contexts are never encountered.
#control goes through symbol(<...>,nil)
assert( /^[a-z_]$/i===context)
- assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
+ assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
- @moretokens.unshift(*parse_keywords(str,oldpos) do
+ @moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
#if not a keyword,
case str
when FUNCLIKE_KEYWORDS; #do nothing
when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
end
- safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
+ was_last=@last_operative_token
+ @last_operative_token=tok if tok
+ safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
end)
return @moretokens.shift
end
#-----------------------------------
+ IDENTREX={}
def identifier_as_string(context)
#must begin w/ letter or underscore
- str=eat_next_if(/[_a-z]/i) or return nil
+ /[_a-z]/i===nextchar.chr or return
#equals, question mark, and exclamation mark
#might be allowed at the end in some contexts.
#(in def headers and symbols)
#otherwise, =,?, and ! are to be considered
#separate tokens. confusing, eh?
#i hope i've captured all right conditions....
#context should always be ?: right after def, ., and :: now
- maybe_eq,maybe_qm,maybe_ex = case context
- when ?@,?$ then [nil,nil,nil]
- when ?: then [?=, ??, ?!]
- else [nil,??, ?!]
- end
-
- @in_def_name and maybe_eq= ?=
+ #= and ! only match if not part of a larger operator
+ trailers =
+ case context
+ when ?@,?$ then ""
+# when ?: then "!(?![=])|\\?|=(?![=~>])"
+ else "!(?![=])|\\?"
+ end
+ @in_def_name||context==?: and trailers<<"|=(?![=~>])"
- str<<til_charset(/[^a-z0-9_]/i)
-
- #look for ?, !, or =, if allowed
- case b=getc
- when nil #means we're at eof
- #handling nil here prevents b from ever matching
- #a nil value of maybe_qm, maybe_ex or maybe_eq
- when maybe_qm
- str << b
- when maybe_ex
- nc=(nextchar unless eof?)
- #does ex appear to be part of a larger operator?
- if nc==?= #or nc==?~
- back1char
- else
- str << b
- end
- when maybe_eq
- nc=(nextchar unless eof?)
- #does eq appear to be part of a larger operator?
- if nc==?= or nc==?~ or nc==?>
- back1char
- else
- str << b
- end
- else
- back1char
- end
-
-
- return str
+ @file.scan(IDENTREX[trailers]||=/^[_a-z][a-z0-9_]*(?:#{trailers})?/i)
end
#-----------------------------------
#contexts in which comma may appear in ruby:
#multiple lhs (terminated by assign op)
@@ -378,69 +431,82 @@
#-----------------------------------
#a comma has been seen. are we in an
#lvalue list or some other construct that uses commas?
def comma_in_lvalue_list?
- @parsestack.last.lhs= (not ListContext===@parsestack.last)
+ @parsestack.last.lhs=
+ case l=@parsestack.last
+ when ListContext:
+ when DefContext: l.in_body
+ else true
+ end
end
#-----------------------------------
def in_lvar_define_state
#@defining_lvar is a hack
@defining_lvar or case ctx=@parsestack.last
- when ForSMContext; ctx.state==:for
- when RescueSMContext; ctx.state==:arrow
+ #when ForSMContext; ctx.state==:for
+ when RescueSMContext
+ @last_operative_token.ident=="=>" and @file.match? /\A[\s\v]*([:;#\n]|then[^a-zA-Z0-9_])/m
#when BlockParamListLhsContext; true
end
end
+
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2
#-----------------------------------
#determine if an alphabetic identifier refers to a variable
#or method name. generates implicit parenthes(es) if it is a
#call site and no explicit parens are present. starts an implicit param list
#if appropriate. adds tok to the
#local var table if its a local var being defined for the first time.
- #note: what we here call variables (rather, constants) following ::
- #might actually be methods at runtime, but that's immaterial to tokenization.
-
- #note: this routine should determine the correct token type for name and
- #create the appropriate token. currently this is not done because callers
- #typically have done it (perhaps incorrectly) already.
- def var_or_meth_name(name,lasttok,pos)
+ #in general, operators in ruby are disambiguated by the before-but-not-after rule.
+ #an otherwise ambiguous operator is disambiguated by the surrounding whitespace:
+ #whitespace before but not after the 'operator' indicates it is to be considered a
+ #value token instead. otherwise it is a binary operator. (unary (prefix) ops count
+ #as 'values' here.)
+ def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
#look for call site if not a keyword or keyword is function-like
#look for and ignore local variable names
assert String===name
+ was_in_lvar_define_state=in_lvar_define_state
#maybe_local really means 'maybe local or constant'
maybe_local=case name
- when /[^a-z_0-9]$/i; #do nothing
- when /^[a-z_]/; (localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
- when /^[A-Z]/; is_const=true;not lasttok==='.' #this is the right algorithm for constants...
+ when /[^a-z_0-9]$/i #do nothing
+ when /^[a-z_]/
+ (localvars===name or
+ VARLIKE_KEYWORDS===name or
+ was_in_lvar_define_state
+ ) and not lasttok===/^(\.|::)$/
+ when /^[A-Z]/
+ is_const=true
+ not lasttok==='.' #this is the right algorithm for constants...
end
assert(@moretokens.empty?)
oldlast=@last_operative_token
- tok=@last_operative_token=VarNameToken.new(name,pos)
+ tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
oldpos= input_position
sawnl=false
result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
if sawnl || eof?
- if maybe_local then
- if in_lvar_define_state
- if /^[a-z_][a-zA-Z_0-9]*$/===name
- assert !(lasttok===/^(\.|::)$/)
- localvars[name]=true
- else
- lexerror tok,"not a valid variable name: #{name}"
- end
- return result.unshift(tok)
+ if was_in_lvar_define_state
+ if /^[a-z_][a-zA-Z_0-9]*$/===name
+ assert !(lasttok===/^(\.|::)$/)
+ localvars[name]=true
+ else
+ lexerror tok,"not a valid variable name: #{name}"
end
+ return result.unshift(tok)
+ elsif maybe_local
return result.unshift(tok) #if is_const
else
return result.unshift(
MethNameToken.new(name,pos), #insert implicit parens right after tok
ImplicitParamListStartToken.new( oldpos),
@@ -453,20 +519,22 @@
#then omit implicit parens
assignment_coming=case nc=nextchar
when ?=; not /^=[>=~]$/===readahead(2)
when ?,; comma_in_lvalue_list?
when ?); last_context_not_implicit.lhs
+ when ?i; /^in[^a-zA-Z_0-9]/===readahead(3) and
+ ForSMContext===last_context_not_implicit
when ?>,?<; /^(.)\1=$/===readahead(3)
when ?*,?&; /^(.)\1?=/===readahead(3)
when ?|; /^\|\|?=/===readahead(3) or
#is it a goalpost?
BlockParamListLhsContext===last_context_not_implicit &&
readahead(2)[1] != ?|
when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
end
- if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
- tok=VarNameToken.new(name,pos)
+ if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
+ tok=assign_lvar_type! VarNameToken.new(name,pos)
if /[^a-z_0-9]$/i===name
lexerror tok,"not a valid variable name: #{name}"
elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
localvars[name]=true
end
@@ -474,59 +542,130 @@
end
implicit_parens_to_emit=
if assignment_coming
@parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
- 0
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
else
case nc
when nil: 2
- when ?!; readahead(2)=='!=' ? 2 : 1
+ when ?!; /^![=~]$/===readahead(2) ? 2 : 1
+ when ?d;
+ if /^do([^a-zA-Z0-9_]|$)/===readahead(3)
+ if maybe_local and expecting_do?
+ ty=VarNameToken
+ 0
+ else
+ maybe_local=false
+ 2
+ end
+ else
+ 1
+ end
when NEVERSTARTPARAMLISTFIRST
(NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
- when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
+ when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1 #"
when ?{
maybe_local=false
+ 1
+=begin
x=2
x-=1 if /\A(return|break|next)\Z/===name and
!(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
x
+=end
when ?(;
- maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
+ maybe_local=false
+ lastid=lasttok&&lasttok.ident
+ case lastid
+ when /\A[;(]|do\Z/: was_after_nonid_op=false
+ when '|': was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
+ when '{': was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
+ end if KeywordToken===lasttok
+ was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
+ want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
+# /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
+# MethNameToken===lasttok or
+# RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
+# )
+
+ #look ahead for closing paren (after some whitespace...)
+ want_parens=false if @file.match? /\A.(?:\s|\v|\#.*\n)*\)/
+# afterparen=@file.pos
+# getchar
+# ignored_tokens(true)
+# want_parens=false if nextchar==?)
+# @file.pos=afterparen
+
+ want_parens ? 1 : 0
when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
- when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
- when ?:,??; next2=readahead(2);
- WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
-# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
- when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
- when ?[; ws_toks.empty? ? 2 : 3
+ when ?+, ?-, ?%, ?/
+ if /^(return|break|next)$/===@last_operative_token.ident and not(
+ KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
+ )
+ 1
+ else
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
+ end
+ when ?*, ?&
+ lasttok=@last_operative_token
+ if /^(return|break|next)$/===@last_operative_token.ident and not(
+ KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
+ )
+ 1
+ else
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
+ end
+ when ?:
+ next2=readahead(2)
+ if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then
+ $1 && !ws_toks.empty? ? 3 : 2
+ else
+ 3
+ end
+ when ??; next3=readahead(3);
+ /^\?([#{WHSPLF}]|[a-z_][a-z_0-9])/io===next3 ? 2 : 3
+# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
+ when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?["'`a-zA-Z_0-9]/]) ? 3 : 2
+ when ?[; ws_toks.empty?&&!(KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 2 : 3
when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
end
end
- if is_const and implicit_parens_to_emit==3 then
+ if is_const and implicit_parens_to_emit==3 then #needed?
implicit_parens_to_emit=1
end
- tok=if maybe_local and implicit_parens_to_emit>=2
+ if maybe_local and implicit_parens_to_emit>=2
implicit_parens_to_emit=0
- VarNameToken
+ ty=VarNameToken
else
- MethNameToken
- end.new(name,pos)
+ ty||=MethNameToken
+ end
+ tok=assign_lvar_type!(ty.new(name,pos))
+
case implicit_parens_to_emit
when 2;
result.unshift ImplicitParamListStartToken.new(oldpos),
ImplicitParamListEndToken.new(oldpos)
when 1,3;
arr,pass=*param_list_coming_with_2_or_more_params?
result.push( *arr )
unless pass
+ #only 1 param in list
result.unshift ImplicitParamListStartToken.new(oldpos)
- @parsestack.push ParamListContextNoParen.new(@linenum)
+ last=result.last
+ last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')'
+ if /^(break|next|return)$/===name and
+ !(KeywordToken===lasttok and /^(.|::)$/===lasttok.ident)
+ ty=KWParamListContextNoParen
+ else
+ ty=ParamListContextNoParen
+ end
+ @parsestack.push ty.new(@linenum)
end
when 0; #do nothing
else raise 'invalid value of implicit_parens_to_emit'
end
return result.unshift(tok)
@@ -545,28 +684,32 @@
WHSPCHARS[prevchar] && (?(==nextchar) or return [[],false]
basesize=@parsestack.size
result=[get1token]
pass=loop{
tok=get1token
- result<<tok
+ result << tok
if @parsestack.size==basesize
break false
elsif ','==tok.to_s and @parsestack.size==basesize+1
break true
+ elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.unary and @parsestack.size==basesize+1
+ break true
elsif EoiToken===tok
lexerror tok, "unexpected eof in parameter list"
end
}
return [result,pass]
end
#-----------------------------------
- CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
- ParamListContextNoParen=>ImplicitParamListEndToken,
- WhenParamListContext=>KwParamListEndToken,
- RescueSMContext=>KwParamListEndToken
- }
+ CONTEXT2ENDTOK={
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
+ ParamListContextNoParen=>ImplicitParamListEndToken,
+ KWParamListContextNoParen=>ImplicitParamListEndToken,
+ WhenParamListContext=>KwParamListEndToken,
+ RescueSMContext=>KwParamListEndToken
+ }
def abort_noparens!(str='')
#assert @moretokens.empty?
result=[]
while klass=CONTEXT2ENDTOK[@parsestack.last.class]
result << klass.new(input_position-str.length)
@@ -574,32 +717,89 @@
@parsestack.pop
end
return result
end
-if false #no longer used
+ #-----------------------------------
+ CONTEXT2ENDTOK_FOR_RESCUE={
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
+ ParamListContextNoParen=>ImplicitParamListEndToken,
+ KWParamListContextNoParen=>ImplicitParamListEndToken,
+ WhenParamListContext=>KwParamListEndToken,
+ RescueSMContext=>KwParamListEndToken
+ }
+ def abort_noparens_for_rescue!(str='')
+ #assert @moretokens.empty?
+ result=[]
+ ctx=@parsestack.last
+ while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
+ break if AssignmentRhsContext===ctx && !ctx.multi_assign?
+ if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
+ result.push ImplicitParamListEndToken.new(input_position-str.length),
+ AssignmentRhsListEndToken.new(input_position-str.length)
+ @parsestack.pop
+ @parsestack.pop
+ break
+ end
+ result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
+ break if RescueSMContext===ctx #why is this here?
+ @parsestack.pop
+ ctx=@parsestack.last
+ end
+ return result
+ end
+
+ #-----------------------------------
+ CONTEXT2ENDTOK_FOR_DO={
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
+ ParamListContextNoParen=>ImplicitParamListEndToken,
+ ExpectDoOrNlContext=>1,
+ #WhenParamListContext=>KwParamListEndToken,
+ #RescueSMContext=>KwParamListEndToken
+ }
+ def abort_noparens_for_do!(str='')
+ #assert @moretokens.empty?
+ result=[]
+ while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
+ break if klass==1
+ result << klass.new(input_position-str.length)
+ @parsestack.pop
+ end
+ return result
+ end
+
+ #-----------------------------------
+ def expecting_do?
+ @parsestack.reverse_each{|ctx|
+ next if AssignmentRhsContext===ctx
+ return !!CONTEXT2ENDTOK_FOR_DO[ctx.class]
+ }
+ return false
+ end
+
#-----------------------------------
def abort_1_noparen!(offs=0)
assert @moretokens.empty?
result=[]
while AssignmentRhsContext===@parsestack.last
@parsestack.pop
result << AssignmentRhsListEndToken.new(input_position-offs)
end
- ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
+ if ParamListContextNoParen===@parsestack.last #or lexerror huh,'{} with no matching callsite'
@parsestack.pop
result << ImplicitParamListEndToken.new(input_position-offs)
+ end
return result
end
-end
#-----------------------------------
#parse keywords now, to prevent confusion over bare symbols
#and match end with corresponding preceding def or class or whatever.
#if arg is not a keyword, the block is called
def parse_keywords(str,offset)
assert @moretokens.empty?
+ assert !(KeywordToken===@last_operative_token and /A(.|::|def)\Z/===@last_operative_token.ident)
result=[KeywordToken.new(str,offset)]
case str
when "end"
result.unshift(*abort_noparens!(str))
@@ -617,15 +817,19 @@
start,line=ctx.starter,ctx.linenum
BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
/^(do)$/===start and localvars.end_block
/^(class|module|def)$/===start and @localvars_stack.pop
- when "class","module"
+ when "module"
result.first.has_end!
@parsestack.push WantsEndContext.new(str,@linenum)
@localvars_stack.push SymbolTable.new
+ when "class"
+ result.first.has_end!
+ @parsestack.push ClassContext.new(str,@linenum)
+
when "if","unless" #could be infix form without end
if after_nonid_op?{false} #prefix form
result.first.has_end!
@parsestack.push WantsEndContext.new(str,@linenum)
@@ -651,26 +855,27 @@
# corresponding EndToken emitted leaving ForContext ("in" branch, below)
@parsestack.push WantsEndContext.new(str,@linenum)
#expect_do_or_end_or_nl! str #handled by ForSMContext now
@parsestack.push ForSMContext.new(@linenum)
when "do"
- result.unshift(*abort_noparens!(str))
+ result.unshift(*abort_noparens_for_do!(str))
if ExpectDoOrNlContext===@parsestack.last
@parsestack.pop
assert WantsEndContext===@parsestack.last
+ result.last.as=";"
else
result.last.has_end!
@parsestack.push WantsEndContext.new(str,@linenum)
localvars.start_block
block_param_list_lookahead
end
when "def"
result.first.has_end!
- @parsestack.push WantsEndContext.new("def",@linenum)
- @localvars_stack.push SymbolTable.new
+ @parsestack.push ctx=DefContext.new(@linenum)
+ ctx.state=:saw_def
safe_recurse { |aa|
- @last_operative_token=KeywordToken.new "def" #hack
+ set_last_token KeywordToken.new "def" #hack
result.concat ignored_tokens
#read an expr like a.b.c or a::b::c
#or (expr).b.c
if nextchar==?( #look for optional parenthesised head
@@ -681,14 +886,15 @@
case tok
when/^\($/.token_pat then parencount+=1
when/^\)$/.token_pat then parencount-=1
end
EoiToken===tok and lexerror tok, "eof in def header"
- result<<tok
+ result << tok
end until parencount==0 #@parsestack.size==old_size
- else #no parentheses, all tail
- @last_operative_token=KeywordToken.new "." #hack hack
+ @localvars_stack.push SymbolTable.new
+ else #no parentheses, all tail
+ set_last_token KeywordToken.new "." #hack hack
tokindex=result.size
result << tok=symbol(false,false)
name=tok.to_s
assert !in_lvar_define_state
@@ -698,93 +904,126 @@
when /^[@$]/; true
when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
when /^[a-z_]/; localvars===name
when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
end
- if !ty and maybe_local
- result.push( *ignored_tokens(false,false) )
- nc=nextchar
+ result.push( *ignored_tokens(false,false) )
+ nc=nextchar
+ if !ty and maybe_local
if nc==?: || nc==?.
ty=VarNameToken
end
end
- unless ty
- ty=MethNameToken
- endofs=tok.offset+tok.to_s.length
- result[tokindex+1...tokindex+1]=
- [ImplicitParamListStartToken.new(endofs),ImplicitParamListEndToken.new(endofs)]
+ if ty.nil? or (ty==KeywordToken and nc!=?: and nc!=?.)
+ ty=MethNameToken
+ if nc != ?(
+ endofs=tok.offset+tok.to_s.length
+ newtok=ImplicitParamListStartToken.new(endofs)
+ result.insert tokindex+1, newtok
+ end
end
assert result[tokindex].equal?(tok)
- result[tokindex]=ty.new(tok.to_s,tok.offset)
+ var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
+ @localvars_stack.push SymbolTable.new
+ var.in_def=true if inside_method_def? and var.respond_to? :in_def=
+ result[tokindex]=var
- #if a.b.c.d is seen, a, b, and c
+ #if a.b.c.d is seen, a, b and c
#should be considered maybe varname instead of methnames.
#the last (d in the example) is always considered a methname;
#it's what's being defined.
#b and c should be considered varnames only if
#they are capitalized and preceded by :: .
#a could even be a keyword (eg self or block_given?).
end
#read tail: .b.c.d etc
- result.reverse_each{|res| break @last_operative_token=res unless StillIgnoreToken===res}
- ###@last_operative_token=result.last #naive
+ result.reverse_each{|res| break set_last_token res unless StillIgnoreToken===res}
assert !(IgnoreToken===@last_operative_token)
state=:expect_op
@in_def_name=true
loop do
#look for start of parameter list
nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
if state==:expect_op and /^[a-z_(&*]/i===nc
- result.concat def_param_list
+ ctx.state=:def_param_list
+ list,listend=def_param_list
+ result.concat list
+ end_index=result.index(listend)
+ ofs=listend.offset
+ if endofs
+ result.insert end_index,ImplicitParamListEndToken.new(ofs)
+ else
+ ofs+=listend.to_s.size
+ end
+ result.insert end_index+1,EndDefHeaderToken.new(ofs)
break
end
tok=get1token
- result<<tok
+ result<< tok
case tok
when EoiToken
lexerror tok,'unexpected eof in def header'
when StillIgnoreToken
when MethNameToken ,VarNameToken # /^[a-z_]/i.token_pat
lexerror tok,'expected . or ::' unless state==:expect_name
state=:expect_op
when /^(\.|::)$/.token_pat
lexerror tok,'expected ident' unless state==:expect_op
+ if endofs
+ result.insert -2, ImplicitParamListEndToken.new(endofs)
+ endofs=nil
+ end
state=:expect_name
when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
+ ctx.state=:def_body
state==:expect_op or lexerror tok,'expected identifier'
+ if endofs
+ result.insert -2,ImplicitParamListEndToken.new(tok.offset)
+ end
+ result.insert -2, EndDefHeaderToken.new(tok.offset)
break
else
lexerror(tok, "bizarre token in def name: " +
"#{tok}:#{tok.class}")
end
end
@in_def_name=false
}
when "alias"
safe_recurse { |a|
- @last_operative_token=KeywordToken.new "alias" #hack
+ set_last_token KeywordToken.new "alias" #hack
result.concat ignored_tokens
res=symbol(eat_next_if(?:),false)
- res ? result<<res : lexerror(result.first,"bad symbol in alias")
- @last_operative_token=KeywordToken.new "alias" #hack
- result.concat ignored_tokens
- res=symbol(eat_next_if(?:),false)
- res ? result<<res : lexerror(result.first,"bad symbol in alias")
+ unless res
+ lexerror(result.first,"bad symbol in alias")
+ else
+ res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
+ result<< res
+ set_last_token KeywordToken.new "alias" #hack
+ result.concat ignored_tokens
+ res=symbol(eat_next_if(?:),false)
+ unless res
+ lexerror(result.first,"bad symbol in alias")
+ else
+ res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
+ result<< res
+ end
+ end
}
when "undef"
safe_recurse { |a|
loop do
- @last_operative_token=KeywordToken.new "," #hack
+ set_last_token KeywordToken.new "," #hack
result.concat ignored_tokens
tok=symbol(eat_next_if(?:),false)
tok or lexerror(result.first,"bad symbol in undef")
result<< tok
- @last_operative_token=tok
+ set_last_token tok
assert !(IgnoreToken===@last_operative_token)
sawnl=false
result.concat ignored_tokens(true){|nl| sawnl=true}
@@ -807,17 +1046,17 @@
when "rescue"
unless after_nonid_op? {false}
#rescue needs to be treated differently when in operator context...
#i think no RescueSMContext should be pushed on the stack...
- #plus, the rescue token should be marked as infix
- result.first.set_infix!
+ result.first.set_infix! #plus, the rescue token should be marked as infix
+ result.unshift(*abort_noparens_for_rescue!(str))
else
result.push KwParamListStartToken.new(offset+str.length)
#corresponding EndToken emitted by abort_noparens! on leaving rescue context
- result.unshift(*abort_noparens!(str))
@parsestack.push RescueSMContext.new(@linenum)
+ result.unshift(*abort_noparens!(str))
end
when "then"
result.unshift(*abort_noparens!(str))
@parsestack.last.see self,:then
@@ -829,20 +1068,47 @@
when /\A(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})\Z/o
result.unshift(*abort_noparens!(str))
when /\A(return|break|next)\Z/
- result=yield
- result.first.has_no_block! unless KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
+ fail if KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
+ tok=KeywordToken.new(str,offset)
+ result=yield tok
+ result[0]=tok
+ tok.has_no_block!
+
+
+ when 'END'
+ #END could be treated, lexically, just as if it is an
+ #ordinary method, except that local vars created in
+ #END blocks are visible to subsequent code. (Why??)
+ #That difference forces a custom parsing.
+ if @last_operative_token===/^(\.|::)$/
+ result=yield nil #should pass a keyword token here
+ else
+ safe_recurse{
+ old=result.first
+ result=[
+ MethNameToken.new(old.ident,old.offset),
+ ImplicitParamListStartToken.new(input_position),
+ ImplicitParamListEndToken.new(input_position),
+ *ignored_tokens
+ ]
+ getchar=='{' or lexerror(result.first,"expected { after #{str}")
+ result.push KeywordToken.new('{',input_position-1)
+ result.last.set_infix!
+ @parsestack.push BeginEndContext.new(str,offset)
+ }
+ end
when FUNCLIKE_KEYWORDS
- result=yield
+ result=yield nil #should be a keyword token
when RUBYKEYWORDS
#do nothing
- else result=yield
+ else result=yield nil
end
return result
end
@@ -879,15 +1145,15 @@
#-----------------------------------
def block_param_list_lookahead
safe_recurse{ |la|
- @last_operative_token=KeywordToken.new ';'
+ set_last_token KeywordToken.new ';'
a=ignored_tokens
if eat_next_if(?|)
- a<<KeywordToken.new("|", input_position-1)
+ a<< KeywordToken.new("|", input_position-1)
if true
@parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
nextchar==?| and a.push NoWsToken.new(input_position)
else
if eat_next_if(?|)
@@ -907,29 +1173,29 @@
when AssignmentRhsListStartToken; @defining_lvar=false
when AssignmentRhsListEndToken; parsestack_lastnonassign_is?(mycontext) and @defining_lvar=true
end
tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
- a<<tok
+ a<< tok
end
assert@defining_lvar || AssignmentRhsContext===@parsestack.last
@defining_lvar=false
while AssignmentRhsContext===@parsestack.last
a.push( *abort_noparens!('|') )
end
@parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
@parsestack.pop
- a<<KeywordToken.new('|',tok.offset)
+ a<< KeywordToken.new('|',tok.offset)
@moretokens.empty? or
fixme %#moretokens might be set from get1token call above...might be bad#
end
end
end
- @last_operative_token=KeywordToken.new ';'
+ set_last_token KeywordToken.new ';'
#a.concat ignored_tokens
#assert @last_operative_token===';'
#a<<get1token
@@ -946,10 +1212,11 @@
#the matching endbrace is found
def def_param_list
@in_def_name=false
result=[]
normal_comma_level=old_parsestack_size=@parsestack.size
+ listend=nil
safe_recurse { |a|
assert(@moretokens.empty?)
assert((not IgnoreToken===@moretokens[0]))
assert((@moretokens[0] or not nextchar.chr[WHSPCHARS]))
@@ -970,100 +1237,123 @@
end
class << endingblock
alias === call
end
- @last_operative_token=KeywordToken.new ',' #hack
+ set_last_token KeywordToken.new ',' #hack
#read local parameter names
+ nextvar=nil
loop do
expect_name=(@last_operative_token===',' and
normal_comma_level==@parsestack.size)
expect_name and @defining_lvar||=true
result << tok=get1token
- lexerror tok, "unexpected eof in def header" if EoiToken===tok
+ break lexerror(tok, "unexpected eof in def header") if EoiToken===tok
#break if at end of param list
- endingblock===tok and
- old_parsestack_size>=@parsestack.size and break
+ if endingblock===tok and old_parsestack_size>=@parsestack.size
+ nextvar and localvars[nextvar]=true #add nextvar to local vars
+ listend=tok
+ break
+ end
#next token is a local var name
#(or the one after that if unary ops present)
#result.concat ignored_tokens
if expect_name
case tok
when IgnoreToken #, /^[A-Z]/ #do nothing
when /^,$/.token_pat #hack
-
-
+
when VarNameToken
assert@defining_lvar
@defining_lvar=false
assert((not @last_operative_token===','))
+# assert !nextvar
+ nextvar=tok.ident
+ localvars[nextvar]=false #remove nextvar from list of local vars for now
when /^[&*]$/.token_pat #unary form...
#a NoWsToken is also expected... read it now
result.concat maybe_no_ws_token #not needed?
- @last_operative_token=KeywordToken.new ','
+ set_last_token KeywordToken.new ','
else
lexerror tok,"unfamiliar var name '#{tok}'"
end
- elsif /^,$/.token_pat===tok and
- normal_comma_level+1==@parsestack.size and
- AssignmentRhsContext===@parsestack.last
- #seeing comma here should end implicit rhs started within the param list
- result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
- @parsestack.pop
+ elsif /^,$/.token_pat===tok
+ if normal_comma_level+1==@parsestack.size and
+ AssignmentRhsContext===@parsestack.last
+ #seeing comma here should end implicit rhs started within the param list
+ result << AssignmentRhsListEndToken.new(tok.offset)
+ @parsestack.pop
+ end
+ if nextvar and normal_comma_level==@parsestack.size
+ localvars[nextvar]=true #now, finally add nextvar back to local vars
+ nextvar
+ end
end
end
@defining_lvar=false
-
+ @parsestack.last.see self,:semi
assert(@parsestack.size <= old_parsestack_size)
- assert(endingblock[tok])
+ assert(endingblock[tok] || ErrorToken===tok)
#hack: force next token to look like start of a
#new stmt, if the last ignored_tokens
#call above did not find a newline
#(just in case the next token parsed
#happens to call quote_expected? or after_nonid_op)
result.concat ignored_tokens
- if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
- !(NewlineToken===@last_operative_token) and
- !(/^(end|;)$/===@last_operative_token)
- @last_operative_token=KeywordToken.new ';'
+# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
+# !(NewlineToken===@last_operative_token) and
+# !(/^(end|;)$/===@last_operative_token)
+ #result<<EndDefHeaderToken.new(result.last.offset+result.last.to_s.size)
+ set_last_token KeywordToken.new ';'
result<< get1token
- end
+# end
}
- return result
+ return result,listend
end
#-----------------------------------
#handle % in ruby code. is it part of fancy quote or a modulo operator?
def percent(ch)
- if quote_expected? ch
+ if AssignmentContext===@parsestack.last
+ @parsestack.pop
+ op=true
+ end
+
+ if !op and quote_expected?(ch) ||
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
fancy_quote ch
- else
+ else
biop ch
- end
+ end
end
#-----------------------------------
#handle * & in ruby code. is unary or binary operator?
def star_or_amp(ch)
- assert('*&'[ch])
- want_unary=unary_op_expected? ch
- result=(quadriop ch)
- if want_unary
- #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
- assert OperatorToken===result
- result.unary=true #result should distinguish unary+binary *&
- WHSPLF[nextchar.chr] or
- @moretokens << NoWsToken.new(input_position)
- end
- result
+ assert('*&'[ch])
+ want_unary=unary_op_expected?(ch) ||
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
+ result=quadriop(ch)
+ if want_unary
+ #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
+ assert OperatorToken===result
+ result.unary=true #result should distinguish unary+binary *&
+ WHSPLF[nextchar.chr] or
+ @moretokens << NoWsToken.new(input_position)
+ comma_in_lvalue_list?
+ if ch=='*'
+ @parsestack.last.see self, :splat
+ end
+ end
+ result
end
#-----------------------------------
#handle ? in ruby code. is it part of ?..: or a character literal?
def char_literal_or_op(ch)
@@ -1077,19 +1367,27 @@
end
#-----------------------------------
def regex_or_div(ch)
#space after slash always means / operator, rather than regex start
- if after_nonid_op?{ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/\s}] }
- return regex(ch)
- else #/ is operator
- result=getchar
- if eat_next_if(?=)
- result << '='
- end
- return(operator_or_methname_token result)
- end
+ #= after slash always means /= operator, rather than regex start
+ if AssignmentContext===@parsestack.last
+ @parsestack.pop
+ op=true
+ end
+
+ if !op and after_nonid_op?{
+ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}]
+ } || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
+ return regex(ch)
+ else #/ is operator
+ result=getchar
+ if eat_next_if(?=)
+ result << '='
+ end
+ return(operator_or_methname_token result)
+ end
end
#-----------------------------------
#return true if last tok corresponds to a variable or constant,
#false if its for a method, nil for something else
@@ -1099,12 +1397,12 @@
(tok=@last_operative_token)
s=tok.to_s
case s
when /[^a-z_0-9]$/i; false
- when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
- when /^[A-Z]/; VarNameToken===tok
+# when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
+ when /^[A-Z_]/i; VarNameToken===tok
when /^[@$<]/; true
else raise "not var or method name: #{s}"
end
end
@@ -1137,22 +1435,26 @@
#look for another colon; return single : if not found
unless eat_next_if(?:)
#cancel implicit contexts...
@moretokens.push(*abort_noparens!(':'))
+ @moretokens.push KeywordToken.new(':',startpos)
- #end ternary context, if any
- @parsestack.last.see self,:colon
-
- TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
-
- if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
+ case @parsestack.last
+ when TernaryContext: @parsestack.pop #should be in the context's see handler
+ when ExpectDoOrNlContext: #should be in the context's see handler
@parsestack.pop
assert @parsestack.last.starter[/^(while|until|for)$/]
+ @moretokens.last.as=";"
+ when RescueSMContext:
+ @moretokens.last.as=";"
+ else @moretokens.last.as="then"
end
- @moretokens.push KeywordToken.new(':',startpos)
+ #end ternary context, if any
+ @parsestack.last.see self,:colon
+
return @moretokens.shift
end
#we definately found a ::
@@ -1180,13 +1482,19 @@
#look for operators
opmatches=readahead(3)[RUBYSYMOPERATORREX]
result= opmatches ? read(opmatches.size) :
case nc=nextchar
- when ?" then assert notbare;double_quote('"')
- when ?' then assert notbare;double_quote("'")
- when ?` then read(1)
+ when ?" #"
+ assert notbare
+ open=':"'; close='"'
+ double_quote('"')
+ when ?' #'
+ assert notbare
+ open=":'"; close="'"
+ single_quote("'")
+ when ?` then read(1) #`
when ?@ then at_identifier.to_s
when ?$ then dollar_identifier.to_s
when ?_,?a..?z then identifier_as_string(?:)
when ?A..?Z then
result=identifier_as_string(?:)
@@ -1195,11 +1503,16 @@
/[A-Z_0-9]$/i===result and klass=VarNameToken
end
result
else error= "unexpected char starting symbol: #{nc.chr}"
end
- return lexerror(klass.new(result,start),error)
+ result= lexerror(klass.new(result,start,notbare ? ':' : ''),error)
+ if open
+ result.open=open
+ result.close=close
+ end
+ return result
end
def merge_assignment_op_in_setter_callsites?
false
end
@@ -1209,16 +1522,16 @@
#look for operators
opmatches=readahead(3)[RUBYSYMOPERATORREX]
return [opmatches ? read(opmatches.size) :
case nc=nextchar
- when ?` then read(1)
+ when ?` then read(1) #`
when ?_,?a..?z,?A..?Z then
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
identifier_as_string(context)
else
- @last_operative_token=KeywordToken.new(';')
+ set_last_token KeywordToken.new(';')
lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
nil
end, start
]
end
@@ -1231,24 +1544,67 @@
quote=eat_next_if( /['"`]/)
if quote
ender=til_charset(/[#{quote}]/)
(quote==getchar) or
return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
+ quote_real=true
else
quote='"'
ender=til_charset(/[^a-zA-Z0-9_]/)
ender.length >= 1 or
- return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
+ return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
end
- res= HerePlaceholderToken.new( dash, quote, ender )
+ res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
+if true
+ res.open=["<<",dash,quote,ender,quote].to_s
+ procrastinated=til_charset(/[\n]/)#+readnl
+ unless @base_file
+ @base_file=@file
+ @file=Sequence::List.new([@file])
+ @file.pos=@base_file.pos
+ end
+ #actually delete procrastinated from input
+ @file.delete(input_position_raw-procrastinated.size...input_position_raw)
+
+ nl=readnl or return lexerror(res, "here header without body (at eof)")
+
+ @moretokens<< res
+ bodystart=input_position
+ @offset_adjust = @min_offset_adjust+procrastinated.size
+ #was: @offset_adjust += procrastinated.size
+ body=here_body(res)
+ res.close=body.close
+ @offset_adjust = @min_offset_adjust
+ #was: @offset_adjust -= procrastinated.size
+ bodysize=input_position-bodystart
+
+ #one or two already read characters are overwritten here,
+ #in order to keep offsets correct in the long term
+ #(at present, offsets and line numbers between
+ #here header and its body will be wrong. but they should re-sync thereafter.)
+ newpos=input_position_raw-nl.size
+ #unless procrastinated.empty?
+ @file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input
+ #end
+ input_position_set newpos
+
+ #line numbers would be wrong within the procrastinated section
+ @linenum-=1
+
+ #be nice to get the here body token at the right place in input, too...
+ @pending_here_bodies<< body
+ @offset_adjust-=bodysize#+nl.size
+
+ return @moretokens.shift
+else
@incomplete_here_tokens.push res
#hack: normally this should just be in get1token
#this fixup is necessary because the call the get1token below
#makes a recursion.
- @last_operative_token=res
+ set_last_token res
safe_recurse { |a|
assert(a.object_id==@moretokens.object_id)
toks=[]
begin
@@ -1267,11 +1623,11 @@
break
end
tok=get1token
assert(a.equal?( @moretokens))
- toks<<tok
+ toks<< tok
EoiToken===tok and lexerror tok, "here body expected before eof"
end while res.unsafe_to_use
assert(a.equal?( @moretokens))
a[0,0]= toks #same as a=toks+a, but keeps a's id
}
@@ -1279,17 +1635,18 @@
return res
#the action continues in newline, where
#the rest of the here token is read after a
#newline has been seen and res.affix is eventually called
+end
end
#-----------------------------------
def lessthan(ch) #match quadriop('<') or here doc or spaceship op
case readahead(3)
- when /^<<['"`\-a-z0-9_]$/i
- if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
+ when /^<<['"`\-a-z0-9_]$/i #'
+ if quote_expected?(ch) and not @last_operative_token==='class'
here_header
else
operator_or_methname_token read(2)
end
when "<=>" then operator_or_methname_token read(3)
@@ -1307,105 +1664,235 @@
result+=nl
else
error='illegal escape sequence'
end
- @moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
- optional_here_bodies
+ #optimization: when thru with regurgitated text from a here document,
+ #revert back to original unadorned Sequence instead of staying in the List.
+ if @base_file and indices=@file.instance_eval{@start_pos} and
+ (indices[-2]..indices[-1])===@file.pos
+ @base_file.pos=@file.pos
+ @file=@base_file
+ @base_file=nil
+ result="\n"
+ end
+
+ @offset_adjust=@min_offset_adjust
+ @moretokens.push *optional_here_bodies
+ ln=@linenum
+ @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
+ FileAndLineToken.new(@filename,ln,input_position)
- lexerror EscNlToken.new(@filename,ln-1,result,pos), error
+ start_of_line_directives
+
+ return @moretokens.shift
end
#-----------------------------------
def optional_here_bodies
-
+ result=[]
+if true
#handle here bodies queued up by previous line
- #(we should be more compatible with dos/mac style newlines...)
+ pos=input_position
+ while body=@pending_here_bodies.shift
+ #body.offset=pos
+ result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
+ result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
+ result.push body
+ #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
+ #result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
+ body.headtok.line=@linenum-1
+ end
+else
+ #...(we should be more compatible with dos/mac style newlines...)
while tofill=@incomplete_here_tokens.shift
+ result.push(
+ here_body(tofill),
+ FileAndLineToken.new(@filename,@linenum,input_position)
+ )
+ assert(eof? || "\r\n"[prevchar])
+ tofill.line=@linenum-1
+ end
+end
+ return result
+ end
+
+ #-----------------------------------
+ def here_body(tofill)
+ close="\n"
tofill.string.offset= input_position
+ linecount=1 #for terminator
+ assert("\n"==prevchar)
loop {
- assert("\r\n"[prevchar])
+ assert("\n"==prevchar)
#here body terminator?
- oldpos= input_position
+ oldpos= input_position_raw
if tofill.dash
- til_charset(/[^#{WHSP}]/o)
+ close+=til_charset(/[^#{WHSP}]/o)
end
- break if eof?
- break if read(tofill.ender.size)==tofill.ender and readnl
+ break if eof? #this is an error, should be handled better
+ if read(tofill.ender.size)==tofill.ender
+ crs=til_charset(/[^\r]/)||''
+ if nl=readnl
+ close+=tofill.ender+crs+nl
+ break
+ end
+ end
input_position_set oldpos
+ assert("\n"==prevchar)
+
if tofill.quote=="'"
- line=til_charset(/[\r\n]/)+readnl
- line.gsub! "\\\\", "\\"
+ line=til_charset(/[\n]/)
+ unless nl=readnl
+ assert eof?
+ break #this is an error, should be handled better
+ end
+ line.chomp!("\r")
+ line<< "\n"
+ assert("\n"==prevchar)
+ #line.gsub! "\\\\", "\\"
tofill.append line
- assert(line[-1..-1][/[\r\n]/])
+ tofill.string.bs_handler=:squote_heredoc_esc_seq
+ linecount+=1
+ assert("\n"==line[-1,1])
+ assert("\n"==prevchar)
else
+ assert("\n"==prevchar)
+
back1char #-1 to make newline char the next to read
@linenum-=1
+ assert /[\r\n]/===nextchar.chr
+
#retr evrything til next nl
+if FASTER_STRING_ESCAPES
+ line=all_quote("\r\n", tofill.quote, "\r\n")
+else
line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
+end
+ linecount+=1
#(you didn't know all_quote could take a regex, did you?)
+ assert("\n"==prevchar)
+
#get rid of fals that otherwise appear to be in the middle of
#a string (and are emitted out of order)
fal=@moretokens.pop
assert FileAndLineToken===fal || fal.nil?
+ assert line.bs_handler
+ tofill.string.bs_handler||=line.bs_handler
+
+ tofill.append_token line
+ tofill.string.elems<<'' unless String===tofill.string.elems.last
+
+ assert("\n"==prevchar)
+
back1char
@linenum-=1
assert("\r\n"[nextchar.chr])
- tofill.append_token line
tofill.append readnl
+
+ assert("\n"==prevchar)
end
+
+ assert("\n"==prevchar)
}
+
- assert(eof? || "\r\n"[prevchar])
+ str=tofill.string
+ str.bs_handler||=:dquote_esc_seq if str.elems.size==1 and str.elems.first==''
tofill.unsafe_to_use=false
- tofill.line=@linenum-1
+ assert str.bs_handler
+ #?? or tofill.string.elems==[]
+
+
+ tofill.string.instance_eval{@char="`"} if tofill.quote=="`"
+ #special cased, but I think that's all that's necessary...
- @moretokens.push \
- tofill.bodyclass.new(tofill),
- FileAndLineToken.new(@filename,@linenum,input_position)
- end
-
+ result=tofill.bodyclass.new(tofill,linecount)
+ result.open=str.open=""
+ tofill.close=close
+ result.close=str.close=close[1..-1]
+ result.offset=str.offset
+ assert str.open
+ assert str.close
+ return result
end
#-----------------------------------
def newline(ch)
assert("\r\n"[nextchar.chr])
-
-
#ordinary newline handling (possibly implicitly escaped)
assert("\r\n"[nextchar.chr])
assert !@parsestack.empty?
assert @moretokens.empty?
- result=if NewlineToken===@last_operative_token or #hack
- @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
- !after_nonid_op?{false}
- then #hack-o-rama: probly cases left out above
- a= abort_noparens!
- ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
- assert !@parsestack.empty?
- @parsestack.last.see self,:semi
- a << super(ch)
- @moretokens.replace a+@moretokens
- @moretokens.shift
- else
- offset= input_position
- nl=readnl
- @moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
- EscNlToken.new(@filename,@linenum-1,nl,offset)
- #WsToken.new ' ' #why? #should be "\\\n" ?
- end
+ pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
+ pre.allow_ooo_offset=true
- optional_here_bodies
+ if NewlineToken===@last_operative_token or #hack
+ (KeywordToken===@last_operative_token and
+ @last_operative_token.ident=="rescue" and
+ !@last_operative_token.infix?) or
+ #/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
+ !after_nonid_op?{false}
+ then #hack-o-rama: probly cases left out above
+ @offset_adjust=@min_offset_adjust
+ a= abort_noparens!
+ ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
+ assert !@parsestack.empty?
+ @parsestack.last.see self,:semi
+ a << super(ch)
+ @moretokens.replace a+@moretokens
+ else
+ @offset_adjust=@min_offset_adjust
+ offset= input_position
+ nl=readnl
+ @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
+ FileAndLineToken.new(@filename,@linenum,input_position)
+ end
+
+ #optimization: when thru with regurgitated text from a here document,
+ #revert back to original unadorned Sequence instead of staying in the list.
+ if @base_file and indices=@file.instance_eval{@start_pos} and
+ (indices[-2]..indices[-1])===@file.pos and Sequence::SubSeq===@file.list.last
+ @base_file.pos=@file.pos
+ @file=@base_file
+ @base_file=nil
+ end
+
+ fal=@moretokens.last
+ assert FileAndLineToken===fal
+
+ @offset_adjust=@min_offset_adjust
+
+ @moretokens.unshift(*optional_here_bodies)
+ result=@moretokens.shift
+
+ #adjust line count in fal to account for newlines in here bodys
+ i=@moretokens.size-1
+ while(i>=0)
+ #assert FileAndLineToken===@moretokens[i]
+ i-=1 if FileAndLineToken===@moretokens[i]
+ break unless HereBodyToken===@moretokens[i]
+ pre_fal=true
+ fal.line-=@moretokens[i].linecount
+
+ i-=1
+ end
+
+ if pre_fal
+ @moretokens.unshift result
+ pre.offset=result.offset
+ result=pre
+ end
start_of_line_directives
return result
end
@@ -1422,19 +1909,20 @@
startpos= input_position
more= read(EQBEGINLENGTH-1) #get =begin
begin
eof? and raise "eof before =end"
- more<<til_charset(/[\r\n]/)
- more<<readnl
+ more<< til_charset(/[\r\n]/)
+ eof? and raise "eof before =end"
+ more<< readnl
end until readahead(EQENDLENGTH)==EQEND
#read rest of line after =end
more << til_charset(/[\r\n]/)
- assert((?\r===nextchar or ?\n===nextchar))
+ assert((eof? or ?\r===nextchar or ?\n===nextchar))
assert !(/[\r\n]/===more[-1,1])
- more<< readnl
+ more<< readnl unless eof?
# newls= more.scan(/\r\n?|\n\r?/)
# @linenum+= newls.size
#inject the fresh comment into future token results
@@ -1443,11 +1931,11 @@
end
#handle __END__
if ENDMARKER===readahead(ENDMARKERLENGTH)
assert !(ImplicitContext===@parsestack.last)
- @moretokens.unshift endoffile_detected(read(7))
+ @moretokens.unshift endoffile_detected(read(ENDMARKERLENGTH))
# input_position_set @file.size
end
end
@@ -1458,28 +1946,25 @@
#returns whether current token is to be the start of a literal
IDBEGINCHAR=/^[a-zA-Z_$@]/
def unary_op_expected?(ch) #yukko hack
'*&='[readahead(2)[1..1]] and return false
+ return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
+
after_nonid_op? {
#possible func-call as operator
not is_var_name? and
- WHSPLF[prevchar]
+ WHSPLF[prevchar] and !WHSPLF[readahead(2)[1..1]]
}
end
#-----------------------------------
#used to resolve the ambiguity of
# <<, %, ? in ruby
#returns whether current token is to be the start of a literal
def quote_expected?(ch) #yukko hack
- if AssignmentContext===@parsestack.last
- @parsestack.pop
- return false
- end
-
case ch[0]
when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
when ?< then !readahead(4)[/^<<-?['"`a-z0-9_]/i]
else raise 'unexpected ch (#{ch}) in quote_expected?'
@@ -1498,21 +1983,27 @@
#returns false if last token was an value, true if it was an operator.
#returns what block yields if last token was a method name.
#used to resolve the ambiguity of
# <<, %, /, ?, :, and newline (among others) in ruby
def after_nonid_op?
+
+ #this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
+# if ImplicitParamListStartToken===@last_token_including_implicit
+# huh return true
+# end
case @last_operative_token
- when MethNameToken, FUNCLIKE_KEYWORDS.token_pat ,VarNameToken
+ when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
#VarNameToken should really be left out of this case...
#should be in next branch instread
#callers all check for last token being not a variable if they pass anything
- #but {false} in the block
+ #but {false} in the block
+ #(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
return yield
when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
%r{^(
- class|module|end|self|true|false|nil|
- __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
+ end|self|true|false|nil|
+ __FILE__|__LINE__|[\})\]]
)$}x.token_pat
#dunno about def/undef
#maybe class/module shouldn't he here either?
#for is also in NewlineToken branch, below.
#what about rescue?
@@ -1520,21 +2011,20 @@
when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS}|do)$/o.token_pat
#regexs above must match whole string
#assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
return true
when NewlineToken, nil, #nil means we're still at beginning of file
- /^([({\[]|or|not|and|if|unless|then|elsif|else|
- while|until|begin|for|in|case|when|ensure)$
+ /^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
+ while|until|begin|for|in|case|when|ensure|defined\?)$
/x.token_pat
return true
- #when KeywordToken
- # return true
+ when KeywordToken
+ return true if /^(alias|undef)$/===@last_operative_token.ident #is this ever actually true???
when IgnoreToken
raise "last_operative_token shouldn't be ignoreable"
- else
- raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
end
+ raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
end
@@ -1575,14 +2065,14 @@
end
#-----------------------------------
def biop(ch) #match /%=?/ (% or %=)
- assert(ch[/^[%^~]$/])
+ assert(ch[/^[%^]$/])
result=getchar
if eat_next_if(?=)
- result <<?=
+ result << ?=
end
return operator_or_methname_token( result)
end
#-----------------------------------
def tilde(ch) #match ~
@@ -1608,31 +2098,31 @@
#match /[+\-]=?/ (+ or +=)
#could be beginning of number, too
#fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
def plusminus(ch)
assert(/^[+\-]$/===ch)
- if unary_op_expected?(ch)
+ if unary_op_expected?(ch) or
+ KeywordToken===@last_operative_token &&
+ /^(return|break|next)$/===@last_operative_token.ident
if (?0..?9)===readahead(2)[1]
return number(ch)
else #unary operator
result=getchar
WHSPLF[nextchar.chr] or
@moretokens << NoWsToken.new(input_position)
result=(operator_or_methname_token result)
result.unary=true
- #todo: result should distinguish unary+binary +-
end
else #binary operator
assert(! want_op_name)
result=getchar
if eat_next_if(?=)
result << ?=
end
result=(operator_or_methname_token result)
- #todo: result should distinguish unary+binary +-
end
- result
+ return result
end
#-----------------------------------
def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
offset= input_position
@@ -1640,23 +2130,35 @@
assert str=='='
c=(eat_next_if(/[~=>]/)or'')
str << c
result= operator_or_methname_token( str,offset)
case c
- when '=': str<< (eat_next_if(?=)or'')
+ when '=': #===,==
+ str<< (eat_next_if(?=)or'')
- when '>':
+ when '>': #=>
unless ParamListContextNoParen===@parsestack.last
@moretokens.unshift result
@moretokens.unshift( *abort_noparens!("=>"))
result=@moretokens.shift
end
@parsestack.last.see self,:arrow
- when '': #record local variable definitions
-
+ when '': #plain assignment: record local variable definitions
+ last_context_not_implicit.lhs=false
+ @moretokens.push *ignored_tokens(true).map{|x|
+ NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x
+ }
@parsestack.push AssignmentRhsContext.new(@linenum)
- @moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
+ if eat_next_if ?*
+ tok=OperatorToken.new('*', input_position-1)
+ tok.unary=true
+ @moretokens.push tok
+ WHSPLF[nextchar.chr] or
+ @moretokens << NoWsToken.new(input_position)
+ comma_in_lvalue_list? #is this needed?
+ end
+ @moretokens.push AssignmentRhsListStartToken.new( input_position)
end
return result
end
#-----------------------------------
@@ -1664,10 +2166,11 @@
assert nextchar==?!
result=getchar
k=eat_next_if(/[~=]/)
if k
result+=k
+ elsif eof?: #do nothing
else
WHSPLF[nextchar.chr] or
@moretokens << NoWsToken.new(input_position)
end
return KeywordToken.new(result, input_position-result.size)
@@ -1691,34 +2194,35 @@
return result
end
#-----------------------------------
def dot_rhs(prevtok)
safe_recurse { |a|
- @last_operative_token=prevtok
+ set_last_token prevtok
aa= ignored_tokens
+ was=after_nonid_op?{true}
tok,pos=callsite_symbol(prevtok)
- tok and aa.push(*var_or_meth_name(tok,prevtok,pos))
+ tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was))
a.unshift(*aa)
}
end
#-----------------------------------
def back_quote(ch=nil)
if @last_operative_token===/^(def|::|\.)$/
oldpos= input_position
- MethNameToken.new(eat_next_if(?`), oldpos)
+ MethNameToken.new(eat_next_if(?`), oldpos) #`
else
double_quote(ch)
end
end
if false
#-----------------------------------
def comment(str)
result=""
#loop{
- result<<super(nil).to_s
+ result<< super(nil).to_s
if /^\#.*\#$/===result #if comment was ended by a crunch
#that's not a legal comment end in ruby, so just keep reading
assert(result.to_s[-1]==?#)
@@ -1760,11 +2264,11 @@
if (VarNameToken===lasttok or ImplicitParamListEndToken===lasttok or MethNameToken===lasttok) and !WHSPCHARS[lastchar]
@moretokens << (tokch)
tokch= NoWsToken.new(input_position-1)
end
when '('
- lasttok=last_operative_token
+ lasttok=last_token_maybe_implicit #last_operative_token
#could be: lasttok===/^[a-z_]/i
if (VarNameToken===lasttok or MethNameToken===lasttok or
lasttok===FUNCLIKE_KEYWORDS)
unless WHSPCHARS[lastchar]
@moretokens << tokch
@@ -1779,19 +2283,21 @@
#check if we are in a hash literal or string inclusion (#{}),
#in which case below would be bad.
if after_nonid_op?{false} or @last_operative_token.has_no_block?
@parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
else
+ #abort_noparens!
tokch.set_infix!
-=begin not needed now, i think
+ tokch.as="do"
+#=begin not needed now, i think
# 'need to find matching callsite context and end it if implicit'
lasttok=last_operative_token
- unless lasttok===')' and lasttok.callsite?
+ if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
@moretokens.push *(abort_1_noparen!(1).push tokch)
tokch=@moretokens.shift
end
-=end
+#=end
localvars.start_block
@parsestack.push BlockContext.new(@linenum)
block_param_list_lookahead
end
@@ -1809,38 +2315,48 @@
lexerror kw,"unmatched brace: #{ch}"
return @moretokens.shift
end
ctx=@parsestack.pop
origch,line=ctx.starter,ctx.linenum
- ch==PAIRS[origch] or
+ if ch!=PAIRS[origch]
+ #kw.extend MismatchedBrace
lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
"matching brace location", @filename, line
- BlockContext===ctx and localvars.end_block
+ end
+ if BlockContext===ctx
+ localvars.end_block
+ @moretokens.last.as="end"
+ end
if ParamListContext==ctx.class
assert ch==')'
- #kw.set_callsite! #not needed?
+ kw.set_callsite! #not needed?
end
return @moretokens.shift
end
#-----------------------------------
def eof(ch=nil)
#this must be the very last character...
oldpos= input_position
- assert(?\0==getc)
+ assert(/\A[\x0\x4\x1a]\Z/===nextchar.chr)
- result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
+ result=@file.read!
+# result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
- eof? or
- lexerror result,'nul character is not at the end of file'
- input_position_set @file.size
+# eof? or
+# lexerror result,'nul character is not at the end of file'
+# input_position_set @file.size
return(endoffile_detected result)
end
#-----------------------------------
def endoffile_detected(s='')
@moretokens.push( *(abort_noparens!.push super(s)))
+ if @progress_thread
+ @progress_thread.kill
+ @progress_thread=nil
+ end
result= @moretokens.shift
balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
result
end
@@ -1849,11 +2365,30 @@
KeywordToken.new super(ch), input_position-1
end
#-----------------------------------
def comma(ch)
- single_char_token(ch)
+ @moretokens.push token=single_char_token(ch)
+ if AssignmentRhsContext===@parsestack[-1] and
+ ParamListContext===@parsestack[-2] ||
+ ParamListContextNoParen===@parsestack[-2] ||
+ WhenParamListContext===@parsestack[-2] ||
+ (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
+ (DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
+ @parsestack.pop
+ @moretokens.unshift AssignmentRhsListEndToken.new(input_position)
+ end
+ token.comma_type=
+ case @parsestack[-1]
+ when AssignmentRhsContext: :rhs
+ when ParamListContext,ParamListContextNoParen: :call
+ when ListImmedContext: :array
+ else
+ :lhs if comma_in_lvalue_list?
+ end
+ @parsestack.last.see self,:comma
+ return @moretokens.shift
end
#-----------------------------------
def semicolon(ch)
assert @moretokens.empty?
@@ -1870,22 +2405,20 @@
#-----------------------------------
def operator_or_methname_token(s,offset=nil)
assert RUBYOPERATORREX===s
if RUBYNONSYMOPERATORREX===s
KeywordToken
- elsif @last_operative_token===/^(\.|::|def|undef|alias|defined\?)$/
+ elsif want_op_name
MethNameToken
else
OperatorToken
end.new(s,offset)
end
#-----------------------------------
#tokenify_results_of :identifier
save_offsets_in(*CHARMAPPINGS.values.uniq-[
- :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
-
-
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
])
#save_offsets_in :symbol
end