lib/rubylexer.rb in rubylexer-0.7.7 vs lib/rubylexer.rb in rubylexer-0.8.0
- old
+ new
@@ -1,8 +1,9 @@
+#encoding: binary
=begin
rubylexer - a ruby lexer written in ruby
- Copyright (C) 2004,2005,2008 Caleb Clausen
+ Copyright (C) 2004,2005,2008, 2011 Caleb Clausen
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
@@ -15,68 +16,82 @@
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
=end
-
require 'rubylexer/rulexer' #must be 1st!!!
require 'rubylexer/version'
require 'rubylexer/token'
require 'rubylexer/charhandler'
require 'rubylexer/symboltable'
#require "io.each_til_charset"
require 'rubylexer/context'
require 'rubylexer/tokenprinter'
-
#-----------------------------------
class RubyLexer
include NestedContexts
-
+ #here's a list of other constants that should already be defined at this point:
+ [WHSP, VERSION, Token, CharSet, CharHandler, SymbolTable, SimpleTokenPrinter].each{|k| fail if k.nil? }
- RUBYSYMOPERATORREX=
+ RUBYUNOPERATORS=%w{ +@ ~ ~@ -@ ! !@ }
+ RUBYBINOPERATORS=%w{ & | ^ / % == === =~ > >= >> < <= << <=> + - * ** }
+ RUBYCOMPOPERATORS=%w{== === =~ > >= < <= <=>}
+ RUBYSYMOPERATORS=RUBYUNOPERATORS+RUBYBINOPERATORS+%w{ [] []= }
+ RUBYNONSYMOPERATORS=%w{!= !~ = => :: ? : , ; . .. ... || && ||= &&=}+
+ (RUBYBINOPERATORS-RUBYCOMPOPERATORS).map{|op| op+'='}
+ RUBYSYMOPERATORREX=
%r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
# (nasty beastie, eh?)
#these are the overridable operators
#does not match flow-control operators like: || && ! or and if not
#or op= ops like: += -= ||=
#or .. ... ?:
#for that use:
- RUBYNONSYMOPERATORREX=
+ RUBYNONSYMOPERATORREX=
%r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
- RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
- UNSYMOPS=/^[~!]$/ #always unary
- UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
- WHSPCHARS=WHSPLF+"\\#"
- OPORBEGINWORDLIST=%w(if unless while until)
- BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
- OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})"
- BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o
- FUNCLIKE_KEYWORDLIST=%w/break next redo return yield retry super BEGIN END/
- FUNCLIKE_KEYWORDS=/^(#{FUNCLIKE_KEYWORDLIST.join '|'})$/
- VARLIKE_KEYWORDLIST=%w/__FILE__ __LINE__ false nil self true/
- VARLIKE_KEYWORDS=/^(#{VARLIKE_KEYWORDLIST.join '|'})$/
- INNERBOUNDINGWORDLIST=%w"else elsif ensure in then rescue when"
- INNERBOUNDINGWORDS="(#{INNERBOUNDINGWORDLIST.join '|'})"
- BINOPWORDLIST=%w"and or"
- BINOPWORDS="(#{BINOPWORDLIST.join '|'})"
+ RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
+ UNSYMOPS=/^[~!]$/ #always unary
+ UBSYMOPS=/^(?:[*&+-]|::)$/ #ops that could be unary or binary
+ WHSPCHARS=WHSPLF+"\\#"
+ OPORBEGINWORDLIST=%w(if unless while until)
+ BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
+ OPORBEGINWORDS="(?:#{OPORBEGINWORDLIST.join '|'})"
+ BEGINWORDS=/^(?:#{BEGINWORDLIST.join '|'})$/o
+ FUNCLIKE_KEYWORDLIST_1_9=%w[not]
+ FUNCLIKE_KEYWORDLIST=%w/break next redo return yield retry super BEGIN END/
+ FUNCLIKE_KEYWORDS=/^(?:#{FUNCLIKE_KEYWORDLIST.join '|'})$/
+ VARLIKE_KEYWORDLIST_1_9=%w[__ENCODING__]
+ VARLIKE_KEYWORDLIST=%w/__FILE__ __LINE__ false nil self true/
+ VARLIKE_KEYWORDS=/^(?:#{VARLIKE_KEYWORDLIST.join '|'})$/
+ attr_reader :FUNCLIKE_KEYWORDS, :VARLIKE_KEYWORDS
+
+ INNERBOUNDINGWORDLIST=%w"else elsif ensure in then rescue when"
+ INNERBOUNDINGWORDS="(?:#{INNERBOUNDINGWORDLIST.join '|'})"
+ BINOPWORDLIST=%w"and or"
+ BINOPWORDS="(?:#{BINOPWORDLIST.join '|'})"
- RUBYKEYWORDS=%r{
- ^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
+ RUBYKEYWORDS=%r{
+ ^(?:alias|#{BINOPWORDS}|defined\?|not|undef|end|
#{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
#{INNERBOUNDINGWORDS}|#{BEGINWORDS}
)$
}xo
+ RUBYKEYWORDLIST=%w{alias defined? not undef end}+
+ BINOPWORDLIST+
+ VARLIKE_KEYWORDLIST+FUNCLIKE_KEYWORDLIST+
+ INNERBOUNDINGWORDLIST+BEGINWORDLIST+
+ VARLIKE_KEYWORDLIST_1_9
#__END__ should not be in this set... its handled in start_of_line_directives
- HIGHASCII=?\x80..?\xFF
- NONASCII=HIGHASCII
- #NONASCII=?\x80..?xFFFFFFFF #or is it 10FFFF, whatever the highest conceivable code point
+ HIGHASCII=?\x80..?\xFF
+ NONASCII=HIGHASCII
+ #NONASCII=?\x80..?xFFFFFFFF #or is it 10FFFF, whatever the highest conceivable code point
- CHARMAPPINGS = {
+ CHARMAPPINGS = {
?$ => :dollar_identifier,
?@ => :at_identifier,
?a..?z => :identifier,
?A..?Z => :identifier,
?_ => :identifier,
@@ -123,37 +138,37 @@
?\x01..?\x03 => :illegal_char,
?\x05..?\x08 => :illegal_char,
?\x0E..?\x19 => :illegal_char,
?\x1b..?\x1F => :illegal_char,
?\x7F => :illegal_char,
- }
+ }
- attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
+ attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
- UCLETTER=@@UCLETTER="[A-Z]"
+ UCLETTER=@@UCLETTER="[A-Z]"
- #cheaters way, treats utf chars as always 1 byte wide
- #all high-bit chars are lowercase letters
- #works, but strings compare with strict binary identity, not unicode collation
- #works for euc too, I think
- #(the ruby spec for utf8 support permits this interpretation)
- LCLETTER=@@LCLETTER="[a-z_\x80-\xFF]"
- LETTER=@@LETTER="[A-Za-z_\x80-\xFF]"
- LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]"
- eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| "
+ #cheaters way, treats utf chars as always 1 byte wide
+ #all high-bit chars are lowercase letters
+ #works, but strings compare with strict binary identity, not unicode collation
+ #works for euc too, I think
+ #(the ruby spec for utf8 support permits this interpretation)
+ LCLETTER=@@LCLETTER="[a-z_\x80-\xFF]"
+ LETTER=@@LETTER="[A-Za-z_\x80-\xFF]"
+ LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]"
+ eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| "
def #{n}; #{n}; end
def self.#{n}; @@#{n}; end
"
- }.join
+ }.join
- NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
- if ?A.is_a? String #ruby >= 1.9
- NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
- else
- NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
- end
- NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
+ NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
+ if ?A.is_a? String #ruby >= 1.9
+ NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
+ else
+ NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
+ end
+ NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
=begin
require 'jcode'
utf8=String::PATTERN_UTF8 #or euc, or sjis...
LCLETTER_U="(?>[a-z_]|#{utf8})"
@@ -161,11 +176,18 @@
LETTER_DIGIT_U="(?>[A-Za-z_0-9]|#{utf8})"
=end
#-----------------------------------
def initialize(filename,file,linenum=1,offset_adjust=0,options={})
- @offset_adjust=0 #set again in next line
+ if file.respond_to? :set_encoding
+ file.set_encoding 'binary'
+ elsif file.respond_to? :force_encoding
+ file=file.dup if file.frozen?
+ file.force_encoding 'binary'
+ end
+
+ @offset_adjust=@offset_adjust2=0 #set again in next line
rulexer_initialize(filename,file, linenum,offset_adjust)
@start_linenum=linenum
@parsestack=[TopLevelContext.new]
@incomplete_here_tokens=[] #not used anymore
@pending_here_bodies=[]
@@ -177,69 +199,147 @@
@enable_macro=nil
@base_file=nil
@progress_thread=nil
@rubyversion=options[:rubyversion]||1.8
@encoding=options[:encoding]||:detect
- @method_operators=if @rubyversion>=1.9
- /#{RUBYSYMOPERATORREX}|\A![=~@]?/o
- else
- RUBYSYMOPERATORREX
- end
+ @always_binary_chars=CharSet['}]);|>,.=^']
+ @unary_or_binary_chars=CharSet['+-%/']
+
+
+ @FUNCLIKE_KEYWORDS=FUNCLIKE_KEYWORDS
+ @VARLIKE_KEYWORDS=VARLIKE_KEYWORDS
+
@toptable=CharHandler.new(self, :identifier, CHARMAPPINGS)
- extend RubyLexer1_9 if @rubyversion>=1.9
- read_leading_encoding
- start_of_line_directives
+ if @rubyversion>=1.9
+ extend RubyLexer1_9
+ end
+ rubylexer_modules_init
+ @method_operators=build_method_operators
+ if input_position.zero?
+ read_leading_encoding
+ @encoding=:binary if @rubyversion<=1.8
+ start_of_line_directives
+ end
progress_printer
end
- ENCODING_ALIASES={
- 'utf-8'=>'utf8',
+ def rubylexer_modules_init
- 'ascii-8bit'=>'binary',
- 'ascii-7bit'=>'ascii',
+ end
+
+ alias dump inspect # preserve old inspect functionality
+
+ # irb friendly #inspect/#to_s
+ def to_s
+ mods=class<<self;self end.ancestors-self.class.ancestors
+ mods=mods.map{|mod| mod.name }.join('+')
+ mods="+"<<mods unless mods.empty?
+ "#<#{self.class.name}#{mods}: [#{@file.inspect}]>"
+ end
+
+ alias inspect to_s
+
+
+ def build_method_operators
+ /#{RUBYSYMOPERATORREX}|\A`/o
+ end
+
+
+ RAW_ENCODING_ALIASES={
+ #'utf-8'=>'utf8',
+
+ 'ascii-8-bit'=>'binary',
+ 'ascii-7-bit'=>'ascii',
'euc-jp'=>'euc',
- 'ascii8bit'=>'binary',
- 'ascii7bit'=>'ascii',
- 'eucjp'=>'euc',
+ 'iso-8859-1'=>'binary',
+ 'latin-1'=>'binary',
+ #'ascii8bit'=>'binary',
+ #'ascii7bit'=>'ascii',
+ #'eucjp'=>'euc',
'us-ascii'=>'ascii',
'shift-jis'=>'sjis',
'autodetect'=>'detect',
}
+ ENCODING_ALIASES=Hash[*RAW_ENCODING_ALIASES.map{|long,short| [long.tr_s('-_',''),short] }.flatten]
ENCODINGS=%w[ascii binary utf8 euc sjis]
+ NONWORKING_ENCODINGS=%w[sjis]
+ WSCHARS=@@WSCHARS= /[\s]/==="\v" ? '\s' : '\s\v' #same as WHSP
+ WSNONLCHARS=@@WSNONLCHARS=/(?!\n)[#@@WSCHARS]/o #same as WHSPLF
+
+ NOPARAMLONGOPTIONS=%w[copyright version verbose debug yydebug help]
+ PARAMLONGOPTIONS=%w[encoding dump]
+ DASHPARAMLONGOPTIONS=%w[enable disable]
+ NOPARAMOPTIONS="SacdhlnpsvwyU"
+ OCTALPARAMOPTIONS="0"
+ CHARPARAMOPTIONS="KTW"
+ PARAMSHORTOPTIONS="CXFIEeir"
+ MAYBEPARAMSHORTOPTIONS="x"
+ NEWIN1_9OPTIONS=%w[encoding dump enable disable X U W E]
+ LONGOPTIONS=/
+ --(#{NOPARAMLONGOPTIONS.join'|'})|
+ --(#{PARAMLONGOPTIONS.join'|'})(=|#@@WSNONLCHARS+)[^#@@WSCHARS]+|
+ --(#{DASHPARAMLONGOPTIONS.join'|'})-[^#@@WSCHARS]+
+ /ox
+ CHAINOPTIONS=/
+ [#{NOPARAMOPTIONS}]+|
+ [#{OCTALPARAMOPTIONS}][0-7]{1,3}|
+ [#{CHARPARAMOPTIONS}].
+ /ox
+ PARAMOPTIONS=/
+ [#{PARAMSHORTOPTIONS}]#@@WSNONLCHARS*[^#@@WSCHARS]+|
+ [#{MAYBEPARAMSHORTOPTIONS}]#@@WSNONLCHARS*[^#@@WSCHARS]*
+ /ox
+ OPTIONS=/
+ (#@@WSNONLCHARS*(
+ #{LONGOPTIONS} | --? |
+ -#{CHAINOPTIONS}*( #{PARAMOPTIONS} | #{CHAINOPTIONS} )
+ ))*
+ /ox
+
def read_leading_encoding
- return unless @encoding==:detect
- @encoding=:ascii
- @encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" ) #bom
- if @file.skip( /\A#!/ )
+ @encoding=nil if @encoding==:detect
+ if enc=@file.scan( "\xEF\xBB\xBF" ) #bom
+ encpos=0
+ @encoding||=:utf8
+ elsif @file.skip( /\A#!/ )
+ lastpos=@file.pos
loop do
- til_charset( /[\s\v]/ )
- break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 )
- if @file.skip( /.-K(.)/ )
- case $1
- when 'u'; @encoding=:utf8
- when 'e'; @encoding=:euc
- when 's'; @encoding=:sjis
+ til_charset( /[#@@WSCHARS]/o )
+ assert @file.pos > lastpos
+ break if @file.match( /^\n|#@@WSNONLCHARS([^-#@@WSCHARS])/o,4 )
+ if @file.skip( /.-#{CHAINOPTIONS}*K#@@WSNONLCHARS*([a-zA-Z0-9])/o )
+ case @file.last_match[1]
+ when 'u','U'; @encoding||=:utf8
+ when 'e','E'; @encoding||=:euc
+ when 's','S'; @encoding||=:sjis
end
+ elsif @file.skip( /.#{LONGOPTIONS}/o )
end
+ getchar
+ lastpos=@file.pos
end
til_charset( /[\n]/ )
+ @moretokens<<ShebangToken.new(@file[0...@file.pos])
+ pos=input_position
+ @moretokens<<EscNlToken.new(readnl,pos,@filename,2)
+ @moretokens<<FileAndLineToken.new(@filename,2,input_position)
end
- if @rubyversion>=1.9 and @file.skip(
- /\A#[\x00-\x7F]*?(?:en)?coding[\s\v]*[:=][\s\v]*([a-z0-9_-]+)[\x00-\x7F]*\n/i
- )
- name=$1
- name.downcase!
- name=ENCODING_ALIASES[name] if ENCODING_ALIASES[name]
- @encoding=name.to_sym if ENCODINGS.include? name
- end
+ encpos=input_position unless enc
+ enc||=read_encoding_line
+ ensure
+ @moretokens<<EncodingDeclToken.new(enc||'',@encoding,enc ? encpos : input_position) if @encoding
+ @encoding||=:ascii
end
+ def read_encoding_line
+ end
+
def progress_printer
return unless ENV['RL_PROGRESS']
$stderr.puts 'printing progresses'
@progress_thread=Thread.new do
until EoiToken===@last_operative_token
@@ -308,17 +408,22 @@
raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
end
end
#-----------------------------------
+ def unshift(*tokens)
+ @moretokens.unshift(*tokens)
+ end
+
+ #-----------------------------------
def eof?
rulexer_eof? or EoiToken===@last_operative_token
end
#-----------------------------------
def input_position
- rulexer_input_position+@offset_adjust
+ rulexer_input_position+@offset_adjust+@offset_adjust2
end
#-----------------------------------
def input_position_raw
@file.pos
@@ -390,11 +495,11 @@
@moretokens.unshift tok
return result
end
#-----------------------------------
- WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/
+ WSCHARSET=/[#\\\n#@@WSCHARS\x00\x04\x1a]/o
def ignored_tokens(allow_eof=false,allow_eol=true)
result=[]
result << @moretokens.shift while StillIgnoreToken===@moretokens.first
@moretokens.empty? or return result
loop do
@@ -426,11 +531,11 @@
=begin
@whsphandler||=CharHandler.new(self, :==,
"#" => :comment,
"\n" => :newline,
"\\" => :escnewline,
- "\s\t\v\r\f" => :whitespace
+ "#@@WSCHARS\t\r\f" => :whitespace
)
#tok=nil
while tok=@whsphandler.go((nextchar or return result))
block_given? and NewlineToken===tok and yield tok
result << tok
@@ -474,23 +579,23 @@
#just asserts because those contexts are never encountered.
#control goes through symbol(<...>,nil)
assert( /^#@@LETTER$/o===context)
assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
- if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
- @moretokens.push SymbolToken.new(str,oldpos), KeywordToken.new("=>",input_position-1)
- else
- @moretokens.unshift(*parse_keywords(str,oldpos) do |tok,except|
+# if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
+# @moretokens.unshift SymbolToken.new(str,oldpos), KeywordToken.new(":",input_position-1,:as=>"=>")
+# else
+ @moretokens.unshift(*special_identifier?(str,oldpos) do |tok,except|
#most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val
was_last=@last_operative_token
@last_operative_token=tok if tok
normally=safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
(Array===normally ? normally[0]=except : normally=except) if except
normally
end)
- end
+# end
return @moretokens.shift
end
#-----------------------------------
IDENTREX={}
@@ -510,13 +615,13 @@
#= and ! only match if not part of a larger operator
trailers =
case context
when ?@,?$ then ""
# when ?: then "!(?![=])|\\?|=(?![=~>])"
- else "!(?![=])|\\?"
+ else "!(?=\\z|[^=]|=[=~>])|\\?"
end
- @in_def_name||context==?: and trailers<<"|=(?![=~>])"
+ @in_def_name||context==?: and trailers<<"|=(?![~>]|=[^~=>])"
@file.scan(IDENTREX[trailers]||=/^(?>#@@LETTER#@@LETTER_DIGIT*(?:#{trailers})?)/)
end
#-----------------------------------
@@ -551,16 +656,18 @@
end
end
#-----------------------------------
def in_lvar_define_state lasttok=@last_operative_token
- #@defining_lvar is a hack
- @defining_lvar or case ctx=@parsestack.last
+ return true if @defining_lvar #@defining_lvar is a hack
+ ctx=@parsestack.last
+ case ctx
#when ForSMContext; ctx.state==:for
- when UnparenedParamListLhsContext; /^(->|,|;)$/===lasttok.ident
+ when UnparenedParamListLhsContext
+ /^(->|,|;)$/===lasttok.ident or /^[*&]$/===lasttok.ident && lasttok.unary
when RescueSMContext
- lasttok.ident=="=>" and @file.match?( /\A[\s\v]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
+ lasttok.ident=="=>" and @file.match?( /\A[#@@WSCHARS]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
#when BlockParamListLhsContext; true
end
end
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2
@@ -585,11 +692,11 @@
assert String===name
was_in_lvar_define_state=in_lvar_define_state(lasttok)
#maybe_local really means 'maybe local or constant'
maybe_local=case name
- when /(?!#@@LETTER_DIGIT).$/o #do nothing
+ when /[?!=]$/o #do nothing
when /^#@@LCLETTER/o
(localvars===name or
#VARLIKE_KEYWORDS===name or
was_in_lvar_define_state
) and not lasttok===/^(\.|::)$/
@@ -603,10 +710,13 @@
oldlast=@last_operative_token
tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
oldpos= input_position
+ oldline= linenum
+
+ #deal with ws following the ident
sawnl=false
result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
if sawnl || eof?
if was_in_lvar_define_state
if /^#@@LCLETTER#@@LETTER_DIGIT*$/o===name
@@ -615,20 +725,36 @@
end
return result.unshift(tok)
elsif maybe_local
return result.unshift(tok) #if is_const
else
- return result.unshift(
+ toks=[
MethNameToken.new(name,pos), #insert implicit parens right after tok
ImplicitParamListStartToken.new( oldpos),
ImplicitParamListEndToken.new( oldpos)
- )
+ ]
+ toks.each{|t| t.endline=oldline}
+ return result.unshift(*toks)
end
end
#if next op is assignment (or comma in lvalue list)
#then omit implicit parens
+ assignment_coming=
+ /\A(?:
+ =[^>=~] | (,) | (;) | (\)) |
+ (in(?!#@@LETTER_DIGIT)) | (\|[^\|=]) | [%\/\-+^*&|]= | ([<>*&|])\6=
+ )/mox===readahead(3) &&
+ case
+ when $1; comma_in_lvalue_list? #comma
+ when $2; semicolon_in_block_param_list?
+ when $3; last_context_not_implicit.lhs #right paren in lhs
+ when $4; ForSMContext===last_context_not_implicit #in
+ when $5; BlockParamListLhsContext===last_context_not_implicit #ending goalpost
+ else true
+ end
+=begin was
assignment_coming=case nc=nextchar
when ?=; not( /^=[>=~]$/===readahead(2) )
when ?,; comma_in_lvalue_list?
when (?; if @rubyversion>=1.9); ParenedParamListLhsContext===@parsestack.last
when ?); last_context_not_implicit.lhs
@@ -640,19 +766,24 @@
#is it a goalpost?
BlockParamListLhsContext===last_context_not_implicit &&
readahead(2)[1] != ?|
when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
end
+=end
+
if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
tok=assign_lvar_type! VarNameToken.new(name,pos)
- if /(?!#@@LETTER_DIGIT).$/o===name
- elsif /^#@@LCLETTER/o===name and !(lasttok===/^(\.|::)$/)
+ #if /(?!#@@LETTER_DIGIT).$/o===name
+ #nonalphabetics... operator? skip it
+ #els
+ if /^#@@LCLETTER/o===name #and !(lasttok===/^(\.|::)$/)
localvars[name]=true
end
return result.unshift(tok)
end
+ nc=nextchar
implicit_parens_to_emit=
if assignment_coming
@parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
else
@@ -675,59 +806,43 @@
(NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~,NONASCII; 1 #"
when ?{
maybe_local=false
1
-=begin
- x=2
- x-=1 if /\A(return|break|next)\Z/===name and
- !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
- x
-=end
when ?(
maybe_local=false
lastid=lasttok&&lasttok.ident
case lastid
when /\A[;(]|do\Z/; was_after_nonid_op=false
when '|'; was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
when '{'; was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
end if KeywordToken===lasttok
was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
- want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
-# /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
-# MethNameToken===lasttok or
-# RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
-# )
+ want_parens=!(ws_toks.empty? or was_after_nonid_op)
#look ahead for closing paren (after some whitespace...)
want_parens=false if @file.match?( /\A.(?:\s|\v|\#.*\n)*\)/ )
-# afterparen=@file.pos
-# getchar
-# ignored_tokens(true)
-# want_parens=false if nextchar==?)
-# @file.pos=afterparen
want_parens=true if /^(return|break|next)$/===@last_operative_token.ident and not(
KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident
)
want_parens ? 1 : 0
- when ?},?],?),?;,(?^ unless @enable_macro), ?|, ?>, ?,, ?., ?=; 2
- when ?+, ?-, ?%, ?/, (?^ if @enable_macro)
+ when @always_binary_chars; 2 # ?},?],?),?;,(?^ unless @enable_macro), ?|, ?>, ?,, ?., ?=; 2
+ when @unary_or_binary_chars; #?+, ?-, ?%, ?/, (?^ if @enable_macro)
if /^(return|break|next)$/===@last_operative_token.ident and not(
KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident
)
1
else
(ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
end
when ?*, ?&
- # lasttok=@last_operative_token
if /^(return|break|next)$/===@last_operative_token.ident and not(
KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident
)
1
else
- (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o] and !@in_def_name) ? 2 : 3
end
when ?:
next2=readahead(2)
if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then
$1 && !ws_toks.empty? ? 3 : 2
@@ -736,11 +851,10 @@
end
when ??; next3=readahead(3)
#? never begins a char constant if immediately followed
#by 2 or more letters or digits
/^\?([#{WHSPLF}]|#@@LETTER_DIGIT{2})/o===next3 ? 2 : 3
-# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?(?:["'`]|#@@LETTER_DIGIT)/o]) ? 3 : 2
when ?[;
if ws_toks.empty?
(KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 3 : 2
else
@@ -795,10 +909,12 @@
# 'then else elsif rescue ensure (illegal in value context)'
# 'need to pop noparen from parsestack on these tokens: (in operator context)'
# 'not ok:'
# 'not (but should it be?)'
+ ensure
+ result.first.endline||=oldline unless result.empty?
end
#-----------------------------------
#read ahead to see if there's method param list (with real parentheses)
#and 2 or more parameters (and hence a comma to separate them)
@@ -816,33 +932,43 @@
break true
elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.tag and @parsestack.size==basesize+1
break true
elsif EoiToken===tok
lexerror tok, "unexpected eof in parameter list"
+ break
end
}
result.concat @moretokens
@moretokens.replace []
return [result,pass]
end
+
#-----------------------------------
+ module NestedContexts
+ class VContext<NestedContext
+ end
+ end
+ VContext=NestedContexts::VContext
CONTEXT2ENDTOK={
AssignmentRhsContext=>AssignmentRhsListEndToken,
ParamListContextNoParen=>ImplicitParamListEndToken,
KWParamListContextNoParen=>ImplicitParamListEndToken, #break,next,return
WhenParamListContext=>KwParamListEndToken,
- RescueSMContext=>KwParamListEndToken
+ RescueSMContext=>KwParamListEndToken,
+ VContext=>0
}
- def abort_noparens!(str='')
+ def abort_noparens!(str='',adj=str.size)
#assert @moretokens.empty?
result=[]
- while klass=CONTEXT2ENDTOK[@parsestack.last.class]
- result << klass.new(input_position-str.length)
- break if RescueSMContext===@parsestack.last #and str==':'
- break if WhenParamListContext===@parsestack.last and str==':'
+ ctx=@parsestack.last
+ while klass=CONTEXT2ENDTOK[ctx.class]
+ result << klass.new(input_position-adj) if Class===klass
+ break if RescueSMContext===ctx #and str==':'
+ break if WhenParamListContext===ctx and str==':'
@parsestack.pop
+ ctx=@parsestack.last
end
return result
end
#-----------------------------------
@@ -876,18 +1002,22 @@
#-----------------------------------
CONTEXT2ENDTOK_FOR_DO={
AssignmentRhsContext=>AssignmentRhsListEndToken,
ParamListContextNoParen=>ImplicitParamListEndToken,
- UnparenedParamListLhsContext=>KwParamListEndToken,
+ UnparenedParamListLhsContext=>ImplicitParamListEndToken,
ExpectDoOrNlContext=>1,
#WhenParamListContext=>KwParamListEndToken,
#RescueSMContext=>KwParamListEndToken
}
def abort_noparens_for_do!(str='')
#assert @moretokens.empty?
result=[]
+ return result if @parsestack[-1].class==AssignmentRhsContext and
+ @parsestack[-2].class==ParamListContextNoParen and
+ @parsestack[-3].class==DefContext and
+ !@parsestack[-3].in_body
while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
if klass==AssignmentRhsListEndToken
i=@parsestack.size
end_the_assign=false
while AssignmentRhsContext===@parsestack[i-=1]
@@ -928,42 +1058,54 @@
end
return result
end
#-----------------------------------
- def enable_macros!
- @enable_macro="macro"
+ def enable_macros! #this wholemethod should be unnecessary now
+ @enable_macro="macro" #shouldn't be needed anymore... should be safe to remove
class <<self
alias keyword_macro keyword_def
end
+ @unary_or_binary_chars.add '^'
+ @always_binary_chars.remove '^'
end
public :enable_macros!
#-----------------------------------
- @@SPACES=/[\ \t\v\f\v]/
+ @@SPACES=/[\ \t\f\v]/
@@WSTOK=/(?>
(?>\r?)\n|
(?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)|
- \#(?>[^\n]*)\n|
+ \#(?>[^\n]*)(?=\n)|
\\(?>\r?)\n|
^=begin(?>(?>#@@SPACES.*)?)\n
(?>(?:(?!=end)(?>.*)\n))*
=end(?>(?>#@@SPACES.*)?)\n
)/x
@@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o
+ WSTOKS=@@WSTOKS
def divide_ws(ws0,offset)
result=[]
ws0.scan(/\G#@@WSTOK/o){|ws|
incr= $~.begin(0)
- tok=case ws
- when /\A[\#=]/; IgnoreToken.new(ws,offset+incr)
- when /\n\Z/; EscNlToken.new(ws,offset+incr,@filename,@linenum)
- else WsToken.new(ws,offset+incr)
+ lines=ws.count "\n"
+ case ws
+ when /\A\#/
+ result<< IgnoreToken.new(ws,offset+incr)
+ when /\A=/
+ tok=IgnoreToken.new(ws,offset+incr)
+ tok.startline=@linenum
+ tok.endline=@linenum+lines
+ result<<tok
+ when /\n\Z/
+ result<< EscNlToken.new(ws,offset+incr,@filename,@linenum+1)
+ else
+ result<< WsToken.new(ws,offset+incr)
end
- result << tok
- @linenum+=ws.count "\n"
+ result<< FileAndLineToken.new(@filename,@linenum+lines,offset+incr+ws.size) if lines>0
+ @linenum+=lines
}
result.each_with_index{|ws,i|
if WsToken===ws
ws.ident << result.delete_at(i+1).ident while WsToken===result[i+1]
end
@@ -990,17 +1132,17 @@
#-----------------------------------
#parse keywords now, to prevent confusion over bare symbols
#and match end with corresponding preceding def or class or whatever.
#if arg is not a keyword, the block is called
- def parse_keywords(str,offset,&block)
+ def special_identifier?(str,offset,&block)
assert @moretokens.empty?
assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident)
- result=[KeywordToken.new(str,offset)]
- m=:"keyword_#{str}"
- respond_to?(m) ? (send m,str,offset,result,&block) : block[MethNameToken.new(str)]
+ m="keyword_#{str}"
+ return yield( MethNameToken.new(str) )unless respond_to?(m)
+ send m,str,offset,[KeywordToken.new(str,offset)],&block
end
public #these have to be public so respond_to? can see them (sigh)
def keyword_end(str,offset,result)
result.unshift(*abort_noparens!(str))
@parsestack.last.see self,:semi #sorta hacky... should make an :end event instead?
@@ -1043,10 +1185,12 @@
@localvars_stack.push SymbolTable.new
while @file.check( /\A::/ )
#VarNameToken===@moretokens.last or
#KeywordToken===@moretokens.last && @moretokens.last.ident=="::"
@file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break
+ #should not allow newline around :: here
+
md=@file.last_match
all,ws1,dc,ws2,name=*md
if ws1
@moretokens.concat divide_ws(ws1,md.begin(1))
incr=ws1.size
@@ -1136,16 +1280,16 @@
end
end
return result
end
def keyword_def(str,offset,result) #macros too, if enabled
- result.first.has_end!
- @parsestack.push ctx=DefContext.new(@linenum)
- ctx.state=:saw_def
+ result.first.has_end!
+ @parsestack.push ctx=DefContext.new(@linenum)
+ ctx.state=:saw_def
old_moretokens=@moretokens
@moretokens=[]
- aa=@moretokens
+ #aa=@moretokens
#safe_recurse { |aa|
set_last_token KeywordToken.new(str) #hack
result.concat ignored_tokens
#read an expr like a.b.c or a::b::c
@@ -1154,32 +1298,52 @@
old_size=@parsestack.size
parencount=0
begin
tok=get1token
case tok
- when/^\($/.token_pat then parencount+=1
- when/^\)$/.token_pat then parencount-=1
+ when /^\($/.token_pat ; parencount+=1
+ when /^\)$/.token_pat ; parencount-=1
+ when EoiToken
+ @moretokens= old_moretokens.concat @moretokens
+ return result<<lexerror( tok, "eof in def header" )
end
- EoiToken===tok and lexerror tok, "eof in def header"
result << tok
end until parencount==0 #@parsestack.size==old_size
@localvars_stack.push SymbolTable.new
else #no parentheses, all tail
set_last_token KeywordToken.new(".") #hack hack
tokindex=result.size
+ tokline=result.last.endline
result << tok=symbol(false,false)
name=tok.to_s
assert !in_lvar_define_state
#maybe_local really means 'maybe local or constant'
+ @maybe_local_pat||=%r{
+ ((?!#@@LETTER_DIGIT).$) | ^[@$] | (#@VARLIKE_KEYWORDS | #@FUNCLIKE_KEYWORDS) |
+ (^#@@LCLETTER) | (^#@@UCLETTER)
+ }x
+ @maybe_local_pat === name and
+ maybe_local=
+ case
+ when $1; maybe_local=false #operator or non-ident
+ when $2; ty=KeywordToken #keyword
+ when $3; maybe_local=localvars===name #lvar or method
+ when $4; is_const=true #constant
+ else true
+ end
+ #maybe_local=ty=KeywordToken if is__ENCODING__keyword?(name) #"__ENCODING__"==name and @rubyversion>=1.9
+=begin was
maybe_local=case name
when /(?!#@@LETTER_DIGIT).$/o; #do nothing
when /^[@$]/; true
- when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
+ when /#@VARLIKE_KEYWORDS|#@FUNCLIKE_KEYWORDS/,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
when /^#@@LCLETTER/o; localvars===name
when /^#@@UCLETTER/o; is_const=true #this is the right algorithm for constants...
end
+=end
+
result.push( *ignored_tokens(false,false) )
nc=nextchar
if !ty and maybe_local
if nc==?: || nc==?.
ty=VarNameToken
@@ -1193,11 +1357,17 @@
result.insert tokindex+1, newtok
end
end
assert result[tokindex].equal?(tok)
- var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
+ var=ty.new(tok.to_s,tok.offset)
+ if ty==KeywordToken and name[0,2]=="__"
+ send("keyword_#{name}",name,tok.offset,[var])
+ end
+ var.endline=tokline
+
+ var=assign_lvar_type! var
@localvars_stack.push SymbolTable.new
var.in_def=true if inside_method_def? and var.respond_to? :in_def=
result[tokindex]=var
@@ -1228,19 +1398,24 @@
if endofs
result.insert end_index,ImplicitParamListEndToken.new(ofs)
else
ofs+=listend.to_s.size
end
- result.insert end_index+1,EndHeaderToken.new(ofs)
+ tok=EndHeaderToken.new(ofs)
+ tok.endline= result[end_index-1].endline #@linenum
+ result.insert end_index+1,tok
break
end
tok=get1token
result<< tok
case tok
when EoiToken
lexerror tok,'unexpected eof in def header'
+ @moretokens= old_moretokens.concat @moretokens
+ return result
+
when StillIgnoreToken
when MethNameToken ,VarNameToken # /^#@@LETTER/o.token_pat
lexerror tok,'expected . or ::' unless state==:expect_name
state=:expect_op
when /^(\.|::)$/.token_pat
@@ -1254,11 +1429,14 @@
ctx.state=:def_body
state==:expect_op or lexerror tok,'expected identifier'
if endofs
result.insert( -2,ImplicitParamListEndToken.new(tok.offset) )
end
- result.insert( -2, EndHeaderToken.new(tok.offset) )
+ ehtok= EndHeaderToken.new(tok.offset)
+ #ehtok.endline=tok.endline
+ #ehtok.endline-=1 if NewlineToken===tok
+ result.insert( -2, ehtok )
break
else
lexerror(tok, "bizarre token in def name: " +
"#{tok}:#{tok.class}")
end
@@ -1423,19 +1601,301 @@
def keyword___LINE__(str,offset,result)
result.last.value=@linenum
return result
end
+
+ #-----------------------------------
+ def encoding_name_normalize name
+ name=name.dup
+ name.downcase!
+ name.tr_s! '-_',''
+ name=ENCODING_ALIASES[name] if ENCODING_ALIASES[name]
+ return name
+ end
+
module RubyLexer1_9
+ FUNCLIKE_KEYWORDLIST=RubyLexer::FUNCLIKE_KEYWORDLIST+FUNCLIKE_KEYWORDLIST_1_9
+ VARLIKE_KEYWORDLIST=RubyLexer::VARLIKE_KEYWORDLIST+VARLIKE_KEYWORDLIST_1_9
+ FUNCLIKE_KEYWORDS=/^(?:#{FUNCLIKE_KEYWORDLIST.join '|'})$/
+ VARLIKE_KEYWORDS=/^(?:#{VARLIKE_KEYWORDLIST.join '|'})$/
+ def FUNCLIKE_KEYWORDS orig=nil
+ /(?:#{orig||super()}|^(?:#{FUNCLIKE_KEYWORDLIST_1_9.join '|'})$)/
+ end
+
+ def VARLIKE_KEYWORDS orig=nil
+ /(?:#{orig||super()}|^(?:#{VARLIKE_KEYWORDLIST_1_9.join '|'})$)/
+ end
+
+ def rubylexer_modules_init
+ super
+ @FUNCLIKE_KEYWORDS=FUNCLIKE_KEYWORDS @FUNCLIKE_KEYWORDS unless @FUNCLIKE_KEYWORDS==="->"
+ @VARLIKE_KEYWORDS=VARLIKE_KEYWORDS @VARLIKE_KEYWORDS unless @VARLIKE_KEYWORDS==="__ENCODING__"
+ end
+
+ #-----------------------------------
+ def dquote_handle(ch)
+ dquote19_esc_seq(ch,'"','"')
+ end
+ #-----------------------------------
+ def dquote_handler_name
+ :dquote19_esc_seq
+ end
+ #-----------------------------------
+ def Wquote_handler_name
+ :Wquote19_esc_seq
+ end
+
+ #-----------------------------------
+ def method_params? # .()
+ lasttok=last_token_maybe_implicit #last_operative_token
+ super or
+ (lasttok and lasttok.ident=='.')
+ end
+
+ #-----------------------------------
+ def callsite_symbol(x)
+ return if nextchar==?(
+ super
+ end
+
+ #-----------------------------------
+ def read_encoding_line
+ if line=@file.scan(
+ /\A#{WSNONLCHARS}*#[\x00-\x7F]*?(?:en)?coding#{WSNONLCHARS}*[:=]#{WSNONLCHARS}*([a-z0-9_-]+)[\x00-\x7F]*$/io
+ )
+ name=@file.last_match[1]
+ name=encoding_name_normalize name
+ @encoding=name.to_sym if ENCODINGS.include? name
+ return line
+ end
+ end
+
+ #-----------------------------------
def keyword___ENCODING__(str,offset,result)
#result.last.value=huh
return result
end
+ #-----------------------------------
def keyword_not(*args,&block) _keyword_funclike(*args,&block) end
- end
+ #-----------------------------------
+ def special_identifier?(str,oldpos)
+ if @parsestack.last.wantarrow and @file.skip ":"
+ return SymbolToken.new(str,oldpos), KeywordToken.new(":",input_position-1,:as=>"=>")
+ else
+ return super
+ end
+ end
+
+ #-----------------------------------
+ def want_hard_nl?
+ return false if @file.check( /\A\n(?:#{WSTOKS})?[.:][^.:]/o )
+ super
+ end
+
+ #-----------------------------------
+ #RE_* shamelessly stolen from jcode.rb
+ RE_UTF8= /[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]|[\xf0-\xf7][\x80-\xbf]{3}/n #longer sequences are possible
+ RE_EUC= /[\xa1-\xfe][\xa1-\xfe]/n #is this complete?
+ RE_SJIS= /[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]/n #is this complete? windows31j?
+ ENCODING2EXTCHAR={
+ :utf8=>RE_UTF8,
+ :euc=>RE_EUC,
+ :sjis=>RE_SJIS,
+ :binary=>/[\x80-\xFF]/n,
+ :ascii=>nil
+ }
+
+ #handle ? in ruby code. is it part of ?..: or a character literal?
+ def char_literal_or_op(ch) #unicode char literals, etc
+ if colon_quote_expected? ch
+ #char literal
+ pos=input_position
+ getchar
+ extchar= ENCODING2EXTCHAR[@encoding]
+ result=
+ if extchar and extchar=@file.scan( extchar )
+ assign_encoding!(StringToken.new('"', extchar))
+ else
+ getchar_maybe_escape
+ assign_encoding!(StringToken.new('"', @file[pos+1...input_position]))
+ end
+ result.offset=pos
+ result.bs_handler=:dquote19_esc_seq
+ result.open='?'
+ result.close=''
+ return result
+ else #(ternary) operator
+ super
+ end
+ end
+
+ #-----------------------------------
+ def plusminus(ch) #->
+ pos=input_position
+ assert(/^[+\-]$/===ch)
+ if unary_op_expected?(ch) or
+ KeywordToken===@last_operative_token &&
+ /^(return|break|next)$/===@last_operative_token.ident
+ if '->' == readahead(2) #stabby proc
+ @file.pos+=2
+ #push down block context
+ localvars.start_block
+ @parsestack.push ctx=RubyLexer::BlockContext.new(@linenum)
+ ctx.wanting_stabby_block_body=true
+ #read optional proc params
+ block_param_list_lookahead ?(, RubyLexer::ParenedParamListLhsContext
+ result=RubyLexer::KeywordToken.new('->',pos)
+ result.offset=pos
+ return result
+ end
+ end
+ super
+ end
+
+ #-----------------------------------
+ #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
+ def equals(ch) # /(?<foo>bar)/=~'bar'; declares foo lvar
+ if readahead(2)=='=~' # =~... after regex, maybe?
+ last=last_operative_token
+
+ if StringToken===last and last.lvars
+ #ruby delays adding lvars from regexps to known lvars table
+ #for several tokens in some cases. not sure why or if on purpose
+ #i'm just going to add them right away
+ last.lvars.each{|lvar| localvars[lvar]=true }
+ end
+ end
+ return super
+ end
+
+ #-----------------------------------
+ def assign_encoding! str
+ #search for nonascii bytes
+ #either directly or via hex (\xXX) or octal (\NNN) escapes
+ #and \u escapes also
+ utf8=nonascii=false
+ str.elems.grep(String).each do|frag|
+ frag.scan(/#{EVEN_BS_S}(?:\\u|\\2[0-7][0-7]|\\x[89a-fA-F][0-9a-fA-F])|[^\x00-\x7F]/o) do |match|
+ if match[-1]==?u
+ utf8=true
+ break if nonascii
+ else
+ nonascii=true
+ break if utf8
+ end
+ end or break
+ end
+
+ lexerror(str,"utf8 and nonascii intermixed") if utf8 and nonascii and @encoding!=:utf8
+
+ #encoding is source encoding unless \u escape is found
+ str.utf8! if utf8
+
+ #maybe assign string fragments encodings if running under >=1.9?
+
+ return str
+ end
+
+ #-----------------------------------
+ def regex(ch=nil)
+ result=super
+ named_brs=[]
+ if result.elems.size==1 and String===result.elems.first
+ elem=result.elems.first
+ index=0
+ while index=elem.index(/(#{EVEN_BS_S})( \(\?[<'] | \(\?\# | \[ )/xo,index)
+ index+=$1.size
+ case $2
+ when "(?<"
+ index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)>/o,index)
+ break lexerror(result, "malformed named backreference") unless index
+ index+=$&.size
+ named_brs<<$1
+ when "(?'"
+ index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)'/o,index)
+ break lexerror(result, "malformed named backreference") unless index
+ index+=$&.size
+ named_brs<<$1
+ when "(?#"
+ index+=3
+ index=elem.index(/#{EVEN_BS_S}\)/o,index)
+ break lexerror(result, "unterminated regexp comment") unless index
+ index+=$&.size
+ when "["
+ index+=1
+ paren_ctr=1
+ loop do
+ index=elem.index(/#{EVEN_BS_S}(&&\[\^|\])/o,index)
+ break lexerror(result, "unterminated character class") unless index
+ index+=$&.size
+ if $1==']'
+ paren_ctr-=1
+ break if paren_ctr==0
+ else
+ paren_ctr+=1
+ end
+ end
+ break unless index
+
+ end
+ end
+ result.lvars= named_brs unless named_brs.empty?
+ end
+ return result
+ end
+
+ def build_method_operators
+ /#{RUBYSYMOPERATORREX}|\A![=~@]?|\A`/o
+ end
+
+ include RubyLexer::NestedContexts
+
+ def semicolon_in_block_param_list?
+ ParenedParamListLhsContext===@parsestack.last ||
+ BlockParamListLhsContext===@parsestack.last
+ end
+
+ def is__ENCODING__keyword?(name)
+ "__ENCODING__"==name
+ end
+
+ #-----------------------------------
+ def colon_operator tok
+ if TernaryContext===@parsestack.last
+ tok.ternary=true
+ @parsestack.pop #should be in the context's see handler
+ end
+ end
+
+ def maybe_end_stabby_block_param_list(tokch)
+ stabby_params_just_ended=false
+ (@parsestack.size-1).downto(1){|i|
+ case @parsestack[i]
+ when ParamListContextNoParen,AssignmentRhsContext
+ #do nothing yet... see if inside a UnparenedParamListLhsContext
+ when UnparenedParamListLhsContext #stabby proc
+ @moretokens<<tokch
+ (@parsestack.size-1).downto(i){|j|
+ @moretokens.unshift @parsestack[j].endtoken(input_position-1)
+ }
+ @parsestack[i..-1]=[]
+ tokch=@moretokens.shift
+ stabby_params_just_ended=true
+ break
+ else break
+ end
+ }
+ return stabby_params_just_ended,tokch
+ end
+ end #module RubyLexer1_9
+
+ def semicolon_in_block_param_list?; end
+ def is__ENCODING__keyword?(name); end
+
def _keyword_funclike(str,offset,result)
if @last_operative_token===/^(\.|::)$/
result=yield MethNameToken.new(str) #should pass a methname token here
else
tok=KeywordToken.new(str)
@@ -1490,11 +1950,11 @@
#-----------------------------------
def block_param_list_lookahead starter=?|, ctx_type=BlockParamListLhsContext
safe_recurse{ |la|
- set_last_token KeywordToken.new( ';' )
+ set_last_token KeywordToken.new( ';' )
a=ignored_tokens
if eat_next_if(starter)
mycontext=ctx_type.new(@linenum)
a<< KeywordToken.new(mycontext.starter, input_position-1)
@@ -1538,11 +1998,11 @@
end
end
elsif starter==?(
ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
@parsestack.push ctx_type.new(@linenum)
- a<<KwParamListStartToken.new( input_position )
+ a<<ImplicitParamListStartToken.new( input_position )
end
set_last_token KeywordToken.new( ';' )
#a.concat ignored_tokens
@@ -1620,11 +2080,11 @@
def method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
listend=nil
set_last_token KeywordToken.new( ',' )#hack
nextvar=nil
loop do
- expect_name=(@last_operative_token===',' and
+ expect_name=(/^[,;]$/===@last_operative_token.ident and
normal_comma_level==@parsestack.size)
expect_name and @defining_lvar||=true
result << tok=get1token
break lexerror(tok, "unexpected eof in def header") if EoiToken===tok
@@ -1695,11 +2155,11 @@
assert('*&'[ch])
want_unary=unary_op_expected?(ch) ||
(@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
result=quadriop(ch)
if want_unary
- #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
+ #readahead(2)[1..1][/[#@@WSCHARS#\\]/o] or #not needed?
assert OperatorToken===result
result.tag=:unary #result should distinguish unary+binary *&
WHSPLF[nextchar.chr] or
@moretokens << NoWsToken.new(input_position)
cill=comma_in_lvalue_list?
@@ -1722,17 +2182,19 @@
#-----------------------------------
#handle ? in ruby code. is it part of ?..: or a character literal?
def char_literal_or_op(ch)
if colon_quote_expected? ch
getchar
- if @rubyversion >= 1.9
- StringToken.new getchar_maybe_escape
- else
+# if @rubyversion >= 1.9
+# assign_encoding! StringToken.new getchar_maybe_escape
+# else
ch=getchar_maybe_escape[0]
ch=ch.ord if ch.respond_to? :ord
- NumberToken.new ch
- end
+ result=NumberToken.new ch
+ result.char_literal=true
+ return result
+# end
else
@parsestack.push TernaryContext.new(@linenum)
KeywordToken.new getchar #operator
end
end
@@ -1745,11 +2207,11 @@
@parsestack.pop
op=true
end
if !op and after_nonid_op?{
- !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}]
+ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[#@@WSCHARS=]}o]
} || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
return regex(ch)
else #/ is operator
result=getchar
if eat_next_if(?=)
@@ -1770,20 +2232,20 @@
s=tok.to_s
case s
when /^[@$]/; true
when /^<</; HerePlaceholderToken===tok
when /(?!#@@LETTER_DIGIT).$/o; false
-# when /^#@@LCLETTER/o; localvars===s or VARLIKE_KEYWORDS===s
+# when /^#@@LCLETTER/o; localvars===s or @VARLIKE_KEYWORDS===s
when /^#@@LETTER/o; VarNameToken===tok
else raise "not var or method name: #{s}"
end
end
#-----------------------------------
def colon_quote_expected?(ch) #yukko hack
assert ':?'[ch]
- readahead(2)[/^(\?[^#{WHSPLF}]|:[^\s\r\n\t\f\v :])$/o] or return false
+ readahead(2)[/^(\?[^#{WHSPLF}]|:[^#@@WSCHARS :])$/o] or return false
after_nonid_op? {
#possible func-call as operator
not is_var_name? and
@@ -1802,68 +2264,70 @@
qe= colon_quote_expected?(ch)
lastchar=prevchar
eat_next_if(ch[0]) or raise "needed: "+ch
- if nextchar==?( and @enable_macro
+ if nextchar==?( and @enable_macro #factored
result= OperatorToken.new(':', startpos)
result.unary=true
return result
end
#handle quoted symbols like :"foobar", :"[]"
- qe and return symbol(':')
+ if qe
+ return symbol(':')
+ elsif eat_next_if(?:)
+ #we definately found a ::
- #look for another colon; return single : if not found
- unless eat_next_if(?:)
+ colon2=KeywordToken.new( '::',startpos)
+ lasttok=@last_operative_token
+ assert !(String===lasttok)
+ if (VarNameToken===lasttok or MethNameToken===lasttok) and
+ lasttok===/^(?:[$@]|#@@LETTER)/o and !WHSPCHARS[lastchar]
+ then
+ @moretokens << colon2
+ result= NoWsToken.new(startpos)
+ else
+ result=colon2
+ end
+ dot_rhs(colon2)
+ return result
+
+ #return single : token
+ else
#cancel implicit contexts...
- @moretokens.push(*abort_noparens!(':'))
+ @moretokens.push(*abort_noparens!(':')) #special treatment not needed in 1.9 mode?
@moretokens.push tok=KeywordToken.new(':',startpos)
- case @parsestack.last
- when TernaryContext
- tok.ternary=true
- @parsestack.pop #should be in the context's see handler
- when ExpectDoOrNlContext #should be in the context's see handler
- if @rubyversion<1.9
- @parsestack.pop
- assert @parsestack.last.starter[/^(while|until|for)$/]
- tok.as=";"
- end
- when ExpectThenOrNlContext,WhenParamListContext
- if @rubyversion<1.9
- #should be in the context's see handler
- @parsestack.pop
- tok.as="then"
- end
- when RescueSMContext
- tok.as=";"
- end or
+ colon_operator(tok) or
fail ": not expected in #{@parsestack.last.class}->#{@parsestack.last.starter}"
-
#end ternary context, if any
@parsestack.last.see self,:colon
return @moretokens.shift
end
- #we definately found a ::
+ end
- colon2=KeywordToken.new( '::',startpos)
- lasttok=@last_operative_token
- assert !(String===lasttok)
- if (VarNameToken===lasttok or MethNameToken===lasttok) and
- lasttok===/^(?:[$@]|#@@LETTER)/o and !WHSPCHARS[lastchar]
- then
- @moretokens << colon2
- result= NoWsToken.new(startpos)
- else
- result=colon2
- end
- dot_rhs(colon2)
- return result
+ #-----------------------------------
+ def colon_operator tok
+ case @parsestack.last
+ when TernaryContext
+ tok.ternary=true
+ @parsestack.pop #should be in the context's see handler
+ when ExpectDoOrNlContext #should be in the context's see handler
+ @parsestack.pop
+ assert @parsestack.last.starter[/^(while|until|for)$/]
+ tok.as=";"
+ when ExpectThenOrNlContext,WhenParamListContext
+ #should be in the context's see handler
+ @parsestack.pop
+ tok.as="then"
+ when RescueSMContext
+ tok.as=";"
+ end
end
#-----------------------------------
def symbol(notbare,couldbecallsite=!notbare)
assert !couldbecallsite
@@ -1881,17 +2345,18 @@
double_quote('"')
when ?' #'
assert notbare
open=":'"; close="'"
single_quote("'")
- when ?` then read(1) #`
+# when ?` then read(1) #`
when ?@ then at_identifier.to_s
when ?$ then dollar_identifier.to_s
when ?_,?a..?z,NONASCII then identifier_as_string(?:)
when ?A..?Z then
result=identifier_as_string(?:)
if @last_operative_token==='::'
+ fail #i think this can't happen anymore now
assert klass==MethNameToken
/#@@LETTER_DIGIT$/o===result and klass=VarNameToken
end
result
else
@@ -1917,17 +2382,17 @@
#look for operators
opmatches=readahead(3)[@method_operators]
return [read(opmatches.size), start] if opmatches
case nc=nextchar
- when ?` #`
- return [read(1),start]
+# when ?` #`
+# return [read(1),start]
when ?_,?a..?z,?A..?Z,NONASCII
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
return [identifier_as_string(context), start]
when ?(
- return [nil,start] if @enable_macro or @rubyversion>=1.9
+ return [nil,start] if @enable_macro or @rubyversion>=1.9 #factored
end
set_last_token KeywordToken.new(';')
lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
return [nil, start]
@@ -1940,20 +2405,21 @@
dash=eat_next_if(?-)
quote=eat_next_if( /['"`]/)
if quote
ender=til_charset(/[#{quote}]/)
(quote==getchar) or
- return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
+ return lexerror(res=HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
quote_real=true
else
quote='"'
ender=@file.scan(/#@@LETTER_DIGIT+/o)
ender.length >= 1 or
- return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
+ return lexerror(res=HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
end
res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
+ res.line=linenum
if true
res.open=["<<",dash,quote,ender,quote].join
procrastinated=til_charset(/[\n]/)#+readnl
unless @base_file
@base_file=@file
@@ -1980,18 +2446,19 @@
#one or two already read characters are overwritten here,
#in order to keep offsets correct in the long term
#(at present, offsets and line numbers between
#here header and its body will be wrong. but they should re-sync thereafter.)
- newpos=input_position_raw-nl.size
+ newpos=input_position_raw
#unless procrastinated.empty?
- @file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input
+ @file.modify(newpos,0,procrastinated) #vomit procrastinated text back onto input
#end
+ #@offset_adjust2=-1 #nice idea, but crashes 1.9.2 and causes more warnings than it fixes... :(
input_position_set newpos
#line numbers would be wrong within the procrastinated section
- @linenum-=1
+ @linenum=res.line #was: @linenum-=1
#be nice to get the here body token at the right place in input, too...
@pending_here_bodies<< body
@offset_adjust-=bodysize#+nl.size
@@ -2036,10 +2503,12 @@
#the action continues in newline, where
#the rest of the here token is read after a
#newline has been seen and res.affix is eventually called
end
+ ensure
+ assign_encoding!(res.string) if res
end
#-----------------------------------
def lessthan(ch) #match quadriop('<') or here doc or spaceship op
case readahead(3)
@@ -2071,17 +2540,17 @@
if @base_file and indices=@file.instance_eval{@start_pos} and
(indices[-2]..indices[-1])===@file.pos
@base_file.pos=@file.pos
@file=@base_file
@base_file=nil
- result="\n"
+# result="\n"
end
@offset_adjust=@min_offset_adjust
@moretokens.push( *optional_here_bodies )
ln=@linenum
- @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln-1), error),
+ @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln), error),
FileAndLineToken.new(@filename,ln,input_position)
start_of_line_directives
return @moretokens.shift
@@ -2089,21 +2558,24 @@
#-----------------------------------
def optional_here_bodies
result=[]
if true
- #handle here bodies queued up by previous line
- pos=input_position
- while body=@pending_here_bodies.shift
+ #handle here bodies queued up by previous line
+ pos=input_position
+ while body=@pending_here_bodies.shift
#body.offset=pos
- result.push EscNlToken.new("\n",body.offset-1,@filename,nil)
- result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
+ result.push EscNlToken.new("\n",body.offset-1,@filename,@linenum)
+ result.push FileAndLineToken.new(@filename,@linenum,body.offset)
result.push body
#result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
#result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
- body.headtok.line=@linenum-1
- end
+ @linenum+=body.linecount
+ body.endline=@linenum-1
+ # body.startline=@linenum-1-body.linecount
+ end
+
else
#...(we should be more compatible with dos/mac style newlines...)
while tofill=@incomplete_here_tokens.shift
result.push(
here_body(tofill),
@@ -2120,10 +2592,11 @@
def here_body(tofill)
close="\n"
tofill.string.offset= input_position
linecount=1 #for terminator
assert("\n"==prevchar)
+ startline=@linenum
loop {
assert("\n"==prevchar)
#here body terminator?
oldpos= input_position_raw
@@ -2135,12 +2608,14 @@
lexerror tofill.string, "unterminated here body"
break
end
if read(tofill.ender.size)==tofill.ender
crs=til_charset(/[^\r]/)||''
- if nl=readnl
- close+=tofill.ender+crs+nl
+ nl=nextchar
+ if !nl or nl==?\n
+ close+=tofill.ender+crs
+ close+="\n" if nl
break
end
end
input_position_set oldpos
@@ -2221,35 +2696,39 @@
result=tofill.bodyclass.new(tofill,linecount)
result.open=str.open=""
tofill.close=close
result.close=str.close=close[1..-1]
result.offset=str.offset
+ result.endline=@linenum-1
+ result.startline=startline
assert str.open
assert str.close
return result
end
#-----------------------------------
+ def want_hard_nl?
+ NewlineToken===@last_operative_token || #hack
+ (KeywordToken===@last_operative_token and
+ @last_operative_token.ident=="rescue" and
+ !@last_operative_token.infix?) ||
+ !after_nonid_op?{false}
+ end
+
+ #-----------------------------------
def newline(ch)
assert("\r\n"[nextchar.chr])
#ordinary newline handling (possibly implicitly escaped)
assert("\r\n"[nextchar.chr])
assert !@parsestack.empty?
assert @moretokens.empty?
- pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
- pre.allow_ooo_offset=true
+ hard=want_hard_nl?
- hard=NewlineToken===@last_operative_token || #hack
- (KeywordToken===@last_operative_token and
- @last_operative_token.ident=="rescue" and
- !@last_operative_token.infix?) ||
- !after_nonid_op?{false}
+# hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
- hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
-
if hard
@offset_adjust=@min_offset_adjust
a= abort_noparens!
case @parsestack.last #these should be in the see:semi handler
when ExpectDoOrNlContext; @parsestack.pop
@@ -2257,17 +2736,19 @@
end
assert !@parsestack.empty?
@parsestack.last.see self,:semi
a << rulexer_newline(ch)
- @moretokens.replace a+@moretokens
+ a+=@moretokens
+ @moretokens.replace a
else
@offset_adjust=@min_offset_adjust
offset= input_position
nl=readnl
- @moretokens.push EscNlToken.new(nl,offset,@filename,@linenum-1),
- FileAndLineToken.new(@filename,@linenum,input_position)
+ a=[EscNlToken.new(nl,offset,@filename,@linenum),
+ FileAndLineToken.new(@filename,@linenum,input_position)]
+ @moretokens.push( *a )
end
#optimization: when thru with regurgitated text from a here document,
#revert back to original unadorned Sequence instead of staying in the list.
if @base_file and indices=@file.instance_eval{@start_pos} and
@@ -2282,27 +2763,17 @@
@offset_adjust=@min_offset_adjust
@moretokens.unshift(*optional_here_bodies)
- #adjust line count in fal to account for newlines in here bodys
- i=@moretokens.size-1
- while(i>=0)
- #assert FileAndLineToken===@moretokens[i]
- i-=1 if FileAndLineToken===@moretokens[i]
- break unless HereBodyToken===@moretokens[i]
- pre_fal=true
- fal.line-=@moretokens[i].linecount
+ #adjust line #s to account for newlines in here bodys
+ l=@linenum
+ a.reverse_each{|implicit|
+ implicit.endline=l
+ l-=1 if EscNlToken===implicit or NewlineToken===implicit
+ }
- i-=1
- end
-
- if pre_fal
- result=@moretokens.first
- pre.offset=result.offset
- @moretokens.unshift pre
- end
start_of_line_directives
result=@moretokens.shift
return result
end
@@ -2315,10 +2786,11 @@
ENDMARKER=/^__END__[\r\n]?\Z/
ENDMARKERLENGTH=8
def start_of_line_directives
#handle =begin...=end (at start of a line)
while EQBEGIN===readahead(EQBEGINLENGTH)
+ startline=@linenum
startpos= input_position
more= read(EQBEGINLENGTH-1) #get =begin
begin
eof? and raise "eof before =end"
@@ -2335,12 +2807,14 @@
# newls= more.scan(/\r\n?|\n\r?/)
# @linenum+= newls.size
#inject the fresh comment into future token results
- @moretokens.push IgnoreToken.new(more,startpos),
- FileAndLineToken.new(@filename,@linenum,input_position)
+ comment=IgnoreToken.new(more,startpos)
+ comment.startline=startline
+ comment.endline=@linenum
+ @moretokens.push comment, FileAndLineToken.new(@filename,@linenum,input_position)
end
#handle __END__
if ENDMARKER===readahead(ENDMARKERLENGTH)
assert !(ImplicitContext===@parsestack.last)
@@ -2351,16 +2825,18 @@
#-----------------------------------
#used to resolve the ambiguity of
- # unary ops (+, -, *, &, ~ !) in ruby
+ # unary ops (+, -, *, &, (and ^ if macros enabled) ) in ruby
#returns whether current token is to be the start of a literal
IDBEGINCHAR=/^(?:#@@LETTER|[$@])/o
def unary_op_expected?(ch) #yukko hack
- '*&='[readahead(2)[1..1]] and return false
+ #not unary if its anything followed by = or &/* followed by themselves
+ return false if /^(?:.=|([&*])\1)$/===readahead(2)
+
return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
after_nonid_op? {
#possible func-call as operator
@@ -2395,25 +2871,25 @@
#returns what block yields if last token was a method name.
#used to resolve the ambiguity of
# <<, %, /, ?, :, and newline (among others) in ruby
def after_nonid_op?
- #this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
+ #this is how it should be, I think, and then no handlers for methnametoken and @FUNCLIKE_KEYWORDS are needed
# if ImplicitParamListStartToken===@last_token_including_implicit
# huh return true
# end
case @last_operative_token
- when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
+ when VarNameToken , MethNameToken, @FUNCLIKE_KEYWORDS.token_pat
#VarNameToken should really be left out of this case...
#should be in next branch instread
#callers all check for last token being not a variable if they pass anything
#but {false} in the block
#(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
return yield
when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
%r{^(
- end|self|true|false|nil|->|
+ end|self|true|false|nil|
__FILE__|__LINE__|__ENCODING__|[\})\]]
)$}x.token_pat
#dunno about def/undef
#maybe class/module shouldn't he here either?
#for is also in NewlineToken branch, below.
@@ -2423,11 +2899,11 @@
#regexs above must match whole string
#assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
return true if OperatorToken===@last_operative_token || KeywordToken===@last_operative_token
when NewlineToken, nil, #nil means we're still at beginning of file
/^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
- while|until|begin|for|in|case|when|ensure|defined\?)$
+ while|until|begin|for|in|case|when|ensure|defined\?|->)$
/x.token_pat
return true
when KeywordToken
return true if /^(alias|undef)$/===@last_operative_token.ident #is this ever actually true???
when IgnoreToken
@@ -2476,11 +2952,11 @@
end
#-----------------------------------
def caret(ch) #match /^=?/ (^ or ^=) (maybe unary ^ too)
- if @enable_macro and (@last_token_maybe_implicit and
+ if @enable_macro and (@last_token_maybe_implicit and #factored
@last_token_maybe_implicit.ident=='(') || unary_op_expected?(ch)
result=OperatorToken.new(read(1),input_position)
result.unary=true
result
else
@@ -2531,19 +3007,19 @@
if unary_op_expected?(ch) or
KeywordToken===@last_operative_token &&
/^(return|break|next)$/===@last_operative_token.ident
if (?0..?9)===readahead(2)[1]
result= number(ch)
- elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
- @file.pos+=2
- #push down block context
- localvars.start_block
- @parsestack.push ctx=BlockContext.new(@linenum)
- ctx.wanting_stabby_block_body=true
- #read optional proc params
- block_param_list_lookahead ?(, ParenedParamListLhsContext
- result=KeywordToken.new('->',pos)
+# elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
+# @file.pos+=2
+# #push down block context
+# localvars.start_block
+# @parsestack.push ctx=BlockContext.new(@linenum)
+# ctx.wanting_stabby_block_body=true
+# #read optional proc params
+# block_param_list_lookahead ?(, ParenedParamListLhsContext
+# result=KeywordToken.new('->',pos)
else #unary operator
result=getchar
WHSPLF[nextchar.chr] or
@moretokens << NoWsToken.new(input_position)
@@ -2579,24 +3055,24 @@
@moretokens.unshift result
@moretokens.unshift( *abort_noparens!("=>"))
result=@moretokens.shift
end
@parsestack.last.see self,:arrow
- when '~' # =~... after regex, maybe?
- last=last_operative_token
-
- if @rubyversion>=1.9 and StringToken===last and last.lvars
- #ruby delays adding lvars from regexps to known lvars table
- #for several tokens in some cases. not sure why or if on purpose
- #i'm just going to add them right away
- last.lvars.each{|lvar| localvars[lvar]=true }
- end
+# when '~' # =~... after regex, maybe?
+# last=last_operative_token
+#
+# if @rubyversion>=1.9 and StringToken===last and last.lvars
+# #ruby delays adding lvars from regexps to known lvars table
+# #for several tokens in some cases. not sure why or if on purpose
+# #i'm just going to add them right away
+# last.lvars.each{|lvar| localvars[lvar]=true }
+# end
when '' #plain assignment: record local variable definitions
last_context_not_implicit.lhs=false
@last_operative_token=result
@moretokens.push( *ignored_tokens(true).map{|x|
- NewlineToken===x ? EscNlToken.new(x.ident,x.offset,@filename,@linenum) : x
+ NewlineToken===x ? EscNlToken.new(x.ident,x.offset,x.filename,x.linenum) : x
} )
@parsestack.push AssignmentRhsContext.new(@linenum)
@moretokens.push AssignmentRhsListStartToken.new( input_position)
if eat_next_if ?*
tok=OperatorToken.new('*', input_position-1)
@@ -2619,41 +3095,43 @@
result+=k
elsif eof? or WHSPLF[nextchar.chr] #do nothing
else
@moretokens << NoWsToken.new(input_position)
end
- ty= @rubyversion>=1.9 ? OperatorToken : KeywordToken
+ ty=OperatorToken
result=ty.new(result, input_position-result.size)
result.unary=!k #result should distinguish unary !
return result
end
-
#-----------------------------------
def dot(ch)
str=''
eat_next_if(?.) or raise "lexer confusion"
#three lumps of sugar or two?
eat_next_if(?.) and
- return KeywordToken.new(eat_next_if(?.)? "..." : "..")
+ return OperatorToken.new(eat_next_if(?.)? "..." : "..")
#else saw just single .
#match a valid ruby id after the dot
- result= KeywordToken.new( ".")
+ result= KeywordToken.new( "." )
dot_rhs(result)
return result
end
#-----------------------------------
def dot_rhs(prevtok)
safe_recurse { |a|
set_last_token prevtok
aa= ignored_tokens
was=after_nonid_op?{true}
tok,pos=callsite_symbol(prevtok)
- tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was))
+ if tok
+ toks=var_or_meth_name(tok,prevtok,pos,was)
+ aa.push(*toks)
+ end
a.unshift(*aa)
}
end
#-----------------------------------
@@ -2690,12 +3168,24 @@
#}
return IgnoreToken.new(result)
end
end
+
#-----------------------------------
+ def method_params?
+ lasttok=last_token_maybe_implicit #last_operative_token
+ VarNameToken===lasttok or
+ MethNameToken===lasttok or
+ lasttok===@FUNCLIKE_KEYWORDS or
+ (@enable_macro and lasttok and lasttok.ident==')') #factored
+ end
+
+ #-----------------------------------
def open_brace(ch)
+ #there are 3 distinct cases here; this method should be divided in 3
+
assert((ch!='[' or !want_op_name))
assert(@moretokens.empty?)
lastchar=prevchar
ch=eat_next_if(/[({\[]/)or raise "lexer confusion"
tokch=KeywordToken.new(ch, input_position-1)
@@ -2705,30 +3195,23 @@
case tokch.ident
when '['
# in contexts expecting an (operator) method name, we
# would want to match [] or []= at this point
#but control never comes this way in those cases... goes
- #to custom parsers for alias, undef, and def in #parse_keywords
+ #to custom parsers for alias, undef, and def in #special_identifier?
tokch.set_infix! unless after_nonid_op?{WHSPLF[lastchar]}
@parsestack.push ListImmedContext.new(ch,@linenum)
lasttok=last_operative_token
#could be: lasttok===/^#@@LETTER/o
if (VarNameToken===lasttok or ImplicitParamListEndToken===lasttok or
- MethNameToken===lasttok or lasttok===FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
+ MethNameToken===lasttok or lasttok===@FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
@moretokens << (tokch)
tokch= NoWsToken.new(input_position-1)
end
when '('
- lasttok=last_token_maybe_implicit #last_operative_token
#could be: lasttok===/^#@@LETTER/o
- method_params= (
- VarNameToken===lasttok or
- MethNameToken===lasttok or
- lasttok===FUNCLIKE_KEYWORDS or
- (@enable_macro and lasttok and lasttok.ident==')')
- )
- if method_params
+ if method_params?
unless WHSPCHARS[lastchar]
@moretokens << tokch
tokch= NoWsToken.new(input_position-1)
end
@parsestack.push ParamListContext.new(@linenum)
@@ -2751,17 +3234,23 @@
when '{'
#check if we are in a hash literal or string inclusion (#{}),
#in which case below would be bad.
if !(UnparenedParamListLhsContext===@parsestack.last) and
after_nonid_op?{false} || @last_operative_token.has_no_block?
- @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
+ if @file.readbehind(2)=='#{'
+ @parsestack.push StringInclusionContext.new(@linenum)
+ else
+ @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
+ end
else
#abort_noparens!
tokch.set_infix!
tokch.as="do"
#if (perhaps deep) inside a stabby block param list context, end it
+ stabby_params_just_ended,tokch=maybe_end_stabby_block_param_list(tokch)
+=begin was
if @rubyversion>=1.9
stabby_params_just_ended=false
(@parsestack.size-1).downto(1){|i|
case @parsestack[i]
when ParamListContextNoParen,AssignmentRhsContext
@@ -2777,10 +3266,11 @@
break
else break
end
}
end
+=end
# 'need to find matching callsite context and end it if implicit'
lasttok=last_operative_token
if !(lasttok===')' and lasttok.callsite?) and !stabby_params_just_ended #or ParamListContextNoParen===parsestack.last
@moretokens.push( *(abort_1_noparen!(1).push tokch) )
@@ -2798,10 +3288,15 @@
end
return (tokch)
end
#-----------------------------------
+ def maybe_end_stabby_block_param_list(tokch)
+ return false,tokch
+ end
+
+ #-----------------------------------
def close_brace(ch)
ch==eat_next_if(/[)}\]]/) or raise "lexer confusion"
@moretokens.concat abort_noparens!(ch)
@parsestack.last.see self,:semi #hack
@moretokens<< kw=KeywordToken.new( ch, input_position-1)
@@ -2844,16 +3339,24 @@
return(endoffile_detected result)
end
#-----------------------------------
def endoffile_detected(s='')
- @moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s)))
+ @linenum+=1 #optional_here_bodies expects to be called after a newline was seen and @linenum bumped
+ #in this case, there is no newline, but we need to pretend there is. otherwise optional_here_bodies
+ #makes tokens with wrong line numbers
+
+ @moretokens.concat optional_here_bodies
+ @linenum-=1 #now put it back
+ @moretokens.concat abort_noparens!
+ @moretokens.push rulexer_endoffile_detected(s)
if @progress_thread
@progress_thread.kill
@progress_thread=nil
end
result= @moretokens.shift
+ assert @pending_here_bodies.empty?
balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
result
end
#-----------------------------------
@@ -2877,12 +3380,15 @@
# AssignmentRhsContext
#]===@parsestack
while AssignmentRhsContext===@parsestack[-1]
pop=
case @parsestack[-2]
- when ParamListContext,ParamListContextNoParen,WhenParamListContext,
- ListImmedContext,AssignmentRhsContext; true
+ when ParamListContext,ParamListContextNoParen,
+ WhenParamListContext,ListImmedContext,AssignmentRhsContext,
+ ParenedParamListLhsContext,UnparenedParamListLhsContext,
+ BlockParamListLhsContext,KnownNestedLhsParenContext
+ true
when RescueSMContext; @parsestack[-2].state==:rescue
when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens?
else false
end
break unless pop
@@ -2902,11 +3408,11 @@
end
#-----------------------------------
def semicolon(ch)
assert @moretokens.empty?
- @moretokens.push(*abort_noparens!)
+ @moretokens.push(*abort_noparens!(';',0))
@parsestack.last.see self,:semi
case @parsestack.last #should be in context's see:semi handler
when ExpectThenOrNlContext
@parsestack.pop
when ExpectDoOrNlContext
@@ -2930,11 +3436,56 @@
end
#-----------------------------------
#tokenify_results_of :identifier
save_offsets_in(*CHARMAPPINGS.values.uniq-[
- :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret,:plusminus
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:caret,:plusminus
])
+ save_linenums_in :symbol_or_op,:open_brace,:whitespace,:exclam,:caret,:plusminus
#save_offsets_in :symbol
end
+
+#defense against my class being redefined by a certain other project.
+module Kernel
+ eval %w[require load].map{|name| <<-END }.join
+ #{name}__without_rubylexer_protection=instance_method :#{name}
+ define_method(:#{name}) do |file|
+ if /\\Aruby_(lexer|parser)(\\.rb)?\\z/i===File.basename(file)
+ warn "Uh-oh, you're trying to use ruby_parser and rubylexer at the same time."
+ warn "ruby_parser causes a namespace conflict with rubylexer"
+ warn "because ruby_parser redefines the class RubyLexer"
+ warn "in a way which is incompatible with standard RubyLexer."
+ warn "The rubylexer gem owns the namespace ::RubyLexer,"
+ warn "and claimed it at least 2 years before ruby_parser existed."
+ warn "Attempt to redefine RubyLexer in an incompatible way disabled."
+ else
+ begin
+ #{name}__without_rubylexer_protection.bind(self).call file
+ rescue Exception=>e
+ e.backtrace.delete_if{|x| /\\A\#{__FILE__}:\#{__LINE__-2}:/o===x }
+ raise e
+ end
+ end
+ end
+ END
+end
+
+eval %w[class\ Module module\ Kernel].map{|ctx| <<END }.join
+ #{ctx}
+ autoload__without_rubylexer_protection=instance_method :autoload
+ define_method(:autoload) do |mod,file|
+ if /\\Aruby_(lexer|parser)(\\.rb)?\\z/i===File.basename(file)
+ warn "Uh-oh, you're trying to use ruby_parser and rubylexer at the same time."
+ warn "ruby_parser causes a namespace conflict with rubylexer"
+ warn "because ruby_parser redefines the class RubyLexer"
+ warn "in a way which is incompatible with standard RubyLexer."
+ warn "The rubylexer gem owns the namespace ::RubyLexer,"
+ warn "and claimed it at least 2 years before ruby_parser existed."
+ warn "Attempt to redefine RubyLexer in an incompatible way disabled."
+ else
+ autoload__without_rubylexer_protection.bind(self).call mod,file
+ end
+ end
+ end
+END