lib/rubylexer.rb in rubylexer-0.7.7 vs lib/rubylexer.rb in rubylexer-0.8.0

- old
+ new

@@ -1,8 +1,9 @@ +#encoding: binary =begin rubylexer - a ruby lexer written in ruby - Copyright (C) 2004,2005,2008 Caleb Clausen + Copyright (C) 2004,2005,2008, 2011 Caleb Clausen This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. @@ -15,68 +16,82 @@ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA =end - require 'rubylexer/rulexer' #must be 1st!!! require 'rubylexer/version' require 'rubylexer/token' require 'rubylexer/charhandler' require 'rubylexer/symboltable' #require "io.each_til_charset" require 'rubylexer/context' require 'rubylexer/tokenprinter' - #----------------------------------- class RubyLexer include NestedContexts - + #here's a list of other constants that should already be defined at this point: + [WHSP, VERSION, Token, CharSet, CharHandler, SymbolTable, SimpleTokenPrinter].each{|k| fail if k.nil? } - RUBYSYMOPERATORREX= + RUBYUNOPERATORS=%w{ +@ ~ ~@ -@ ! !@ } + RUBYBINOPERATORS=%w{ & | ^ / % == === =~ > >= >> < <= << <=> + - * ** } + RUBYCOMPOPERATORS=%w{== === =~ > >= < <= <=>} + RUBYSYMOPERATORS=RUBYUNOPERATORS+RUBYBINOPERATORS+%w{ [] []= } + RUBYNONSYMOPERATORS=%w{!= !~ = => :: ? : , ; . .. ... || && ||= &&=}+ + (RUBYBINOPERATORS-RUBYCOMPOPERATORS).map{|op| op+'='} + RUBYSYMOPERATORREX= %r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)} # (nasty beastie, eh?) #these are the overridable operators #does not match flow-control operators like: || && ! or and if not #or op= ops like: += -= ||= #or .. ... ?: #for that use: - RUBYNONSYMOPERATORREX= + RUBYNONSYMOPERATORREX= %r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$} - RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o - UNSYMOPS=/^[~!]$/ #always unary - UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary - WHSPCHARS=WHSPLF+"\\#" - OPORBEGINWORDLIST=%w(if unless while until) - BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST - OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})" - BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o - FUNCLIKE_KEYWORDLIST=%w/break next redo return yield retry super BEGIN END/ - FUNCLIKE_KEYWORDS=/^(#{FUNCLIKE_KEYWORDLIST.join '|'})$/ - VARLIKE_KEYWORDLIST=%w/__FILE__ __LINE__ false nil self true/ - VARLIKE_KEYWORDS=/^(#{VARLIKE_KEYWORDLIST.join '|'})$/ - INNERBOUNDINGWORDLIST=%w"else elsif ensure in then rescue when" - INNERBOUNDINGWORDS="(#{INNERBOUNDINGWORDLIST.join '|'})" - BINOPWORDLIST=%w"and or" - BINOPWORDS="(#{BINOPWORDLIST.join '|'})" + RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o + UNSYMOPS=/^[~!]$/ #always unary + UBSYMOPS=/^(?:[*&+-]|::)$/ #ops that could be unary or binary + WHSPCHARS=WHSPLF+"\\#" + OPORBEGINWORDLIST=%w(if unless while until) + BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST + OPORBEGINWORDS="(?:#{OPORBEGINWORDLIST.join '|'})" + BEGINWORDS=/^(?:#{BEGINWORDLIST.join '|'})$/o + FUNCLIKE_KEYWORDLIST_1_9=%w[not] + FUNCLIKE_KEYWORDLIST=%w/break next redo return yield retry super BEGIN END/ + FUNCLIKE_KEYWORDS=/^(?:#{FUNCLIKE_KEYWORDLIST.join '|'})$/ + VARLIKE_KEYWORDLIST_1_9=%w[__ENCODING__] + VARLIKE_KEYWORDLIST=%w/__FILE__ __LINE__ false nil self true/ + VARLIKE_KEYWORDS=/^(?:#{VARLIKE_KEYWORDLIST.join '|'})$/ + attr_reader :FUNCLIKE_KEYWORDS, :VARLIKE_KEYWORDS + + INNERBOUNDINGWORDLIST=%w"else elsif ensure in then rescue when" + INNERBOUNDINGWORDS="(?:#{INNERBOUNDINGWORDLIST.join '|'})" + BINOPWORDLIST=%w"and or" + BINOPWORDS="(?:#{BINOPWORDLIST.join '|'})" - RUBYKEYWORDS=%r{ - ^(alias|#{BINOPWORDS}|defined\?|not|undef|end| + RUBYKEYWORDS=%r{ + ^(?:alias|#{BINOPWORDS}|defined\?|not|undef|end| #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}| #{INNERBOUNDINGWORDS}|#{BEGINWORDS} )$ }xo + RUBYKEYWORDLIST=%w{alias defined? not undef end}+ + BINOPWORDLIST+ + VARLIKE_KEYWORDLIST+FUNCLIKE_KEYWORDLIST+ + INNERBOUNDINGWORDLIST+BEGINWORDLIST+ + VARLIKE_KEYWORDLIST_1_9 #__END__ should not be in this set... its handled in start_of_line_directives - HIGHASCII=?\x80..?\xFF - NONASCII=HIGHASCII - #NONASCII=?\x80..?xFFFFFFFF #or is it 10FFFF, whatever the highest conceivable code point + HIGHASCII=?\x80..?\xFF + NONASCII=HIGHASCII + #NONASCII=?\x80..?xFFFFFFFF #or is it 10FFFF, whatever the highest conceivable code point - CHARMAPPINGS = { + CHARMAPPINGS = { ?$ => :dollar_identifier, ?@ => :at_identifier, ?a..?z => :identifier, ?A..?Z => :identifier, ?_ => :identifier, @@ -123,37 +138,37 @@ ?\x01..?\x03 => :illegal_char, ?\x05..?\x08 => :illegal_char, ?\x0E..?\x19 => :illegal_char, ?\x1b..?\x1F => :illegal_char, ?\x7F => :illegal_char, - } + } - attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit + attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit - UCLETTER=@@UCLETTER="[A-Z]" + UCLETTER=@@UCLETTER="[A-Z]" - #cheaters way, treats utf chars as always 1 byte wide - #all high-bit chars are lowercase letters - #works, but strings compare with strict binary identity, not unicode collation - #works for euc too, I think - #(the ruby spec for utf8 support permits this interpretation) - LCLETTER=@@LCLETTER="[a-z_\x80-\xFF]" - LETTER=@@LETTER="[A-Za-z_\x80-\xFF]" - LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]" - eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| " + #cheaters way, treats utf chars as always 1 byte wide + #all high-bit chars are lowercase letters + #works, but strings compare with strict binary identity, not unicode collation + #works for euc too, I think + #(the ruby spec for utf8 support permits this interpretation) + LCLETTER=@@LCLETTER="[a-z_\x80-\xFF]" + LETTER=@@LETTER="[A-Za-z_\x80-\xFF]" + LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]" + eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| " def #{n}; #{n}; end def self.#{n}; @@#{n}; end " - }.join + }.join - NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om - if ?A.is_a? String #ruby >= 1.9 - NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/ - else - NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST - end - NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST + NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om + if ?A.is_a? String #ruby >= 1.9 + NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/ + else + NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST + end + NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST =begin require 'jcode' utf8=String::PATTERN_UTF8 #or euc, or sjis... LCLETTER_U="(?>[a-z_]|#{utf8})" @@ -161,11 +176,18 @@ LETTER_DIGIT_U="(?>[A-Za-z_0-9]|#{utf8})" =end #----------------------------------- def initialize(filename,file,linenum=1,offset_adjust=0,options={}) - @offset_adjust=0 #set again in next line + if file.respond_to? :set_encoding + file.set_encoding 'binary' + elsif file.respond_to? :force_encoding + file=file.dup if file.frozen? + file.force_encoding 'binary' + end + + @offset_adjust=@offset_adjust2=0 #set again in next line rulexer_initialize(filename,file, linenum,offset_adjust) @start_linenum=linenum @parsestack=[TopLevelContext.new] @incomplete_here_tokens=[] #not used anymore @pending_here_bodies=[] @@ -177,69 +199,147 @@ @enable_macro=nil @base_file=nil @progress_thread=nil @rubyversion=options[:rubyversion]||1.8 @encoding=options[:encoding]||:detect - @method_operators=if @rubyversion>=1.9 - /#{RUBYSYMOPERATORREX}|\A![=~@]?/o - else - RUBYSYMOPERATORREX - end + @always_binary_chars=CharSet['}]);|>,.=^'] + @unary_or_binary_chars=CharSet['+-%/'] + + + @FUNCLIKE_KEYWORDS=FUNCLIKE_KEYWORDS + @VARLIKE_KEYWORDS=VARLIKE_KEYWORDS + @toptable=CharHandler.new(self, :identifier, CHARMAPPINGS) - extend RubyLexer1_9 if @rubyversion>=1.9 - read_leading_encoding - start_of_line_directives + if @rubyversion>=1.9 + extend RubyLexer1_9 + end + rubylexer_modules_init + @method_operators=build_method_operators + if input_position.zero? + read_leading_encoding + @encoding=:binary if @rubyversion<=1.8 + start_of_line_directives + end progress_printer end - ENCODING_ALIASES={ - 'utf-8'=>'utf8', + def rubylexer_modules_init - 'ascii-8bit'=>'binary', - 'ascii-7bit'=>'ascii', + end + + alias dump inspect # preserve old inspect functionality + + # irb friendly #inspect/#to_s + def to_s + mods=class<<self;self end.ancestors-self.class.ancestors + mods=mods.map{|mod| mod.name }.join('+') + mods="+"<<mods unless mods.empty? + "#<#{self.class.name}#{mods}: [#{@file.inspect}]>" + end + + alias inspect to_s + + + def build_method_operators + /#{RUBYSYMOPERATORREX}|\A`/o + end + + + RAW_ENCODING_ALIASES={ + #'utf-8'=>'utf8', + + 'ascii-8-bit'=>'binary', + 'ascii-7-bit'=>'ascii', 'euc-jp'=>'euc', - 'ascii8bit'=>'binary', - 'ascii7bit'=>'ascii', - 'eucjp'=>'euc', + 'iso-8859-1'=>'binary', + 'latin-1'=>'binary', + #'ascii8bit'=>'binary', + #'ascii7bit'=>'ascii', + #'eucjp'=>'euc', 'us-ascii'=>'ascii', 'shift-jis'=>'sjis', 'autodetect'=>'detect', } + ENCODING_ALIASES=Hash[*RAW_ENCODING_ALIASES.map{|long,short| [long.tr_s('-_',''),short] }.flatten] ENCODINGS=%w[ascii binary utf8 euc sjis] + NONWORKING_ENCODINGS=%w[sjis] + WSCHARS=@@WSCHARS= /[\s]/==="\v" ? '\s' : '\s\v' #same as WHSP + WSNONLCHARS=@@WSNONLCHARS=/(?!\n)[#@@WSCHARS]/o #same as WHSPLF + + NOPARAMLONGOPTIONS=%w[copyright version verbose debug yydebug help] + PARAMLONGOPTIONS=%w[encoding dump] + DASHPARAMLONGOPTIONS=%w[enable disable] + NOPARAMOPTIONS="SacdhlnpsvwyU" + OCTALPARAMOPTIONS="0" + CHARPARAMOPTIONS="KTW" + PARAMSHORTOPTIONS="CXFIEeir" + MAYBEPARAMSHORTOPTIONS="x" + NEWIN1_9OPTIONS=%w[encoding dump enable disable X U W E] + LONGOPTIONS=/ + --(#{NOPARAMLONGOPTIONS.join'|'})| + --(#{PARAMLONGOPTIONS.join'|'})(=|#@@WSNONLCHARS+)[^#@@WSCHARS]+| + --(#{DASHPARAMLONGOPTIONS.join'|'})-[^#@@WSCHARS]+ + /ox + CHAINOPTIONS=/ + [#{NOPARAMOPTIONS}]+| + [#{OCTALPARAMOPTIONS}][0-7]{1,3}| + [#{CHARPARAMOPTIONS}]. + /ox + PARAMOPTIONS=/ + [#{PARAMSHORTOPTIONS}]#@@WSNONLCHARS*[^#@@WSCHARS]+| + [#{MAYBEPARAMSHORTOPTIONS}]#@@WSNONLCHARS*[^#@@WSCHARS]* + /ox + OPTIONS=/ + (#@@WSNONLCHARS*( + #{LONGOPTIONS} | --? | + -#{CHAINOPTIONS}*( #{PARAMOPTIONS} | #{CHAINOPTIONS} ) + ))* + /ox + def read_leading_encoding - return unless @encoding==:detect - @encoding=:ascii - @encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" ) #bom - if @file.skip( /\A#!/ ) + @encoding=nil if @encoding==:detect + if enc=@file.scan( "\xEF\xBB\xBF" ) #bom + encpos=0 + @encoding||=:utf8 + elsif @file.skip( /\A#!/ ) + lastpos=@file.pos loop do - til_charset( /[\s\v]/ ) - break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 ) - if @file.skip( /.-K(.)/ ) - case $1 - when 'u'; @encoding=:utf8 - when 'e'; @encoding=:euc - when 's'; @encoding=:sjis + til_charset( /[#@@WSCHARS]/o ) + assert @file.pos > lastpos + break if @file.match( /^\n|#@@WSNONLCHARS([^-#@@WSCHARS])/o,4 ) + if @file.skip( /.-#{CHAINOPTIONS}*K#@@WSNONLCHARS*([a-zA-Z0-9])/o ) + case @file.last_match[1] + when 'u','U'; @encoding||=:utf8 + when 'e','E'; @encoding||=:euc + when 's','S'; @encoding||=:sjis end + elsif @file.skip( /.#{LONGOPTIONS}/o ) end + getchar + lastpos=@file.pos end til_charset( /[\n]/ ) + @moretokens<<ShebangToken.new(@file[0...@file.pos]) + pos=input_position + @moretokens<<EscNlToken.new(readnl,pos,@filename,2) + @moretokens<<FileAndLineToken.new(@filename,2,input_position) end - if @rubyversion>=1.9 and @file.skip( - /\A#[\x00-\x7F]*?(?:en)?coding[\s\v]*[:=][\s\v]*([a-z0-9_-]+)[\x00-\x7F]*\n/i - ) - name=$1 - name.downcase! - name=ENCODING_ALIASES[name] if ENCODING_ALIASES[name] - @encoding=name.to_sym if ENCODINGS.include? name - end + encpos=input_position unless enc + enc||=read_encoding_line + ensure + @moretokens<<EncodingDeclToken.new(enc||'',@encoding,enc ? encpos : input_position) if @encoding + @encoding||=:ascii end + def read_encoding_line + end + def progress_printer return unless ENV['RL_PROGRESS'] $stderr.puts 'printing progresses' @progress_thread=Thread.new do until EoiToken===@last_operative_token @@ -308,17 +408,22 @@ raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}" end end #----------------------------------- + def unshift(*tokens) + @moretokens.unshift(*tokens) + end + + #----------------------------------- def eof? rulexer_eof? or EoiToken===@last_operative_token end #----------------------------------- def input_position - rulexer_input_position+@offset_adjust + rulexer_input_position+@offset_adjust+@offset_adjust2 end #----------------------------------- def input_position_raw @file.pos @@ -390,11 +495,11 @@ @moretokens.unshift tok return result end #----------------------------------- - WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/ + WSCHARSET=/[#\\\n#@@WSCHARS\x00\x04\x1a]/o def ignored_tokens(allow_eof=false,allow_eol=true) result=[] result << @moretokens.shift while StillIgnoreToken===@moretokens.first @moretokens.empty? or return result loop do @@ -426,11 +531,11 @@ =begin @whsphandler||=CharHandler.new(self, :==, "#" => :comment, "\n" => :newline, "\\" => :escnewline, - "\s\t\v\r\f" => :whitespace + "#@@WSCHARS\t\r\f" => :whitespace ) #tok=nil while tok=@whsphandler.go((nextchar or return result)) block_given? and NewlineToken===tok and yield tok result << tok @@ -474,23 +579,23 @@ #just asserts because those contexts are never encountered. #control goes through symbol(<...>,nil) assert( /^#@@LETTER$/o===context) assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/) - if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":" - @moretokens.push SymbolToken.new(str,oldpos), KeywordToken.new("=>",input_position-1) - else - @moretokens.unshift(*parse_keywords(str,oldpos) do |tok,except| +# if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":" +# @moretokens.unshift SymbolToken.new(str,oldpos), KeywordToken.new(":",input_position-1,:as=>"=>") +# else + @moretokens.unshift(*special_identifier?(str,oldpos) do |tok,except| #most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val was_last=@last_operative_token @last_operative_token=tok if tok normally=safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) } (Array===normally ? normally[0]=except : normally=except) if except normally end) - end +# end return @moretokens.shift end #----------------------------------- IDENTREX={} @@ -510,13 +615,13 @@ #= and ! only match if not part of a larger operator trailers = case context when ?@,?$ then "" # when ?: then "!(?![=])|\\?|=(?![=~>])" - else "!(?![=])|\\?" + else "!(?=\\z|[^=]|=[=~>])|\\?" end - @in_def_name||context==?: and trailers<<"|=(?![=~>])" + @in_def_name||context==?: and trailers<<"|=(?![~>]|=[^~=>])" @file.scan(IDENTREX[trailers]||=/^(?>#@@LETTER#@@LETTER_DIGIT*(?:#{trailers})?)/) end #----------------------------------- @@ -551,16 +656,18 @@ end end #----------------------------------- def in_lvar_define_state lasttok=@last_operative_token - #@defining_lvar is a hack - @defining_lvar or case ctx=@parsestack.last + return true if @defining_lvar #@defining_lvar is a hack + ctx=@parsestack.last + case ctx #when ForSMContext; ctx.state==:for - when UnparenedParamListLhsContext; /^(->|,|;)$/===lasttok.ident + when UnparenedParamListLhsContext + /^(->|,|;)$/===lasttok.ident or /^[*&]$/===lasttok.ident && lasttok.unary when RescueSMContext - lasttok.ident=="=>" and @file.match?( /\A[\s\v]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om ) + lasttok.ident=="=>" and @file.match?( /\A[#@@WSCHARS]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om ) #when BlockParamListLhsContext; true end end IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2 @@ -585,11 +692,11 @@ assert String===name was_in_lvar_define_state=in_lvar_define_state(lasttok) #maybe_local really means 'maybe local or constant' maybe_local=case name - when /(?!#@@LETTER_DIGIT).$/o #do nothing + when /[?!=]$/o #do nothing when /^#@@LCLETTER/o (localvars===name or #VARLIKE_KEYWORDS===name or was_in_lvar_define_state ) and not lasttok===/^(\.|::)$/ @@ -603,10 +710,13 @@ oldlast=@last_operative_token tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos)) oldpos= input_position + oldline= linenum + + #deal with ws following the ident sawnl=false result=ws_toks=ignored_tokens(true) {|nl| sawnl=true } if sawnl || eof? if was_in_lvar_define_state if /^#@@LCLETTER#@@LETTER_DIGIT*$/o===name @@ -615,20 +725,36 @@ end return result.unshift(tok) elsif maybe_local return result.unshift(tok) #if is_const else - return result.unshift( + toks=[ MethNameToken.new(name,pos), #insert implicit parens right after tok ImplicitParamListStartToken.new( oldpos), ImplicitParamListEndToken.new( oldpos) - ) + ] + toks.each{|t| t.endline=oldline} + return result.unshift(*toks) end end #if next op is assignment (or comma in lvalue list) #then omit implicit parens + assignment_coming= + /\A(?: + =[^>=~] | (,) | (;) | (\)) | + (in(?!#@@LETTER_DIGIT)) | (\|[^\|=]) | [%\/\-+^*&|]= | ([<>*&|])\6= + )/mox===readahead(3) && + case + when $1; comma_in_lvalue_list? #comma + when $2; semicolon_in_block_param_list? + when $3; last_context_not_implicit.lhs #right paren in lhs + when $4; ForSMContext===last_context_not_implicit #in + when $5; BlockParamListLhsContext===last_context_not_implicit #ending goalpost + else true + end +=begin was assignment_coming=case nc=nextchar when ?=; not( /^=[>=~]$/===readahead(2) ) when ?,; comma_in_lvalue_list? when (?; if @rubyversion>=1.9); ParenedParamListLhsContext===@parsestack.last when ?); last_context_not_implicit.lhs @@ -640,19 +766,24 @@ #is it a goalpost? BlockParamListLhsContext===last_context_not_implicit && readahead(2)[1] != ?| when ?%,?/,?-,?+,?^; readahead(2)[1]== ?= end +=end + if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state) tok=assign_lvar_type! VarNameToken.new(name,pos) - if /(?!#@@LETTER_DIGIT).$/o===name - elsif /^#@@LCLETTER/o===name and !(lasttok===/^(\.|::)$/) + #if /(?!#@@LETTER_DIGIT).$/o===name + #nonalphabetics... operator? skip it + #els + if /^#@@LCLETTER/o===name #and !(lasttok===/^(\.|::)$/) localvars[name]=true end return result.unshift(tok) end + nc=nextchar implicit_parens_to_emit= if assignment_coming @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT else @@ -675,59 +806,43 @@ (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1 when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~,NONASCII; 1 #" when ?{ maybe_local=false 1 -=begin - x=2 - x-=1 if /\A(return|break|next)\Z/===name and - !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/) - x -=end when ?( maybe_local=false lastid=lasttok&&lasttok.ident case lastid when /\A[;(]|do\Z/; was_after_nonid_op=false when '|'; was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last when '{'; was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last end if KeywordToken===lasttok was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil? - want_parens=!(ws_toks.empty? or was_after_nonid_op) #or -# /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or -# MethNameToken===lasttok or -# RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid -# ) + want_parens=!(ws_toks.empty? or was_after_nonid_op) #look ahead for closing paren (after some whitespace...) want_parens=false if @file.match?( /\A.(?:\s|\v|\#.*\n)*\)/ ) -# afterparen=@file.pos -# getchar -# ignored_tokens(true) -# want_parens=false if nextchar==?) -# @file.pos=afterparen want_parens=true if /^(return|break|next)$/===@last_operative_token.ident and not( KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident ) want_parens ? 1 : 0 - when ?},?],?),?;,(?^ unless @enable_macro), ?|, ?>, ?,, ?., ?=; 2 - when ?+, ?-, ?%, ?/, (?^ if @enable_macro) + when @always_binary_chars; 2 # ?},?],?),?;,(?^ unless @enable_macro), ?|, ?>, ?,, ?., ?=; 2 + when @unary_or_binary_chars; #?+, ?-, ?%, ?/, (?^ if @enable_macro) if /^(return|break|next)$/===@last_operative_token.ident and not( KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident ) 1 else (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3 end when ?*, ?& - # lasttok=@last_operative_token if /^(return|break|next)$/===@last_operative_token.ident and not( KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident ) 1 else - (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3 + (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o] and !@in_def_name) ? 2 : 3 end when ?: next2=readahead(2) if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then $1 && !ws_toks.empty? ? 3 : 2 @@ -736,11 +851,10 @@ end when ??; next3=readahead(3) #? never begins a char constant if immediately followed #by 2 or more letters or digits /^\?([#{WHSPLF}]|#@@LETTER_DIGIT{2})/o===next3 ? 2 : 3 -# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3 when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?(?:["'`]|#@@LETTER_DIGIT)/o]) ? 3 : 2 when ?[; if ws_toks.empty? (KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 3 : 2 else @@ -795,10 +909,12 @@ # 'then else elsif rescue ensure (illegal in value context)' # 'need to pop noparen from parsestack on these tokens: (in operator context)' # 'not ok:' # 'not (but should it be?)' + ensure + result.first.endline||=oldline unless result.empty? end #----------------------------------- #read ahead to see if there's method param list (with real parentheses) #and 2 or more parameters (and hence a comma to separate them) @@ -816,33 +932,43 @@ break true elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.tag and @parsestack.size==basesize+1 break true elsif EoiToken===tok lexerror tok, "unexpected eof in parameter list" + break end } result.concat @moretokens @moretokens.replace [] return [result,pass] end + #----------------------------------- + module NestedContexts + class VContext<NestedContext + end + end + VContext=NestedContexts::VContext CONTEXT2ENDTOK={ AssignmentRhsContext=>AssignmentRhsListEndToken, ParamListContextNoParen=>ImplicitParamListEndToken, KWParamListContextNoParen=>ImplicitParamListEndToken, #break,next,return WhenParamListContext=>KwParamListEndToken, - RescueSMContext=>KwParamListEndToken + RescueSMContext=>KwParamListEndToken, + VContext=>0 } - def abort_noparens!(str='') + def abort_noparens!(str='',adj=str.size) #assert @moretokens.empty? result=[] - while klass=CONTEXT2ENDTOK[@parsestack.last.class] - result << klass.new(input_position-str.length) - break if RescueSMContext===@parsestack.last #and str==':' - break if WhenParamListContext===@parsestack.last and str==':' + ctx=@parsestack.last + while klass=CONTEXT2ENDTOK[ctx.class] + result << klass.new(input_position-adj) if Class===klass + break if RescueSMContext===ctx #and str==':' + break if WhenParamListContext===ctx and str==':' @parsestack.pop + ctx=@parsestack.last end return result end #----------------------------------- @@ -876,18 +1002,22 @@ #----------------------------------- CONTEXT2ENDTOK_FOR_DO={ AssignmentRhsContext=>AssignmentRhsListEndToken, ParamListContextNoParen=>ImplicitParamListEndToken, - UnparenedParamListLhsContext=>KwParamListEndToken, + UnparenedParamListLhsContext=>ImplicitParamListEndToken, ExpectDoOrNlContext=>1, #WhenParamListContext=>KwParamListEndToken, #RescueSMContext=>KwParamListEndToken } def abort_noparens_for_do!(str='') #assert @moretokens.empty? result=[] + return result if @parsestack[-1].class==AssignmentRhsContext and + @parsestack[-2].class==ParamListContextNoParen and + @parsestack[-3].class==DefContext and + !@parsestack[-3].in_body while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class] if klass==AssignmentRhsListEndToken i=@parsestack.size end_the_assign=false while AssignmentRhsContext===@parsestack[i-=1] @@ -928,42 +1058,54 @@ end return result end #----------------------------------- - def enable_macros! - @enable_macro="macro" + def enable_macros! #this wholemethod should be unnecessary now + @enable_macro="macro" #shouldn't be needed anymore... should be safe to remove class <<self alias keyword_macro keyword_def end + @unary_or_binary_chars.add '^' + @always_binary_chars.remove '^' end public :enable_macros! #----------------------------------- - @@SPACES=/[\ \t\v\f\v]/ + @@SPACES=/[\ \t\f\v]/ @@WSTOK=/(?> (?>\r?)\n| (?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)| - \#(?>[^\n]*)\n| + \#(?>[^\n]*)(?=\n)| \\(?>\r?)\n| ^=begin(?>(?>#@@SPACES.*)?)\n (?>(?:(?!=end)(?>.*)\n))* =end(?>(?>#@@SPACES.*)?)\n )/x @@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o + WSTOKS=@@WSTOKS def divide_ws(ws0,offset) result=[] ws0.scan(/\G#@@WSTOK/o){|ws| incr= $~.begin(0) - tok=case ws - when /\A[\#=]/; IgnoreToken.new(ws,offset+incr) - when /\n\Z/; EscNlToken.new(ws,offset+incr,@filename,@linenum) - else WsToken.new(ws,offset+incr) + lines=ws.count "\n" + case ws + when /\A\#/ + result<< IgnoreToken.new(ws,offset+incr) + when /\A=/ + tok=IgnoreToken.new(ws,offset+incr) + tok.startline=@linenum + tok.endline=@linenum+lines + result<<tok + when /\n\Z/ + result<< EscNlToken.new(ws,offset+incr,@filename,@linenum+1) + else + result<< WsToken.new(ws,offset+incr) end - result << tok - @linenum+=ws.count "\n" + result<< FileAndLineToken.new(@filename,@linenum+lines,offset+incr+ws.size) if lines>0 + @linenum+=lines } result.each_with_index{|ws,i| if WsToken===ws ws.ident << result.delete_at(i+1).ident while WsToken===result[i+1] end @@ -990,17 +1132,17 @@ #----------------------------------- #parse keywords now, to prevent confusion over bare symbols #and match end with corresponding preceding def or class or whatever. #if arg is not a keyword, the block is called - def parse_keywords(str,offset,&block) + def special_identifier?(str,offset,&block) assert @moretokens.empty? assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident) - result=[KeywordToken.new(str,offset)] - m=:"keyword_#{str}" - respond_to?(m) ? (send m,str,offset,result,&block) : block[MethNameToken.new(str)] + m="keyword_#{str}" + return yield( MethNameToken.new(str) )unless respond_to?(m) + send m,str,offset,[KeywordToken.new(str,offset)],&block end public #these have to be public so respond_to? can see them (sigh) def keyword_end(str,offset,result) result.unshift(*abort_noparens!(str)) @parsestack.last.see self,:semi #sorta hacky... should make an :end event instead? @@ -1043,10 +1185,12 @@ @localvars_stack.push SymbolTable.new while @file.check( /\A::/ ) #VarNameToken===@moretokens.last or #KeywordToken===@moretokens.last && @moretokens.last.ident=="::" @file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break + #should not allow newline around :: here + md=@file.last_match all,ws1,dc,ws2,name=*md if ws1 @moretokens.concat divide_ws(ws1,md.begin(1)) incr=ws1.size @@ -1136,16 +1280,16 @@ end end return result end def keyword_def(str,offset,result) #macros too, if enabled - result.first.has_end! - @parsestack.push ctx=DefContext.new(@linenum) - ctx.state=:saw_def + result.first.has_end! + @parsestack.push ctx=DefContext.new(@linenum) + ctx.state=:saw_def old_moretokens=@moretokens @moretokens=[] - aa=@moretokens + #aa=@moretokens #safe_recurse { |aa| set_last_token KeywordToken.new(str) #hack result.concat ignored_tokens #read an expr like a.b.c or a::b::c @@ -1154,32 +1298,52 @@ old_size=@parsestack.size parencount=0 begin tok=get1token case tok - when/^\($/.token_pat then parencount+=1 - when/^\)$/.token_pat then parencount-=1 + when /^\($/.token_pat ; parencount+=1 + when /^\)$/.token_pat ; parencount-=1 + when EoiToken + @moretokens= old_moretokens.concat @moretokens + return result<<lexerror( tok, "eof in def header" ) end - EoiToken===tok and lexerror tok, "eof in def header" result << tok end until parencount==0 #@parsestack.size==old_size @localvars_stack.push SymbolTable.new else #no parentheses, all tail set_last_token KeywordToken.new(".") #hack hack tokindex=result.size + tokline=result.last.endline result << tok=symbol(false,false) name=tok.to_s assert !in_lvar_define_state #maybe_local really means 'maybe local or constant' + @maybe_local_pat||=%r{ + ((?!#@@LETTER_DIGIT).$) | ^[@$] | (#@VARLIKE_KEYWORDS | #@FUNCLIKE_KEYWORDS) | + (^#@@LCLETTER) | (^#@@UCLETTER) + }x + @maybe_local_pat === name and + maybe_local= + case + when $1; maybe_local=false #operator or non-ident + when $2; ty=KeywordToken #keyword + when $3; maybe_local=localvars===name #lvar or method + when $4; is_const=true #constant + else true + end + #maybe_local=ty=KeywordToken if is__ENCODING__keyword?(name) #"__ENCODING__"==name and @rubyversion>=1.9 +=begin was maybe_local=case name when /(?!#@@LETTER_DIGIT).$/o; #do nothing when /^[@$]/; true - when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken + when /#@VARLIKE_KEYWORDS|#@FUNCLIKE_KEYWORDS/,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken when /^#@@LCLETTER/o; localvars===name when /^#@@UCLETTER/o; is_const=true #this is the right algorithm for constants... end +=end + result.push( *ignored_tokens(false,false) ) nc=nextchar if !ty and maybe_local if nc==?: || nc==?. ty=VarNameToken @@ -1193,11 +1357,17 @@ result.insert tokindex+1, newtok end end assert result[tokindex].equal?(tok) - var=assign_lvar_type! ty.new(tok.to_s,tok.offset) + var=ty.new(tok.to_s,tok.offset) + if ty==KeywordToken and name[0,2]=="__" + send("keyword_#{name}",name,tok.offset,[var]) + end + var.endline=tokline + + var=assign_lvar_type! var @localvars_stack.push SymbolTable.new var.in_def=true if inside_method_def? and var.respond_to? :in_def= result[tokindex]=var @@ -1228,19 +1398,24 @@ if endofs result.insert end_index,ImplicitParamListEndToken.new(ofs) else ofs+=listend.to_s.size end - result.insert end_index+1,EndHeaderToken.new(ofs) + tok=EndHeaderToken.new(ofs) + tok.endline= result[end_index-1].endline #@linenum + result.insert end_index+1,tok break end tok=get1token result<< tok case tok when EoiToken lexerror tok,'unexpected eof in def header' + @moretokens= old_moretokens.concat @moretokens + return result + when StillIgnoreToken when MethNameToken ,VarNameToken # /^#@@LETTER/o.token_pat lexerror tok,'expected . or ::' unless state==:expect_name state=:expect_op when /^(\.|::)$/.token_pat @@ -1254,11 +1429,14 @@ ctx.state=:def_body state==:expect_op or lexerror tok,'expected identifier' if endofs result.insert( -2,ImplicitParamListEndToken.new(tok.offset) ) end - result.insert( -2, EndHeaderToken.new(tok.offset) ) + ehtok= EndHeaderToken.new(tok.offset) + #ehtok.endline=tok.endline + #ehtok.endline-=1 if NewlineToken===tok + result.insert( -2, ehtok ) break else lexerror(tok, "bizarre token in def name: " + "#{tok}:#{tok.class}") end @@ -1423,19 +1601,301 @@ def keyword___LINE__(str,offset,result) result.last.value=@linenum return result end + + #----------------------------------- + def encoding_name_normalize name + name=name.dup + name.downcase! + name.tr_s! '-_','' + name=ENCODING_ALIASES[name] if ENCODING_ALIASES[name] + return name + end + module RubyLexer1_9 + FUNCLIKE_KEYWORDLIST=RubyLexer::FUNCLIKE_KEYWORDLIST+FUNCLIKE_KEYWORDLIST_1_9 + VARLIKE_KEYWORDLIST=RubyLexer::VARLIKE_KEYWORDLIST+VARLIKE_KEYWORDLIST_1_9 + FUNCLIKE_KEYWORDS=/^(?:#{FUNCLIKE_KEYWORDLIST.join '|'})$/ + VARLIKE_KEYWORDS=/^(?:#{VARLIKE_KEYWORDLIST.join '|'})$/ + def FUNCLIKE_KEYWORDS orig=nil + /(?:#{orig||super()}|^(?:#{FUNCLIKE_KEYWORDLIST_1_9.join '|'})$)/ + end + + def VARLIKE_KEYWORDS orig=nil + /(?:#{orig||super()}|^(?:#{VARLIKE_KEYWORDLIST_1_9.join '|'})$)/ + end + + def rubylexer_modules_init + super + @FUNCLIKE_KEYWORDS=FUNCLIKE_KEYWORDS @FUNCLIKE_KEYWORDS unless @FUNCLIKE_KEYWORDS==="->" + @VARLIKE_KEYWORDS=VARLIKE_KEYWORDS @VARLIKE_KEYWORDS unless @VARLIKE_KEYWORDS==="__ENCODING__" + end + + #----------------------------------- + def dquote_handle(ch) + dquote19_esc_seq(ch,'"','"') + end + #----------------------------------- + def dquote_handler_name + :dquote19_esc_seq + end + #----------------------------------- + def Wquote_handler_name + :Wquote19_esc_seq + end + + #----------------------------------- + def method_params? # .() + lasttok=last_token_maybe_implicit #last_operative_token + super or + (lasttok and lasttok.ident=='.') + end + + #----------------------------------- + def callsite_symbol(x) + return if nextchar==?( + super + end + + #----------------------------------- + def read_encoding_line + if line=@file.scan( + /\A#{WSNONLCHARS}*#[\x00-\x7F]*?(?:en)?coding#{WSNONLCHARS}*[:=]#{WSNONLCHARS}*([a-z0-9_-]+)[\x00-\x7F]*$/io + ) + name=@file.last_match[1] + name=encoding_name_normalize name + @encoding=name.to_sym if ENCODINGS.include? name + return line + end + end + + #----------------------------------- def keyword___ENCODING__(str,offset,result) #result.last.value=huh return result end + #----------------------------------- def keyword_not(*args,&block) _keyword_funclike(*args,&block) end - end + #----------------------------------- + def special_identifier?(str,oldpos) + if @parsestack.last.wantarrow and @file.skip ":" + return SymbolToken.new(str,oldpos), KeywordToken.new(":",input_position-1,:as=>"=>") + else + return super + end + end + + #----------------------------------- + def want_hard_nl? + return false if @file.check( /\A\n(?:#{WSTOKS})?[.:][^.:]/o ) + super + end + + #----------------------------------- + #RE_* shamelessly stolen from jcode.rb + RE_UTF8= /[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]|[\xf0-\xf7][\x80-\xbf]{3}/n #longer sequences are possible + RE_EUC= /[\xa1-\xfe][\xa1-\xfe]/n #is this complete? + RE_SJIS= /[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]/n #is this complete? windows31j? + ENCODING2EXTCHAR={ + :utf8=>RE_UTF8, + :euc=>RE_EUC, + :sjis=>RE_SJIS, + :binary=>/[\x80-\xFF]/n, + :ascii=>nil + } + + #handle ? in ruby code. is it part of ?..: or a character literal? + def char_literal_or_op(ch) #unicode char literals, etc + if colon_quote_expected? ch + #char literal + pos=input_position + getchar + extchar= ENCODING2EXTCHAR[@encoding] + result= + if extchar and extchar=@file.scan( extchar ) + assign_encoding!(StringToken.new('"', extchar)) + else + getchar_maybe_escape + assign_encoding!(StringToken.new('"', @file[pos+1...input_position])) + end + result.offset=pos + result.bs_handler=:dquote19_esc_seq + result.open='?' + result.close='' + return result + else #(ternary) operator + super + end + end + + #----------------------------------- + def plusminus(ch) #-> + pos=input_position + assert(/^[+\-]$/===ch) + if unary_op_expected?(ch) or + KeywordToken===@last_operative_token && + /^(return|break|next)$/===@last_operative_token.ident + if '->' == readahead(2) #stabby proc + @file.pos+=2 + #push down block context + localvars.start_block + @parsestack.push ctx=RubyLexer::BlockContext.new(@linenum) + ctx.wanting_stabby_block_body=true + #read optional proc params + block_param_list_lookahead ?(, RubyLexer::ParenedParamListLhsContext + result=RubyLexer::KeywordToken.new('->',pos) + result.offset=pos + return result + end + end + super + end + + #----------------------------------- + #match /=(>|~|==?)?/ (= or == or =~ or === or =>) + def equals(ch) # /(?<foo>bar)/=~'bar'; declares foo lvar + if readahead(2)=='=~' # =~... after regex, maybe? + last=last_operative_token + + if StringToken===last and last.lvars + #ruby delays adding lvars from regexps to known lvars table + #for several tokens in some cases. not sure why or if on purpose + #i'm just going to add them right away + last.lvars.each{|lvar| localvars[lvar]=true } + end + end + return super + end + + #----------------------------------- + def assign_encoding! str + #search for nonascii bytes + #either directly or via hex (\xXX) or octal (\NNN) escapes + #and \u escapes also + utf8=nonascii=false + str.elems.grep(String).each do|frag| + frag.scan(/#{EVEN_BS_S}(?:\\u|\\2[0-7][0-7]|\\x[89a-fA-F][0-9a-fA-F])|[^\x00-\x7F]/o) do |match| + if match[-1]==?u + utf8=true + break if nonascii + else + nonascii=true + break if utf8 + end + end or break + end + + lexerror(str,"utf8 and nonascii intermixed") if utf8 and nonascii and @encoding!=:utf8 + + #encoding is source encoding unless \u escape is found + str.utf8! if utf8 + + #maybe assign string fragments encodings if running under >=1.9? + + return str + end + + #----------------------------------- + def regex(ch=nil) + result=super + named_brs=[] + if result.elems.size==1 and String===result.elems.first + elem=result.elems.first + index=0 + while index=elem.index(/(#{EVEN_BS_S})( \(\?[<'] | \(\?\# | \[ )/xo,index) + index+=$1.size + case $2 + when "(?<" + index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)>/o,index) + break lexerror(result, "malformed named backreference") unless index + index+=$&.size + named_brs<<$1 + when "(?'" + index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)'/o,index) + break lexerror(result, "malformed named backreference") unless index + index+=$&.size + named_brs<<$1 + when "(?#" + index+=3 + index=elem.index(/#{EVEN_BS_S}\)/o,index) + break lexerror(result, "unterminated regexp comment") unless index + index+=$&.size + when "[" + index+=1 + paren_ctr=1 + loop do + index=elem.index(/#{EVEN_BS_S}(&&\[\^|\])/o,index) + break lexerror(result, "unterminated character class") unless index + index+=$&.size + if $1==']' + paren_ctr-=1 + break if paren_ctr==0 + else + paren_ctr+=1 + end + end + break unless index + + end + end + result.lvars= named_brs unless named_brs.empty? + end + return result + end + + def build_method_operators + /#{RUBYSYMOPERATORREX}|\A![=~@]?|\A`/o + end + + include RubyLexer::NestedContexts + + def semicolon_in_block_param_list? + ParenedParamListLhsContext===@parsestack.last || + BlockParamListLhsContext===@parsestack.last + end + + def is__ENCODING__keyword?(name) + "__ENCODING__"==name + end + + #----------------------------------- + def colon_operator tok + if TernaryContext===@parsestack.last + tok.ternary=true + @parsestack.pop #should be in the context's see handler + end + end + + def maybe_end_stabby_block_param_list(tokch) + stabby_params_just_ended=false + (@parsestack.size-1).downto(1){|i| + case @parsestack[i] + when ParamListContextNoParen,AssignmentRhsContext + #do nothing yet... see if inside a UnparenedParamListLhsContext + when UnparenedParamListLhsContext #stabby proc + @moretokens<<tokch + (@parsestack.size-1).downto(i){|j| + @moretokens.unshift @parsestack[j].endtoken(input_position-1) + } + @parsestack[i..-1]=[] + tokch=@moretokens.shift + stabby_params_just_ended=true + break + else break + end + } + return stabby_params_just_ended,tokch + end + end #module RubyLexer1_9 + + def semicolon_in_block_param_list?; end + def is__ENCODING__keyword?(name); end + def _keyword_funclike(str,offset,result) if @last_operative_token===/^(\.|::)$/ result=yield MethNameToken.new(str) #should pass a methname token here else tok=KeywordToken.new(str) @@ -1490,11 +1950,11 @@ #----------------------------------- def block_param_list_lookahead starter=?|, ctx_type=BlockParamListLhsContext safe_recurse{ |la| - set_last_token KeywordToken.new( ';' ) + set_last_token KeywordToken.new( ';' ) a=ignored_tokens if eat_next_if(starter) mycontext=ctx_type.new(@linenum) a<< KeywordToken.new(mycontext.starter, input_position-1) @@ -1538,11 +1998,11 @@ end end elsif starter==?( ctx_type=UnparenedParamListLhsContext #hacky... should be a param? @parsestack.push ctx_type.new(@linenum) - a<<KwParamListStartToken.new( input_position ) + a<<ImplicitParamListStartToken.new( input_position ) end set_last_token KeywordToken.new( ';' ) #a.concat ignored_tokens @@ -1620,11 +2080,11 @@ def method_parameters(result,normal_comma_level,endingblock,old_parsestack_size) listend=nil set_last_token KeywordToken.new( ',' )#hack nextvar=nil loop do - expect_name=(@last_operative_token===',' and + expect_name=(/^[,;]$/===@last_operative_token.ident and normal_comma_level==@parsestack.size) expect_name and @defining_lvar||=true result << tok=get1token break lexerror(tok, "unexpected eof in def header") if EoiToken===tok @@ -1695,11 +2155,11 @@ assert('*&'[ch]) want_unary=unary_op_expected?(ch) || (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token) result=quadriop(ch) if want_unary - #readahead(2)[1..1][/[\s\v#\\]/] or #not needed? + #readahead(2)[1..1][/[#@@WSCHARS#\\]/o] or #not needed? assert OperatorToken===result result.tag=:unary #result should distinguish unary+binary *& WHSPLF[nextchar.chr] or @moretokens << NoWsToken.new(input_position) cill=comma_in_lvalue_list? @@ -1722,17 +2182,19 @@ #----------------------------------- #handle ? in ruby code. is it part of ?..: or a character literal? def char_literal_or_op(ch) if colon_quote_expected? ch getchar - if @rubyversion >= 1.9 - StringToken.new getchar_maybe_escape - else +# if @rubyversion >= 1.9 +# assign_encoding! StringToken.new getchar_maybe_escape +# else ch=getchar_maybe_escape[0] ch=ch.ord if ch.respond_to? :ord - NumberToken.new ch - end + result=NumberToken.new ch + result.char_literal=true + return result +# end else @parsestack.push TernaryContext.new(@linenum) KeywordToken.new getchar #operator end end @@ -1745,11 +2207,11 @@ @parsestack.pop op=true end if !op and after_nonid_op?{ - !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}] + !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[#@@WSCHARS=]}o] } || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(") return regex(ch) else #/ is operator result=getchar if eat_next_if(?=) @@ -1770,20 +2232,20 @@ s=tok.to_s case s when /^[@$]/; true when /^<</; HerePlaceholderToken===tok when /(?!#@@LETTER_DIGIT).$/o; false -# when /^#@@LCLETTER/o; localvars===s or VARLIKE_KEYWORDS===s +# when /^#@@LCLETTER/o; localvars===s or @VARLIKE_KEYWORDS===s when /^#@@LETTER/o; VarNameToken===tok else raise "not var or method name: #{s}" end end #----------------------------------- def colon_quote_expected?(ch) #yukko hack assert ':?'[ch] - readahead(2)[/^(\?[^#{WHSPLF}]|:[^\s\r\n\t\f\v :])$/o] or return false + readahead(2)[/^(\?[^#{WHSPLF}]|:[^#@@WSCHARS :])$/o] or return false after_nonid_op? { #possible func-call as operator not is_var_name? and @@ -1802,68 +2264,70 @@ qe= colon_quote_expected?(ch) lastchar=prevchar eat_next_if(ch[0]) or raise "needed: "+ch - if nextchar==?( and @enable_macro + if nextchar==?( and @enable_macro #factored result= OperatorToken.new(':', startpos) result.unary=true return result end #handle quoted symbols like :"foobar", :"[]" - qe and return symbol(':') + if qe + return symbol(':') + elsif eat_next_if(?:) + #we definately found a :: - #look for another colon; return single : if not found - unless eat_next_if(?:) + colon2=KeywordToken.new( '::',startpos) + lasttok=@last_operative_token + assert !(String===lasttok) + if (VarNameToken===lasttok or MethNameToken===lasttok) and + lasttok===/^(?:[$@]|#@@LETTER)/o and !WHSPCHARS[lastchar] + then + @moretokens << colon2 + result= NoWsToken.new(startpos) + else + result=colon2 + end + dot_rhs(colon2) + return result + + #return single : token + else #cancel implicit contexts... - @moretokens.push(*abort_noparens!(':')) + @moretokens.push(*abort_noparens!(':')) #special treatment not needed in 1.9 mode? @moretokens.push tok=KeywordToken.new(':',startpos) - case @parsestack.last - when TernaryContext - tok.ternary=true - @parsestack.pop #should be in the context's see handler - when ExpectDoOrNlContext #should be in the context's see handler - if @rubyversion<1.9 - @parsestack.pop - assert @parsestack.last.starter[/^(while|until|for)$/] - tok.as=";" - end - when ExpectThenOrNlContext,WhenParamListContext - if @rubyversion<1.9 - #should be in the context's see handler - @parsestack.pop - tok.as="then" - end - when RescueSMContext - tok.as=";" - end or + colon_operator(tok) or fail ": not expected in #{@parsestack.last.class}->#{@parsestack.last.starter}" - #end ternary context, if any @parsestack.last.see self,:colon return @moretokens.shift end - #we definately found a :: + end - colon2=KeywordToken.new( '::',startpos) - lasttok=@last_operative_token - assert !(String===lasttok) - if (VarNameToken===lasttok or MethNameToken===lasttok) and - lasttok===/^(?:[$@]|#@@LETTER)/o and !WHSPCHARS[lastchar] - then - @moretokens << colon2 - result= NoWsToken.new(startpos) - else - result=colon2 - end - dot_rhs(colon2) - return result + #----------------------------------- + def colon_operator tok + case @parsestack.last + when TernaryContext + tok.ternary=true + @parsestack.pop #should be in the context's see handler + when ExpectDoOrNlContext #should be in the context's see handler + @parsestack.pop + assert @parsestack.last.starter[/^(while|until|for)$/] + tok.as=";" + when ExpectThenOrNlContext,WhenParamListContext + #should be in the context's see handler + @parsestack.pop + tok.as="then" + when RescueSMContext + tok.as=";" + end end #----------------------------------- def symbol(notbare,couldbecallsite=!notbare) assert !couldbecallsite @@ -1881,17 +2345,18 @@ double_quote('"') when ?' #' assert notbare open=":'"; close="'" single_quote("'") - when ?` then read(1) #` +# when ?` then read(1) #` when ?@ then at_identifier.to_s when ?$ then dollar_identifier.to_s when ?_,?a..?z,NONASCII then identifier_as_string(?:) when ?A..?Z then result=identifier_as_string(?:) if @last_operative_token==='::' + fail #i think this can't happen anymore now assert klass==MethNameToken /#@@LETTER_DIGIT$/o===result and klass=VarNameToken end result else @@ -1917,17 +2382,17 @@ #look for operators opmatches=readahead(3)[@method_operators] return [read(opmatches.size), start] if opmatches case nc=nextchar - when ?` #` - return [read(1),start] +# when ?` #` +# return [read(1),start] when ?_,?a..?z,?A..?Z,NONASCII context=merge_assignment_op_in_setter_callsites? ? ?: : nc return [identifier_as_string(context), start] when ?( - return [nil,start] if @enable_macro or @rubyversion>=1.9 + return [nil,start] if @enable_macro or @rubyversion>=1.9 #factored end set_last_token KeywordToken.new(';') lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}") return [nil, start] @@ -1940,20 +2405,21 @@ dash=eat_next_if(?-) quote=eat_next_if( /['"`]/) if quote ender=til_charset(/[#{quote}]/) (quote==getchar) or - return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc") + return lexerror(res=HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc") quote_real=true else quote='"' ender=@file.scan(/#@@LETTER_DIGIT+/o) ender.length >= 1 or - return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header") + return lexerror(res=HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header") end res= HerePlaceholderToken.new( dash, quote, ender, quote_real ) + res.line=linenum if true res.open=["<<",dash,quote,ender,quote].join procrastinated=til_charset(/[\n]/)#+readnl unless @base_file @base_file=@file @@ -1980,18 +2446,19 @@ #one or two already read characters are overwritten here, #in order to keep offsets correct in the long term #(at present, offsets and line numbers between #here header and its body will be wrong. but they should re-sync thereafter.) - newpos=input_position_raw-nl.size + newpos=input_position_raw #unless procrastinated.empty? - @file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input + @file.modify(newpos,0,procrastinated) #vomit procrastinated text back onto input #end + #@offset_adjust2=-1 #nice idea, but crashes 1.9.2 and causes more warnings than it fixes... :( input_position_set newpos #line numbers would be wrong within the procrastinated section - @linenum-=1 + @linenum=res.line #was: @linenum-=1 #be nice to get the here body token at the right place in input, too... @pending_here_bodies<< body @offset_adjust-=bodysize#+nl.size @@ -2036,10 +2503,12 @@ #the action continues in newline, where #the rest of the here token is read after a #newline has been seen and res.affix is eventually called end + ensure + assign_encoding!(res.string) if res end #----------------------------------- def lessthan(ch) #match quadriop('<') or here doc or spaceship op case readahead(3) @@ -2071,17 +2540,17 @@ if @base_file and indices=@file.instance_eval{@start_pos} and (indices[-2]..indices[-1])===@file.pos @base_file.pos=@file.pos @file=@base_file @base_file=nil - result="\n" +# result="\n" end @offset_adjust=@min_offset_adjust @moretokens.push( *optional_here_bodies ) ln=@linenum - @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln-1), error), + @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln), error), FileAndLineToken.new(@filename,ln,input_position) start_of_line_directives return @moretokens.shift @@ -2089,21 +2558,24 @@ #----------------------------------- def optional_here_bodies result=[] if true - #handle here bodies queued up by previous line - pos=input_position - while body=@pending_here_bodies.shift + #handle here bodies queued up by previous line + pos=input_position + while body=@pending_here_bodies.shift #body.offset=pos - result.push EscNlToken.new("\n",body.offset-1,@filename,nil) - result.push FileAndLineToken.new(@filename,body.ident.line,body.offset) + result.push EscNlToken.new("\n",body.offset-1,@filename,@linenum) + result.push FileAndLineToken.new(@filename,@linenum,body.offset) result.push body #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first #result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off - body.headtok.line=@linenum-1 - end + @linenum+=body.linecount + body.endline=@linenum-1 + # body.startline=@linenum-1-body.linecount + end + else #...(we should be more compatible with dos/mac style newlines...) while tofill=@incomplete_here_tokens.shift result.push( here_body(tofill), @@ -2120,10 +2592,11 @@ def here_body(tofill) close="\n" tofill.string.offset= input_position linecount=1 #for terminator assert("\n"==prevchar) + startline=@linenum loop { assert("\n"==prevchar) #here body terminator? oldpos= input_position_raw @@ -2135,12 +2608,14 @@ lexerror tofill.string, "unterminated here body" break end if read(tofill.ender.size)==tofill.ender crs=til_charset(/[^\r]/)||'' - if nl=readnl - close+=tofill.ender+crs+nl + nl=nextchar + if !nl or nl==?\n + close+=tofill.ender+crs + close+="\n" if nl break end end input_position_set oldpos @@ -2221,35 +2696,39 @@ result=tofill.bodyclass.new(tofill,linecount) result.open=str.open="" tofill.close=close result.close=str.close=close[1..-1] result.offset=str.offset + result.endline=@linenum-1 + result.startline=startline assert str.open assert str.close return result end #----------------------------------- + def want_hard_nl? + NewlineToken===@last_operative_token || #hack + (KeywordToken===@last_operative_token and + @last_operative_token.ident=="rescue" and + !@last_operative_token.infix?) || + !after_nonid_op?{false} + end + + #----------------------------------- def newline(ch) assert("\r\n"[nextchar.chr]) #ordinary newline handling (possibly implicitly escaped) assert("\r\n"[nextchar.chr]) assert !@parsestack.empty? assert @moretokens.empty? - pre=FileAndLineToken.new(@filename,@linenum+1,input_position) - pre.allow_ooo_offset=true + hard=want_hard_nl? - hard=NewlineToken===@last_operative_token || #hack - (KeywordToken===@last_operative_token and - @last_operative_token.ident=="rescue" and - !@last_operative_token.infix?) || - !after_nonid_op?{false} +# hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o ) - hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o ) - if hard @offset_adjust=@min_offset_adjust a= abort_noparens! case @parsestack.last #these should be in the see:semi handler when ExpectDoOrNlContext; @parsestack.pop @@ -2257,17 +2736,19 @@ end assert !@parsestack.empty? @parsestack.last.see self,:semi a << rulexer_newline(ch) - @moretokens.replace a+@moretokens + a+=@moretokens + @moretokens.replace a else @offset_adjust=@min_offset_adjust offset= input_position nl=readnl - @moretokens.push EscNlToken.new(nl,offset,@filename,@linenum-1), - FileAndLineToken.new(@filename,@linenum,input_position) + a=[EscNlToken.new(nl,offset,@filename,@linenum), + FileAndLineToken.new(@filename,@linenum,input_position)] + @moretokens.push( *a ) end #optimization: when thru with regurgitated text from a here document, #revert back to original unadorned Sequence instead of staying in the list. if @base_file and indices=@file.instance_eval{@start_pos} and @@ -2282,27 +2763,17 @@ @offset_adjust=@min_offset_adjust @moretokens.unshift(*optional_here_bodies) - #adjust line count in fal to account for newlines in here bodys - i=@moretokens.size-1 - while(i>=0) - #assert FileAndLineToken===@moretokens[i] - i-=1 if FileAndLineToken===@moretokens[i] - break unless HereBodyToken===@moretokens[i] - pre_fal=true - fal.line-=@moretokens[i].linecount + #adjust line #s to account for newlines in here bodys + l=@linenum + a.reverse_each{|implicit| + implicit.endline=l + l-=1 if EscNlToken===implicit or NewlineToken===implicit + } - i-=1 - end - - if pre_fal - result=@moretokens.first - pre.offset=result.offset - @moretokens.unshift pre - end start_of_line_directives result=@moretokens.shift return result end @@ -2315,10 +2786,11 @@ ENDMARKER=/^__END__[\r\n]?\Z/ ENDMARKERLENGTH=8 def start_of_line_directives #handle =begin...=end (at start of a line) while EQBEGIN===readahead(EQBEGINLENGTH) + startline=@linenum startpos= input_position more= read(EQBEGINLENGTH-1) #get =begin begin eof? and raise "eof before =end" @@ -2335,12 +2807,14 @@ # newls= more.scan(/\r\n?|\n\r?/) # @linenum+= newls.size #inject the fresh comment into future token results - @moretokens.push IgnoreToken.new(more,startpos), - FileAndLineToken.new(@filename,@linenum,input_position) + comment=IgnoreToken.new(more,startpos) + comment.startline=startline + comment.endline=@linenum + @moretokens.push comment, FileAndLineToken.new(@filename,@linenum,input_position) end #handle __END__ if ENDMARKER===readahead(ENDMARKERLENGTH) assert !(ImplicitContext===@parsestack.last) @@ -2351,16 +2825,18 @@ #----------------------------------- #used to resolve the ambiguity of - # unary ops (+, -, *, &, ~ !) in ruby + # unary ops (+, -, *, &, (and ^ if macros enabled) ) in ruby #returns whether current token is to be the start of a literal IDBEGINCHAR=/^(?:#@@LETTER|[$@])/o def unary_op_expected?(ch) #yukko hack - '*&='[readahead(2)[1..1]] and return false + #not unary if its anything followed by = or &/* followed by themselves + return false if /^(?:.=|([&*])\1)$/===readahead(2) + return true if KeywordToken===@last_operative_token and @last_operative_token==='for' after_nonid_op? { #possible func-call as operator @@ -2395,25 +2871,25 @@ #returns what block yields if last token was a method name. #used to resolve the ambiguity of # <<, %, /, ?, :, and newline (among others) in ruby def after_nonid_op? - #this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed + #this is how it should be, I think, and then no handlers for methnametoken and @FUNCLIKE_KEYWORDS are needed # if ImplicitParamListStartToken===@last_token_including_implicit # huh return true # end case @last_operative_token - when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat + when VarNameToken , MethNameToken, @FUNCLIKE_KEYWORDS.token_pat #VarNameToken should really be left out of this case... #should be in next branch instread #callers all check for last token being not a variable if they pass anything #but {false} in the block #(hmmm... some now have true or other non-varname checks in them... could these be bugs?) return yield when StringToken, SymbolToken, NumberToken, HerePlaceholderToken, %r{^( - end|self|true|false|nil|->| + end|self|true|false|nil| __FILE__|__LINE__|__ENCODING__|[\})\]] )$}x.token_pat #dunno about def/undef #maybe class/module shouldn't he here either? #for is also in NewlineToken branch, below. @@ -2423,11 +2899,11 @@ #regexs above must match whole string #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :( return true if OperatorToken===@last_operative_token || KeywordToken===@last_operative_token when NewlineToken, nil, #nil means we're still at beginning of file /^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def| - while|until|begin|for|in|case|when|ensure|defined\?)$ + while|until|begin|for|in|case|when|ensure|defined\?|->)$ /x.token_pat return true when KeywordToken return true if /^(alias|undef)$/===@last_operative_token.ident #is this ever actually true??? when IgnoreToken @@ -2476,11 +2952,11 @@ end #----------------------------------- def caret(ch) #match /^=?/ (^ or ^=) (maybe unary ^ too) - if @enable_macro and (@last_token_maybe_implicit and + if @enable_macro and (@last_token_maybe_implicit and #factored @last_token_maybe_implicit.ident=='(') || unary_op_expected?(ch) result=OperatorToken.new(read(1),input_position) result.unary=true result else @@ -2531,19 +3007,19 @@ if unary_op_expected?(ch) or KeywordToken===@last_operative_token && /^(return|break|next)$/===@last_operative_token.ident if (?0..?9)===readahead(2)[1] result= number(ch) - elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc - @file.pos+=2 - #push down block context - localvars.start_block - @parsestack.push ctx=BlockContext.new(@linenum) - ctx.wanting_stabby_block_body=true - #read optional proc params - block_param_list_lookahead ?(, ParenedParamListLhsContext - result=KeywordToken.new('->',pos) +# elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc +# @file.pos+=2 +# #push down block context +# localvars.start_block +# @parsestack.push ctx=BlockContext.new(@linenum) +# ctx.wanting_stabby_block_body=true +# #read optional proc params +# block_param_list_lookahead ?(, ParenedParamListLhsContext +# result=KeywordToken.new('->',pos) else #unary operator result=getchar WHSPLF[nextchar.chr] or @moretokens << NoWsToken.new(input_position) @@ -2579,24 +3055,24 @@ @moretokens.unshift result @moretokens.unshift( *abort_noparens!("=>")) result=@moretokens.shift end @parsestack.last.see self,:arrow - when '~' # =~... after regex, maybe? - last=last_operative_token - - if @rubyversion>=1.9 and StringToken===last and last.lvars - #ruby delays adding lvars from regexps to known lvars table - #for several tokens in some cases. not sure why or if on purpose - #i'm just going to add them right away - last.lvars.each{|lvar| localvars[lvar]=true } - end +# when '~' # =~... after regex, maybe? +# last=last_operative_token +# +# if @rubyversion>=1.9 and StringToken===last and last.lvars +# #ruby delays adding lvars from regexps to known lvars table +# #for several tokens in some cases. not sure why or if on purpose +# #i'm just going to add them right away +# last.lvars.each{|lvar| localvars[lvar]=true } +# end when '' #plain assignment: record local variable definitions last_context_not_implicit.lhs=false @last_operative_token=result @moretokens.push( *ignored_tokens(true).map{|x| - NewlineToken===x ? EscNlToken.new(x.ident,x.offset,@filename,@linenum) : x + NewlineToken===x ? EscNlToken.new(x.ident,x.offset,x.filename,x.linenum) : x } ) @parsestack.push AssignmentRhsContext.new(@linenum) @moretokens.push AssignmentRhsListStartToken.new( input_position) if eat_next_if ?* tok=OperatorToken.new('*', input_position-1) @@ -2619,41 +3095,43 @@ result+=k elsif eof? or WHSPLF[nextchar.chr] #do nothing else @moretokens << NoWsToken.new(input_position) end - ty= @rubyversion>=1.9 ? OperatorToken : KeywordToken + ty=OperatorToken result=ty.new(result, input_position-result.size) result.unary=!k #result should distinguish unary ! return result end - #----------------------------------- def dot(ch) str='' eat_next_if(?.) or raise "lexer confusion" #three lumps of sugar or two? eat_next_if(?.) and - return KeywordToken.new(eat_next_if(?.)? "..." : "..") + return OperatorToken.new(eat_next_if(?.)? "..." : "..") #else saw just single . #match a valid ruby id after the dot - result= KeywordToken.new( ".") + result= KeywordToken.new( "." ) dot_rhs(result) return result end #----------------------------------- def dot_rhs(prevtok) safe_recurse { |a| set_last_token prevtok aa= ignored_tokens was=after_nonid_op?{true} tok,pos=callsite_symbol(prevtok) - tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was)) + if tok + toks=var_or_meth_name(tok,prevtok,pos,was) + aa.push(*toks) + end a.unshift(*aa) } end #----------------------------------- @@ -2690,12 +3168,24 @@ #} return IgnoreToken.new(result) end end + #----------------------------------- + def method_params? + lasttok=last_token_maybe_implicit #last_operative_token + VarNameToken===lasttok or + MethNameToken===lasttok or + lasttok===@FUNCLIKE_KEYWORDS or + (@enable_macro and lasttok and lasttok.ident==')') #factored + end + + #----------------------------------- def open_brace(ch) + #there are 3 distinct cases here; this method should be divided in 3 + assert((ch!='[' or !want_op_name)) assert(@moretokens.empty?) lastchar=prevchar ch=eat_next_if(/[({\[]/)or raise "lexer confusion" tokch=KeywordToken.new(ch, input_position-1) @@ -2705,30 +3195,23 @@ case tokch.ident when '[' # in contexts expecting an (operator) method name, we # would want to match [] or []= at this point #but control never comes this way in those cases... goes - #to custom parsers for alias, undef, and def in #parse_keywords + #to custom parsers for alias, undef, and def in #special_identifier? tokch.set_infix! unless after_nonid_op?{WHSPLF[lastchar]} @parsestack.push ListImmedContext.new(ch,@linenum) lasttok=last_operative_token #could be: lasttok===/^#@@LETTER/o if (VarNameToken===lasttok or ImplicitParamListEndToken===lasttok or - MethNameToken===lasttok or lasttok===FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar] + MethNameToken===lasttok or lasttok===@FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar] @moretokens << (tokch) tokch= NoWsToken.new(input_position-1) end when '(' - lasttok=last_token_maybe_implicit #last_operative_token #could be: lasttok===/^#@@LETTER/o - method_params= ( - VarNameToken===lasttok or - MethNameToken===lasttok or - lasttok===FUNCLIKE_KEYWORDS or - (@enable_macro and lasttok and lasttok.ident==')') - ) - if method_params + if method_params? unless WHSPCHARS[lastchar] @moretokens << tokch tokch= NoWsToken.new(input_position-1) end @parsestack.push ParamListContext.new(@linenum) @@ -2751,17 +3234,23 @@ when '{' #check if we are in a hash literal or string inclusion (#{}), #in which case below would be bad. if !(UnparenedParamListLhsContext===@parsestack.last) and after_nonid_op?{false} || @last_operative_token.has_no_block? - @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash + if @file.readbehind(2)=='#{' + @parsestack.push StringInclusionContext.new(@linenum) + else + @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash + end else #abort_noparens! tokch.set_infix! tokch.as="do" #if (perhaps deep) inside a stabby block param list context, end it + stabby_params_just_ended,tokch=maybe_end_stabby_block_param_list(tokch) +=begin was if @rubyversion>=1.9 stabby_params_just_ended=false (@parsestack.size-1).downto(1){|i| case @parsestack[i] when ParamListContextNoParen,AssignmentRhsContext @@ -2777,10 +3266,11 @@ break else break end } end +=end # 'need to find matching callsite context and end it if implicit' lasttok=last_operative_token if !(lasttok===')' and lasttok.callsite?) and !stabby_params_just_ended #or ParamListContextNoParen===parsestack.last @moretokens.push( *(abort_1_noparen!(1).push tokch) ) @@ -2798,10 +3288,15 @@ end return (tokch) end #----------------------------------- + def maybe_end_stabby_block_param_list(tokch) + return false,tokch + end + + #----------------------------------- def close_brace(ch) ch==eat_next_if(/[)}\]]/) or raise "lexer confusion" @moretokens.concat abort_noparens!(ch) @parsestack.last.see self,:semi #hack @moretokens<< kw=KeywordToken.new( ch, input_position-1) @@ -2844,16 +3339,24 @@ return(endoffile_detected result) end #----------------------------------- def endoffile_detected(s='') - @moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s))) + @linenum+=1 #optional_here_bodies expects to be called after a newline was seen and @linenum bumped + #in this case, there is no newline, but we need to pretend there is. otherwise optional_here_bodies + #makes tokens with wrong line numbers + + @moretokens.concat optional_here_bodies + @linenum-=1 #now put it back + @moretokens.concat abort_noparens! + @moretokens.push rulexer_endoffile_detected(s) if @progress_thread @progress_thread.kill @progress_thread=nil end result= @moretokens.shift + assert @pending_here_bodies.empty? balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}") result end #----------------------------------- @@ -2877,12 +3380,15 @@ # AssignmentRhsContext #]===@parsestack while AssignmentRhsContext===@parsestack[-1] pop= case @parsestack[-2] - when ParamListContext,ParamListContextNoParen,WhenParamListContext, - ListImmedContext,AssignmentRhsContext; true + when ParamListContext,ParamListContextNoParen, + WhenParamListContext,ListImmedContext,AssignmentRhsContext, + ParenedParamListLhsContext,UnparenedParamListLhsContext, + BlockParamListLhsContext,KnownNestedLhsParenContext + true when RescueSMContext; @parsestack[-2].state==:rescue when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens? else false end break unless pop @@ -2902,11 +3408,11 @@ end #----------------------------------- def semicolon(ch) assert @moretokens.empty? - @moretokens.push(*abort_noparens!) + @moretokens.push(*abort_noparens!(';',0)) @parsestack.last.see self,:semi case @parsestack.last #should be in context's see:semi handler when ExpectThenOrNlContext @parsestack.pop when ExpectDoOrNlContext @@ -2930,11 +3436,56 @@ end #----------------------------------- #tokenify_results_of :identifier save_offsets_in(*CHARMAPPINGS.values.uniq-[ - :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret,:plusminus + :symbol_or_op,:open_brace,:whitespace,:exclam,:caret,:plusminus ]) + save_linenums_in :symbol_or_op,:open_brace,:whitespace,:exclam,:caret,:plusminus #save_offsets_in :symbol end + +#defense against my class being redefined by a a certain other project, +module Kernel + eval %w[require load].map{|name| <<-END }.join + #{name}__without_rubylexer_protection=instance_method :#{name} + define_method(:#{name}) do |file| + if /\\Aruby_(lexer|parser)(\\.rb)?\\z/i===File.basename(file) + warn "Uh-oh, you're trying to use ruby_parser and rubylexer at the same time." + warn "ruby_parser causes a namespace conflict with rubylexer" + warn "because ruby_parser redefines the class RubyLexer" + warn "in a way which is incompatible with standard RubyLexer." + warn "The rubylexer gem owns the namespace ::RubyLexer," + warn "and claimed it at least 2 years before ruby_parser existed." + warn "Attempt to redefine RubyLexer in an incompatible way disabled." + else + begin + #{name}__without_rubylexer_protection.bind(self).call file + rescue Exception=>e + e.backtrace.delete_if{|x| /\\A\#{__FILE__}:\#{__LINE__-2}:/o===x } + raise e + end + end + end + END +end + +eval %w[class\ Module module\ Kernel].map{|ctx| <<END }.join + #{ctx} + autoload__without_rubylexer_protection=instance_method :autoload + define_method(:autoload) do |mod,file| + if /\\Aruby_(lexer|parser)(\\.rb)?\\z/i===File.basename(file) + warn "Uh-oh, you're trying to use ruby_parser and rubylexer at the same time." + warn "ruby_parser causes a namespace conflict with rubylexer" + warn "because ruby_parser redefines the class RubyLexer" + warn "in a way which is incompatible with standard RubyLexer." + warn "The rubylexer gem owns the namespace ::RubyLexer," + warn "and claimed it at least 2 years before ruby_parser existed." + warn "Attempt to redefine RubyLexer in an incompatible way disabled." + else + autoload__without_rubylexer_protection.bind(self).call mod,file + end + end + end +END