lib/rubylexer.rb in rubylexer-0.7.7 vs lib/rubylexer.rb in rubylexer-0.8.0
- old
+ new
@@ -1,8 +1,9 @@
+#encoding: binary
=begin
rubylexer - a ruby lexer written in ruby
- Copyright (C) 2004,2005,2008 Caleb Clausen
+ Copyright (C) 2004,2005,2008, 2011 Caleb Clausen
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
@@ -15,68 +16,82 @@
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
=end
-
require 'rubylexer/rulexer' #must be 1st!!!
require 'rubylexer/version'
require 'rubylexer/token'
require 'rubylexer/charhandler'
require 'rubylexer/symboltable'
#require "io.each_til_charset"
require 'rubylexer/context'
require 'rubylexer/tokenprinter'
-
#-----------------------------------
class RubyLexer
include NestedContexts
-
+ #here's a list of other constants that should already be defined at this point:
+ [WHSP, VERSION, Token, CharSet, CharHandler, SymbolTable, SimpleTokenPrinter].each{|k| fail if k.nil? }
- RUBYSYMOPERATORREX=
+ RUBYUNOPERATORS=%w{ +@ ~ ~@ -@ ! !@ }
+ RUBYBINOPERATORS=%w{ & | ^ / % == === =~ > >= >> < <= << <=> + - * ** }
+ RUBYCOMPOPERATORS=%w{== === =~ > >= < <= <=>}
+ RUBYSYMOPERATORS=RUBYUNOPERATORS+RUBYBINOPERATORS+%w{ [] []= }
+ RUBYNONSYMOPERATORS=%w{!= !~ = => :: ? : , ; . .. ... || && ||= &&=}+
+ (RUBYBINOPERATORS-RUBYCOMPOPERATORS).map{|op| op+'='}
+ RUBYSYMOPERATORREX=
%r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
# (nasty beastie, eh?)
#these are the overridable operators
#does not match flow-control operators like: || && ! or and if not
#or op= ops like: += -= ||=
#or .. ... ?:
#for that use:
- RUBYNONSYMOPERATORREX=
+ RUBYNONSYMOPERATORREX=
%r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
- RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
- UNSYMOPS=/^[~!]$/ #always unary
- UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
- WHSPCHARS=WHSPLF+"\\#"
- OPORBEGINWORDLIST=%w(if unless while until)
- BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
- OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})"
- BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o
- FUNCLIKE_KEYWORDLIST=%w/break next redo return yield retry super BEGIN END/
- FUNCLIKE_KEYWORDS=/^(#{FUNCLIKE_KEYWORDLIST.join '|'})$/
- VARLIKE_KEYWORDLIST=%w/__FILE__ __LINE__ false nil self true/
- VARLIKE_KEYWORDS=/^(#{VARLIKE_KEYWORDLIST.join '|'})$/
- INNERBOUNDINGWORDLIST=%w"else elsif ensure in then rescue when"
- INNERBOUNDINGWORDS="(#{INNERBOUNDINGWORDLIST.join '|'})"
- BINOPWORDLIST=%w"and or"
- BINOPWORDS="(#{BINOPWORDLIST.join '|'})"
+ RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
+ UNSYMOPS=/^[~!]$/ #always unary
+ UBSYMOPS=/^(?:[*&+-]|::)$/ #ops that could be unary or binary
+ WHSPCHARS=WHSPLF+"\\#"
+ OPORBEGINWORDLIST=%w(if unless while until)
+ BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
+ OPORBEGINWORDS="(?:#{OPORBEGINWORDLIST.join '|'})"
+ BEGINWORDS=/^(?:#{BEGINWORDLIST.join '|'})$/o
+ FUNCLIKE_KEYWORDLIST_1_9=%w[not]
+ FUNCLIKE_KEYWORDLIST=%w/break next redo return yield retry super BEGIN END/
+ FUNCLIKE_KEYWORDS=/^(?:#{FUNCLIKE_KEYWORDLIST.join '|'})$/
+ VARLIKE_KEYWORDLIST_1_9=%w[__ENCODING__]
+ VARLIKE_KEYWORDLIST=%w/__FILE__ __LINE__ false nil self true/
+ VARLIKE_KEYWORDS=/^(?:#{VARLIKE_KEYWORDLIST.join '|'})$/
+ attr_reader :FUNCLIKE_KEYWORDS, :VARLIKE_KEYWORDS
+
+ INNERBOUNDINGWORDLIST=%w"else elsif ensure in then rescue when"
+ INNERBOUNDINGWORDS="(?:#{INNERBOUNDINGWORDLIST.join '|'})"
+ BINOPWORDLIST=%w"and or"
+ BINOPWORDS="(?:#{BINOPWORDLIST.join '|'})"
- RUBYKEYWORDS=%r{
- ^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
+ RUBYKEYWORDS=%r{
+ ^(?:alias|#{BINOPWORDS}|defined\?|not|undef|end|
#{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
#{INNERBOUNDINGWORDS}|#{BEGINWORDS}
)$
}xo
+ RUBYKEYWORDLIST=%w{alias defined? not undef end}+
+ BINOPWORDLIST+
+ VARLIKE_KEYWORDLIST+FUNCLIKE_KEYWORDLIST+
+ INNERBOUNDINGWORDLIST+BEGINWORDLIST+
+ VARLIKE_KEYWORDLIST_1_9
#__END__ should not be in this set... its handled in start_of_line_directives
- HIGHASCII=?\x80..?\xFF
- NONASCII=HIGHASCII
- #NONASCII=?\x80..?xFFFFFFFF #or is it 10FFFF, whatever the highest conceivable code point
+ HIGHASCII=?\x80..?\xFF
+ NONASCII=HIGHASCII
+ #NONASCII=?\x80..?xFFFFFFFF #or is it 10FFFF, whatever the highest conceivable code point
- CHARMAPPINGS = {
+ CHARMAPPINGS = {
?$ => :dollar_identifier,
?@ => :at_identifier,
?a..?z => :identifier,
?A..?Z => :identifier,
?_ => :identifier,
@@ -123,37 +138,37 @@
?\x01..?\x03 => :illegal_char,
?\x05..?\x08 => :illegal_char,
?\x0E..?\x19 => :illegal_char,
?\x1b..?\x1F => :illegal_char,
?\x7F => :illegal_char,
- }
+ }
- attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
+ attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
- UCLETTER=@@UCLETTER="[A-Z]"
+ UCLETTER=@@UCLETTER="[A-Z]"
- #cheaters way, treats utf chars as always 1 byte wide
- #all high-bit chars are lowercase letters
- #works, but strings compare with strict binary identity, not unicode collation
- #works for euc too, I think
- #(the ruby spec for utf8 support permits this interpretation)
- LCLETTER=@@LCLETTER="[a-z_\x80-\xFF]"
- LETTER=@@LETTER="[A-Za-z_\x80-\xFF]"
- LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]"
- eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| "
+ #cheaters way, treats utf chars as always 1 byte wide
+ #all high-bit chars are lowercase letters
+ #works, but strings compare with strict binary identity, not unicode collation
+ #works for euc too, I think
+ #(the ruby spec for utf8 support permits this interpretation)
+ LCLETTER=@@LCLETTER="[a-z_\x80-\xFF]"
+ LETTER=@@LETTER="[A-Za-z_\x80-\xFF]"
+ LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]"
+ eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| "
def #{n}; #{n}; end
def self.#{n}; @@#{n}; end
"
- }.join
+ }.join
- NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
- if ?A.is_a? String #ruby >= 1.9
- NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
- else
- NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
- end
- NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
+ NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
+ if ?A.is_a? String #ruby >= 1.9
+ NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
+ else
+ NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
+ end
+ NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
=begin
require 'jcode'
utf8=String::PATTERN_UTF8 #or euc, or sjis...
LCLETTER_U="(?>[a-z_]|#{utf8})"
@@ -161,11 +176,18 @@
LETTER_DIGIT_U="(?>[A-Za-z_0-9]|#{utf8})"
=end
#-----------------------------------
def initialize(filename,file,linenum=1,offset_adjust=0,options={})
- @offset_adjust=0 #set again in next line
+ if file.respond_to? :set_encoding
+ file.set_encoding 'binary'
+ elsif file.respond_to? :force_encoding
+ file=file.dup if file.frozen?
+ file.force_encoding 'binary'
+ end
+
+ @offset_adjust=@offset_adjust2=0 #set again in next line
rulexer_initialize(filename,file, linenum,offset_adjust)
@start_linenum=linenum
@parsestack=[TopLevelContext.new]
@incomplete_here_tokens=[] #not used anymore
@pending_here_bodies=[]
@@ -177,69 +199,147 @@
@enable_macro=nil
@base_file=nil
@progress_thread=nil
@rubyversion=options[:rubyversion]||1.8
@encoding=options[:encoding]||:detect
- @method_operators=if @rubyversion>=1.9
- /#{RUBYSYMOPERATORREX}|\A![=~@]?/o
- else
- RUBYSYMOPERATORREX
- end
+ @always_binary_chars=CharSet['}]);|>,.=^']
+ @unary_or_binary_chars=CharSet['+-%/']
+
+
+ @FUNCLIKE_KEYWORDS=FUNCLIKE_KEYWORDS
+ @VARLIKE_KEYWORDS=VARLIKE_KEYWORDS
+
@toptable=CharHandler.new(self, :identifier, CHARMAPPINGS)
- extend RubyLexer1_9 if @rubyversion>=1.9
- read_leading_encoding
- start_of_line_directives
+ if @rubyversion>=1.9
+ extend RubyLexer1_9
+ end
+ rubylexer_modules_init
+ @method_operators=build_method_operators
+ if input_position.zero?
+ read_leading_encoding
+ @encoding=:binary if @rubyversion<=1.8
+ start_of_line_directives
+ end
progress_printer
end
- ENCODING_ALIASES={
- 'utf-8'=>'utf8',
+ def rubylexer_modules_init
- 'ascii-8bit'=>'binary',
- 'ascii-7bit'=>'ascii',
+ end
+
+ alias dump inspect # preserve old inspect functionality
+
+ # irb friendly #inspect/#to_s
+ def to_s
+ mods=class<<self;self end.ancestors-self.class.ancestors
+ mods=mods.map{|mod| mod.name }.join('+')
+ mods="+"<<mods unless mods.empty?
+ "#<#{self.class.name}#{mods}: [#{@file.inspect}]>"
+ end
+
+ alias inspect to_s
+
+
+ def build_method_operators
+ /#{RUBYSYMOPERATORREX}|\A`/o
+ end
+
+
+ RAW_ENCODING_ALIASES={
+ #'utf-8'=>'utf8',
+
+ 'ascii-8-bit'=>'binary',
+ 'ascii-7-bit'=>'ascii',
'euc-jp'=>'euc',
- 'ascii8bit'=>'binary',
- 'ascii7bit'=>'ascii',
- 'eucjp'=>'euc',
+ 'iso-8859-1'=>'binary',
+ 'latin-1'=>'binary',
+ #'ascii8bit'=>'binary',
+ #'ascii7bit'=>'ascii',
+ #'eucjp'=>'euc',
'us-ascii'=>'ascii',
'shift-jis'=>'sjis',
'autodetect'=>'detect',
}
+ ENCODING_ALIASES=Hash[*RAW_ENCODING_ALIASES.map{|long,short| [long.tr_s('-_',''),short] }.flatten]
ENCODINGS=%w[ascii binary utf8 euc sjis]
+ NONWORKING_ENCODINGS=%w[sjis]
+ WSCHARS=@@WSCHARS= /[\s]/==="\v" ? '\s' : '\s\v' #same as WHSP
+ WSNONLCHARS=@@WSNONLCHARS=/(?!\n)[#@@WSCHARS]/o #same as WHSPLF
+
+ NOPARAMLONGOPTIONS=%w[copyright version verbose debug yydebug help]
+ PARAMLONGOPTIONS=%w[encoding dump]
+ DASHPARAMLONGOPTIONS=%w[enable disable]
+ NOPARAMOPTIONS="SacdhlnpsvwyU"
+ OCTALPARAMOPTIONS="0"
+ CHARPARAMOPTIONS="KTW"
+ PARAMSHORTOPTIONS="CXFIEeir"
+ MAYBEPARAMSHORTOPTIONS="x"
+ NEWIN1_9OPTIONS=%w[encoding dump enable disable X U W E]
+ LONGOPTIONS=/
+ --(#{NOPARAMLONGOPTIONS.join'|'})|
+ --(#{PARAMLONGOPTIONS.join'|'})(=|#@@WSNONLCHARS+)[^#@@WSCHARS]+|
+ --(#{DASHPARAMLONGOPTIONS.join'|'})-[^#@@WSCHARS]+
+ /ox
+ CHAINOPTIONS=/
+ [#{NOPARAMOPTIONS}]+|
+ [#{OCTALPARAMOPTIONS}][0-7]{1,3}|
+ [#{CHARPARAMOPTIONS}].
+ /ox
+ PARAMOPTIONS=/
+ [#{PARAMSHORTOPTIONS}]#@@WSNONLCHARS*[^#@@WSCHARS]+|
+ [#{MAYBEPARAMSHORTOPTIONS}]#@@WSNONLCHARS*[^#@@WSCHARS]*
+ /ox
+ OPTIONS=/
+ (#@@WSNONLCHARS*(
+ #{LONGOPTIONS} | --? |
+ -#{CHAINOPTIONS}*( #{PARAMOPTIONS} | #{CHAINOPTIONS} )
+ ))*
+ /ox
+
def read_leading_encoding
- return unless @encoding==:detect
- @encoding=:ascii
- @encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" ) #bom
- if @file.skip( /\A#!/ )
+ @encoding=nil if @encoding==:detect
+ if enc=@file.scan( "\xEF\xBB\xBF" ) #bom
+ encpos=0
+ @encoding||=:utf8
+ elsif @file.skip( /\A#!/ )
+ lastpos=@file.pos
loop do
- til_charset( /[\s\v]/ )
- break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 )
- if @file.skip( /.-K(.)/ )
- case $1
- when 'u'; @encoding=:utf8
- when 'e'; @encoding=:euc
- when 's'; @encoding=:sjis
+ til_charset( /[#@@WSCHARS]/o )
+ assert @file.pos > lastpos
+ break if @file.match( /^\n|#@@WSNONLCHARS([^-#@@WSCHARS])/o,4 )
+ if @file.skip( /.-#{CHAINOPTIONS}*K#@@WSNONLCHARS*([a-zA-Z0-9])/o )
+ case @file.last_match[1]
+ when 'u','U'; @encoding||=:utf8
+ when 'e','E'; @encoding||=:euc
+ when 's','S'; @encoding||=:sjis
end
+ elsif @file.skip( /.#{LONGOPTIONS}/o )
end
+ getchar
+ lastpos=@file.pos
end
til_charset( /[\n]/ )
+ @moretokens<<ShebangToken.new(@file[0...@file.pos])
+ pos=input_position
+ @moretokens<<EscNlToken.new(readnl,pos,@filename,2)
+ @moretokens<<FileAndLineToken.new(@filename,2,input_position)
end
- if @rubyversion>=1.9 and @file.skip(
- /\A#[\x00-\x7F]*?(?:en)?coding[\s\v]*[:=][\s\v]*([a-z0-9_-]+)[\x00-\x7F]*\n/i
- )
- name=$1
- name.downcase!
- name=ENCODING_ALIASES[name] if ENCODING_ALIASES[name]
- @encoding=name.to_sym if ENCODINGS.include? name
- end
+ encpos=input_position unless enc
+ enc||=read_encoding_line
+ ensure
+ @moretokens<<EncodingDeclToken.new(enc||'',@encoding,enc ? encpos : input_position) if @encoding
+ @encoding||=:ascii
end
+ def read_encoding_line
+ end
+
def progress_printer
return unless ENV['RL_PROGRESS']
$stderr.puts 'printing progresses'
@progress_thread=Thread.new do
until EoiToken===@last_operative_token
@@ -308,17 +408,22 @@
raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
end
end
#-----------------------------------
+ def unshift(*tokens)
+ @moretokens.unshift(*tokens)
+ end
+
+ #-----------------------------------
def eof?
rulexer_eof? or EoiToken===@last_operative_token
end
#-----------------------------------
def input_position
- rulexer_input_position+@offset_adjust
+ rulexer_input_position+@offset_adjust+@offset_adjust2
end
#-----------------------------------
def input_position_raw
@file.pos
@@ -390,11 +495,11 @@
@moretokens.unshift tok
return result
end
#-----------------------------------
- WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/
+ WSCHARSET=/[#\\\n#@@WSCHARS\x00\x04\x1a]/o
def ignored_tokens(allow_eof=false,allow_eol=true)
result=[]
result << @moretokens.shift while StillIgnoreToken===@moretokens.first
@moretokens.empty? or return result
loop do
@@ -426,11 +531,11 @@
=begin
@whsphandler||=CharHandler.new(self, :==,
"#" => :comment,
"\n" => :newline,
"\\" => :escnewline,
- "\s\t\v\r\f" => :whitespace
+ "#@@WSCHARS\t\r\f" => :whitespace
)
#tok=nil
while tok=@whsphandler.go((nextchar or return result))
block_given? and NewlineToken===tok and yield tok
result << tok
@@ -474,23 +579,23 @@
#just asserts because those contexts are never encountered.
#control goes through symbol(<...>,nil)
assert( /^#@@LETTER$/o===context)
assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
- if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
- @moretokens.push SymbolToken.new(str,oldpos), KeywordToken.new("=>",input_position-1)
- else
- @moretokens.unshift(*parse_keywords(str,oldpos) do |tok,except|
+# if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
+# @moretokens.unshift SymbolToken.new(str,oldpos), KeywordToken.new(":",input_position-1,:as=>"=>")
+# else
+ @moretokens.unshift(*special_identifier?(str,oldpos) do |tok,except|
#most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val
was_last=@last_operative_token
@last_operative_token=tok if tok
normally=safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
(Array===normally ? normally[0]=except : normally=except) if except
normally
end)
- end
+# end
return @moretokens.shift
end
#-----------------------------------
IDENTREX={}
@@ -510,13 +615,13 @@
#= and ! only match if not part of a larger operator
trailers =
case context
when ?@,?$ then ""
# when ?: then "!(?![=])|\\?|=(?![=~>])"
- else "!(?![=])|\\?"
+ else "!(?=\\z|[^=]|=[=~>])|\\?"
end
- @in_def_name||context==?: and trailers<<"|=(?![=~>])"
+ @in_def_name||context==?: and trailers<<"|=(?![~>]|=[^~=>])"
@file.scan(IDENTREX[trailers]||=/^(?>#@@LETTER#@@LETTER_DIGIT*(?:#{trailers})?)/)
end
#-----------------------------------
@@ -551,16 +656,18 @@
end
end
#-----------------------------------
def in_lvar_define_state lasttok=@last_operative_token
- #@defining_lvar is a hack
- @defining_lvar or case ctx=@parsestack.last
+ return true if @defining_lvar #@defining_lvar is a hack
+ ctx=@parsestack.last
+ case ctx
#when ForSMContext; ctx.state==:for
- when UnparenedParamListLhsContext; /^(->|,|;)$/===lasttok.ident
+ when UnparenedParamListLhsContext
+ /^(->|,|;)$/===lasttok.ident or /^[*&]$/===lasttok.ident && lasttok.unary
when RescueSMContext
- lasttok.ident=="=>" and @file.match?( /\A[\s\v]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
+ lasttok.ident=="=>" and @file.match?( /\A[#@@WSCHARS]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
#when BlockParamListLhsContext; true
end
end
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2
@@ -585,11 +692,11 @@
assert String===name
was_in_lvar_define_state=in_lvar_define_state(lasttok)
#maybe_local really means 'maybe local or constant'
maybe_local=case name
- when /(?!#@@LETTER_DIGIT).$/o #do nothing
+ when /[?!=]$/o #do nothing
when /^#@@LCLETTER/o
(localvars===name or
#VARLIKE_KEYWORDS===name or
was_in_lvar_define_state
) and not lasttok===/^(\.|::)$/
@@ -603,10 +710,13 @@
oldlast=@last_operative_token
tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
oldpos= input_position
+ oldline= linenum
+
+ #deal with ws following the ident
sawnl=false
result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
if sawnl || eof?
if was_in_lvar_define_state
if /^#@@LCLETTER#@@LETTER_DIGIT*$/o===name
@@ -615,20 +725,36 @@
end
return result.unshift(tok)
elsif maybe_local
return result.unshift(tok) #if is_const
else
- return result.unshift(
+ toks=[
MethNameToken.new(name,pos), #insert implicit parens right after tok
ImplicitParamListStartToken.new( oldpos),
ImplicitParamListEndToken.new( oldpos)
- )
+ ]
+ toks.each{|t| t.endline=oldline}
+ return result.unshift(*toks)
end
end
#if next op is assignment (or comma in lvalue list)
#then omit implicit parens
+ assignment_coming=
+ /\A(?:
+ =[^>=~] | (,) | (;) | (\)) |
+ (in(?!#@@LETTER_DIGIT)) | (\|[^\|=]) | [%\/\-+^*&|]= | ([<>*&|])\6=
+ )/mox===readahead(3) &&
+ case
+ when $1; comma_in_lvalue_list? #comma
+ when $2; semicolon_in_block_param_list?
+ when $3; last_context_not_implicit.lhs #right paren in lhs
+ when $4; ForSMContext===last_context_not_implicit #in
+ when $5; BlockParamListLhsContext===last_context_not_implicit #ending goalpost
+ else true
+ end
+=begin was
assignment_coming=case nc=nextchar
when ?=; not( /^=[>=~]$/===readahead(2) )
when ?,; comma_in_lvalue_list?
when (?; if @rubyversion>=1.9); ParenedParamListLhsContext===@parsestack.last
when ?); last_context_not_implicit.lhs
@@ -640,19 +766,24 @@
#is it a goalpost?
BlockParamListLhsContext===last_context_not_implicit &&
readahead(2)[1] != ?|
when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
end
+=end
+
if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
tok=assign_lvar_type! VarNameToken.new(name,pos)
- if /(?!#@@LETTER_DIGIT).$/o===name
- elsif /^#@@LCLETTER/o===name and !(lasttok===/^(\.|::)$/)
+ #if /(?!#@@LETTER_DIGIT).$/o===name
+ #nonalphabetics... operator? skip it
+ #els
+ if /^#@@LCLETTER/o===name #and !(lasttok===/^(\.|::)$/)
localvars[name]=true
end
return result.unshift(tok)
end
+ nc=nextchar
implicit_parens_to_emit=
if assignment_coming
@parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
else
@@ -675,59 +806,43 @@
(NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~,NONASCII; 1 #"
when ?{
maybe_local=false
1
-=begin
- x=2
- x-=1 if /\A(return|break|next)\Z/===name and
- !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
- x
-=end
when ?(
maybe_local=false
lastid=lasttok&&lasttok.ident
case lastid
when /\A[;(]|do\Z/; was_after_nonid_op=false
when '|'; was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
when '{'; was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
end if KeywordToken===lasttok
was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
- want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
-# /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
-# MethNameToken===lasttok or
-# RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
-# )
+ want_parens=!(ws_toks.empty? or was_after_nonid_op)
#look ahead for closing paren (after some whitespace...)
want_parens=false if @file.match?( /\A.(?:\s|\v|\#.*\n)*\)/ )
-# afterparen=@file.pos
-# getchar
-# ignored_tokens(true)
-# want_parens=false if nextchar==?)
-# @file.pos=afterparen
want_parens=true if /^(return|break|next)$/===@last_operative_token.ident and not(
KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident
)
want_parens ? 1 : 0
- when ?},?],?),?;,(?^ unless @enable_macro), ?|, ?>, ?,, ?., ?=; 2
- when ?+, ?-, ?%, ?/, (?^ if @enable_macro)
+ when @always_binary_chars; 2 # ?},?],?),?;,(?^ unless @enable_macro), ?|, ?>, ?,, ?., ?=; 2
+ when @unary_or_binary_chars; #?+, ?-, ?%, ?/, (?^ if @enable_macro)
if /^(return|break|next)$/===@last_operative_token.ident and not(
KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident
)
1
else
(ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
end
when ?*, ?&
- # lasttok=@last_operative_token
if /^(return|break|next)$/===@last_operative_token.ident and not(
KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident
)
1
else
- (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o] and !@in_def_name) ? 2 : 3
end
when ?:
next2=readahead(2)
if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then
$1 && !ws_toks.empty? ? 3 : 2
@@ -736,11 +851,10 @@
end
when ??; next3=readahead(3)
#? never begins a char constant if immediately followed
#by 2 or more letters or digits
/^\?([#{WHSPLF}]|#@@LETTER_DIGIT{2})/o===next3 ? 2 : 3
-# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?(?:["'`]|#@@LETTER_DIGIT)/o]) ? 3 : 2
when ?[;
if ws_toks.empty?
(KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 3 : 2
else
@@ -795,10 +909,12 @@
# 'then else elsif rescue ensure (illegal in value context)'
# 'need to pop noparen from parsestack on these tokens: (in operator context)'
# 'not ok:'
# 'not (but should it be?)'
+ ensure
+ result.first.endline||=oldline unless result.empty?
end
#-----------------------------------
#read ahead to see if there's method param list (with real parentheses)
#and 2 or more parameters (and hence a comma to separate them)
@@ -816,33 +932,43 @@
break true
elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.tag and @parsestack.size==basesize+1
break true
elsif EoiToken===tok
lexerror tok, "unexpected eof in parameter list"
+ break
end
}
result.concat @moretokens
@moretokens.replace []
return [result,pass]
end
+
#-----------------------------------
+ module NestedContexts
+ class VContext<NestedContext
+ end
+ end
+ VContext=NestedContexts::VContext
CONTEXT2ENDTOK={
AssignmentRhsContext=>AssignmentRhsListEndToken,
ParamListContextNoParen=>ImplicitParamListEndToken,
KWParamListContextNoParen=>ImplicitParamListEndToken, #break,next,return
WhenParamListContext=>KwParamListEndToken,
- RescueSMContext=>KwParamListEndToken
+ RescueSMContext=>KwParamListEndToken,
+ VContext=>0
}
- def abort_noparens!(str='')
+ def abort_noparens!(str='',adj=str.size)
#assert @moretokens.empty?
result=[]
- while klass=CONTEXT2ENDTOK[@parsestack.last.class]
- result << klass.new(input_position-str.length)
- break if RescueSMContext===@parsestack.last #and str==':'
- break if WhenParamListContext===@parsestack.last and str==':'
+ ctx=@parsestack.last
+ while klass=CONTEXT2ENDTOK[ctx.class]
+ result << klass.new(input_position-adj) if Class===klass
+ break if RescueSMContext===ctx #and str==':'
+ break if WhenParamListContext===ctx and str==':'
@parsestack.pop
+ ctx=@parsestack.last
end
return result
end
#-----------------------------------
@@ -876,18 +1002,22 @@
#-----------------------------------
CONTEXT2ENDTOK_FOR_DO={
AssignmentRhsContext=>AssignmentRhsListEndToken,
ParamListContextNoParen=>ImplicitParamListEndToken,
- UnparenedParamListLhsContext=>KwParamListEndToken,
+ UnparenedParamListLhsContext=>ImplicitParamListEndToken,
ExpectDoOrNlContext=>1,
#WhenParamListContext=>KwParamListEndToken,
#RescueSMContext=>KwParamListEndToken
}
def abort_noparens_for_do!(str='')
#assert @moretokens.empty?
result=[]
+ return result if @parsestack[-1].class==AssignmentRhsContext and
+ @parsestack[-2].class==ParamListContextNoParen and
+ @parsestack[-3].class==DefContext and
+ !@parsestack[-3].in_body
while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
if klass==AssignmentRhsListEndToken
i=@parsestack.size
end_the_assign=false
while AssignmentRhsContext===@parsestack[i-=1]
@@ -928,42 +1058,54 @@
end
return result
end
#-----------------------------------
- def enable_macros!
- @enable_macro="macro"
+ def enable_macros! #this wholemethod should be unnecessary now
+ @enable_macro="macro" #shouldn't be needed anymore... should be safe to remove
class <<self
alias keyword_macro keyword_def
end
+ @unary_or_binary_chars.add '^'
+ @always_binary_chars.remove '^'
end
public :enable_macros!
#-----------------------------------
- @@SPACES=/[\ \t\v\f\v]/
+ @@SPACES=/[\ \t\f\v]/
@@WSTOK=/(?>
(?>\r?)\n|
(?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)|
- \#(?>[^\n]*)\n|
+ \#(?>[^\n]*)(?=\n)|
\\(?>\r?)\n|
^=begin(?>(?>#@@SPACES.*)?)\n
(?>(?:(?!=end)(?>.*)\n))*
=end(?>(?>#@@SPACES.*)?)\n
)/x
@@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o
+ WSTOKS=@@WSTOKS
def divide_ws(ws0,offset)
result=[]
ws0.scan(/\G#@@WSTOK/o){|ws|
incr= $~.begin(0)
- tok=case ws
- when /\A[\#=]/; IgnoreToken.new(ws,offset+incr)
- when /\n\Z/; EscNlToken.new(ws,offset+incr,@filename,@linenum)
- else WsToken.new(ws,offset+incr)
+ lines=ws.count "\n"
+ case ws
+ when /\A\#/
+ result<< IgnoreToken.new(ws,offset+incr)
+ when /\A=/
+ tok=IgnoreToken.new(ws,offset+incr)
+ tok.startline=@linenum
+ tok.endline=@linenum+lines
+ result<<tok
+ when /\n\Z/
+ result<< EscNlToken.new(ws,offset+incr,@filename,@linenum+1)
+ else
+ result<< WsToken.new(ws,offset+incr)
end
- result << tok
- @linenum+=ws.count "\n"
+ result<< FileAndLineToken.new(@filename,@linenum+lines,offset+incr+ws.size) if lines>0
+ @linenum+=lines
}
result.each_with_index{|ws,i|
if WsToken===ws
ws.ident << result.delete_at(i+1).ident while WsToken===result[i+1]
end
@@ -990,17 +1132,17 @@
#-----------------------------------
#parse keywords now, to prevent confusion over bare symbols
#and match end with corresponding preceding def or class or whatever.
#if arg is not a keyword, the block is called
- def parse_keywords(str,offset,&block)
+ def special_identifier?(str,offset,&block)
assert @moretokens.empty?
assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident)
- result=[KeywordToken.new(str,offset)]
- m=:"keyword_#{str}"
- respond_to?(m) ? (send m,str,offset,result,&block) : block[MethNameToken.new(str)]
+ m="keyword_#{str}"
+ return yield( MethNameToken.new(str) )unless respond_to?(m)
+ send m,str,offset,[KeywordToken.new(str,offset)],&block
end
public #these have to be public so respond_to? can see them (sigh)
def keyword_end(str,offset,result)
result.unshift(*abort_noparens!(str))
@parsestack.last.see self,:semi #sorta hacky... should make an :end event instead?
@@ -1043,10 +1185,12 @@
@localvars_stack.push SymbolTable.new
while @file.check( /\A::/ )
#VarNameToken===@moretokens.last or
#KeywordToken===@moretokens.last && @moretokens.last.ident=="::"
@file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break
+ #should not allow newline around :: here
+
md=@file.last_match
all,ws1,dc,ws2,name=*md
if ws1
@moretokens.concat divide_ws(ws1,md.begin(1))
incr=ws1.size
@@ -1136,16 +1280,16 @@
end
end
return result
end
def keyword_def(str,offset,result) #macros too, if enabled
- result.first.has_end!
- @parsestack.push ctx=DefContext.new(@linenum)
- ctx.state=:saw_def
+ result.first.has_end!
+ @parsestack.push ctx=DefContext.new(@linenum)
+ ctx.state=:saw_def
old_moretokens=@moretokens
@moretokens=[]
- aa=@moretokens
+ #aa=@moretokens
#safe_recurse { |aa|
set_last_token KeywordToken.new(str) #hack
result.concat ignored_tokens
#read an expr like a.b.c or a::b::c
@@ -1154,32 +1298,52 @@
old_size=@parsestack.size
parencount=0
begin
tok=get1token
case tok
- when/^\($/.token_pat then parencount+=1
- when/^\)$/.token_pat then parencount-=1
+ when /^\($/.token_pat ; parencount+=1
+ when /^\)$/.token_pat ; parencount-=1
+ when EoiToken
+ @moretokens= old_moretokens.concat @moretokens
+ return result<<lexerror( tok, "eof in def header" )
end
- EoiToken===tok and lexerror tok, "eof in def header"
result << tok
end until parencount==0 #@parsestack.size==old_size
@localvars_stack.push SymbolTable.new
else #no parentheses, all tail
set_last_token KeywordToken.new(".") #hack hack
tokindex=result.size
+ tokline=result.last.endline
result << tok=symbol(false,false)
name=tok.to_s
assert !in_lvar_define_state
#maybe_local really means 'maybe local or constant'
+ @maybe_local_pat||=%r{
+ ((?!#@@LETTER_DIGIT).$) | ^[@$] | (#@VARLIKE_KEYWORDS | #@FUNCLIKE_KEYWORDS) |
+ (^#@@LCLETTER) | (^#@@UCLETTER)
+ }x
+ @maybe_local_pat === name and
+ maybe_local=
+ case
+ when $1; maybe_local=false #operator or non-ident
+ when $2; ty=KeywordToken #keyword
+ when $3; maybe_local=localvars===name #lvar or method
+ when $4; is_const=true #constant
+ else true
+ end
+ #maybe_local=ty=KeywordToken if is__ENCODING__keyword?(name) #"__ENCODING__"==name and @rubyversion>=1.9
+=begin was
maybe_local=case name
when /(?!#@@LETTER_DIGIT).$/o; #do nothing
when /^[@$]/; true
- when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
+ when /#@VARLIKE_KEYWORDS|#@FUNCLIKE_KEYWORDS/,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
when /^#@@LCLETTER/o; localvars===name
when /^#@@UCLETTER/o; is_const=true #this is the right algorithm for constants...
end
+=end
+
result.push( *ignored_tokens(false,false) )
nc=nextchar
if !ty and maybe_local
if nc==?: || nc==?.
ty=VarNameToken
@@ -1193,11 +1357,17 @@
result.insert tokindex+1, newtok
end
end
assert result[tokindex].equal?(tok)
- var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
+ var=ty.new(tok.to_s,tok.offset)
+ if ty==KeywordToken and name[0,2]=="__"
+ send("keyword_#{name}",name,tok.offset,[var])
+ end
+ var.endline=tokline
+
+ var=assign_lvar_type! var
@localvars_stack.push SymbolTable.new
var.in_def=true if inside_method_def? and var.respond_to? :in_def=
result[tokindex]=var
@@ -1228,19 +1398,24 @@
if endofs
result.insert end_index,ImplicitParamListEndToken.new(ofs)
else
ofs+=listend.to_s.size
end
- result.insert end_index+1,EndHeaderToken.new(ofs)
+ tok=EndHeaderToken.new(ofs)
+ tok.endline= result[end_index-1].endline #@linenum
+ result.insert end_index+1,tok
break
end
tok=get1token
result<< tok
case tok
when EoiToken
lexerror tok,'unexpected eof in def header'
+ @moretokens= old_moretokens.concat @moretokens
+ return result
+
when StillIgnoreToken
when MethNameToken ,VarNameToken # /^#@@LETTER/o.token_pat
lexerror tok,'expected . or ::' unless state==:expect_name
state=:expect_op
when /^(\.|::)$/.token_pat
@@ -1254,11 +1429,14 @@
ctx.state=:def_body
state==:expect_op or lexerror tok,'expected identifier'
if endofs
result.insert( -2,ImplicitParamListEndToken.new(tok.offset) )
end
- result.insert( -2, EndHeaderToken.new(tok.offset) )
+ ehtok= EndHeaderToken.new(tok.offset)
+ #ehtok.endline=tok.endline
+ #ehtok.endline-=1 if NewlineToken===tok
+ result.insert( -2, ehtok )
break
else
lexerror(tok, "bizarre token in def name: " +
"#{tok}:#{tok.class}")
end
@@ -1423,19 +1601,301 @@
def keyword___LINE__(str,offset,result)
result.last.value=@linenum
return result
end
+
+ #-----------------------------------
+ def encoding_name_normalize name
+ name=name.dup
+ name.downcase!
+ name.tr_s! '-_',''
+ name=ENCODING_ALIASES[name] if ENCODING_ALIASES[name]
+ return name
+ end
+
module RubyLexer1_9
+ FUNCLIKE_KEYWORDLIST=RubyLexer::FUNCLIKE_KEYWORDLIST+FUNCLIKE_KEYWORDLIST_1_9
+ VARLIKE_KEYWORDLIST=RubyLexer::VARLIKE_KEYWORDLIST+VARLIKE_KEYWORDLIST_1_9
+ FUNCLIKE_KEYWORDS=/^(?:#{FUNCLIKE_KEYWORDLIST.join '|'})$/
+ VARLIKE_KEYWORDS=/^(?:#{VARLIKE_KEYWORDLIST.join '|'})$/
+ def FUNCLIKE_KEYWORDS orig=nil
+ /(?:#{orig||super()}|^(?:#{FUNCLIKE_KEYWORDLIST_1_9.join '|'})$)/
+ end
+
+ def VARLIKE_KEYWORDS orig=nil
+ /(?:#{orig||super()}|^(?:#{VARLIKE_KEYWORDLIST_1_9.join '|'})$)/
+ end
+
+ def rubylexer_modules_init
+ super
+ @FUNCLIKE_KEYWORDS=FUNCLIKE_KEYWORDS @FUNCLIKE_KEYWORDS unless @FUNCLIKE_KEYWORDS==="->"
+ @VARLIKE_KEYWORDS=VARLIKE_KEYWORDS @VARLIKE_KEYWORDS unless @VARLIKE_KEYWORDS==="__ENCODING__"
+ end
+
+ #-----------------------------------
+ def dquote_handle(ch)
+ dquote19_esc_seq(ch,'"','"')
+ end
+ #-----------------------------------
+ def dquote_handler_name
+ :dquote19_esc_seq
+ end
+ #-----------------------------------
+ def Wquote_handler_name
+ :Wquote19_esc_seq
+ end
+
+ #-----------------------------------
+ def method_params? # .()
+ lasttok=last_token_maybe_implicit #last_operative_token
+ super or
+ (lasttok and lasttok.ident=='.')
+ end
+
+ #-----------------------------------
+ def callsite_symbol(x)
+ return if nextchar==?(
+ super
+ end
+
+ #-----------------------------------
+ def read_encoding_line
+ if line=@file.scan(
+ /\A#{WSNONLCHARS}*#[\x00-\x7F]*?(?:en)?coding#{WSNONLCHARS}*[:=]#{WSNONLCHARS}*([a-z0-9_-]+)[\x00-\x7F]*$/io
+ )
+ name=@file.last_match[1]
+ name=encoding_name_normalize name
+ @encoding=name.to_sym if ENCODINGS.include? name
+ return line
+ end
+ end
+
+ #-----------------------------------
def keyword___ENCODING__(str,offset,result)
#result.last.value=huh
return result
end
+ #-----------------------------------
def keyword_not(*args,&block) _keyword_funclike(*args,&block) end
- end
+ #-----------------------------------
+ def special_identifier?(str,oldpos)
+ if @parsestack.last.wantarrow and @file.skip ":"
+ return SymbolToken.new(str,oldpos), KeywordToken.new(":",input_position-1,:as=>"=>")
+ else
+ return super
+ end
+ end
+
+ #-----------------------------------
+ def want_hard_nl?
+ return false if @file.check( /\A\n(?:#{WSTOKS})?[.:][^.:]/o )
+ super
+ end
+
+ #-----------------------------------
+ #RE_* shamelessly stolen from jcode.rb
+ RE_UTF8= /[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]|[\xf0-\xf7][\x80-\xbf]{3}/n #longer sequences are possible
+ RE_EUC= /[\xa1-\xfe][\xa1-\xfe]/n #is this complete?
+ RE_SJIS= /[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]/n #is this complete? windows31j?
+ ENCODING2EXTCHAR={
+ :utf8=>RE_UTF8,
+ :euc=>RE_EUC,
+ :sjis=>RE_SJIS,
+ :binary=>/[\x80-\xFF]/n,
+ :ascii=>nil
+ }
+
+ #handle ? in ruby code. is it part of ?..: or a character literal?
+ def char_literal_or_op(ch) #unicode char literals, etc
+ if colon_quote_expected? ch
+ #char literal
+ pos=input_position
+ getchar
+ extchar= ENCODING2EXTCHAR[@encoding]
+ result=
+ if extchar and extchar=@file.scan( extchar )
+ assign_encoding!(StringToken.new('"', extchar))
+ else
+ getchar_maybe_escape
+ assign_encoding!(StringToken.new('"', @file[pos+1...input_position]))
+ end
+ result.offset=pos
+ result.bs_handler=:dquote19_esc_seq
+ result.open='?'
+ result.close=''
+ return result
+ else #(ternary) operator
+ super
+ end
+ end
+
+ #-----------------------------------
+ def plusminus(ch) #->
+ pos=input_position
+ assert(/^[+\-]$/===ch)
+ if unary_op_expected?(ch) or
+ KeywordToken===@last_operative_token &&
+ /^(return|break|next)$/===@last_operative_token.ident
+ if '->' == readahead(2) #stabby proc
+ @file.pos+=2
+ #push down block context
+ localvars.start_block
+ @parsestack.push ctx=RubyLexer::BlockContext.new(@linenum)
+ ctx.wanting_stabby_block_body=true
+ #read optional proc params
+ block_param_list_lookahead ?(, RubyLexer::ParenedParamListLhsContext
+ result=RubyLexer::KeywordToken.new('->',pos)
+ result.offset=pos
+ return result
+ end
+ end
+ super
+ end
+
+ #-----------------------------------
+ #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
+ def equals(ch) # /(?<foo>bar)/=~'bar'; declares foo lvar
+ if readahead(2)=='=~' # =~... after regex, maybe?
+ last=last_operative_token
+
+ if StringToken===last and last.lvars
+ #ruby delays adding lvars from regexps to known lvars table
+ #for several tokens in some cases. not sure why or if on purpose
+ #i'm just going to add them right away
+ last.lvars.each{|lvar| localvars[lvar]=true }
+ end
+ end
+ return super
+ end
+
+ #-----------------------------------
+ def assign_encoding! str
+ #search for nonascii bytes
+ #either directly or via hex (\xXX) or octal (\NNN) escapes
+ #and \u escapes also
+ utf8=nonascii=false
+ str.elems.grep(String).each do|frag|
+ frag.scan(/#{EVEN_BS_S}(?:\\u|\\2[0-7][0-7]|\\x[89a-fA-F][0-9a-fA-F])|[^\x00-\x7F]/o) do |match|
+ if match[-1]==?u
+ utf8=true
+ break if nonascii
+ else
+ nonascii=true
+ break if utf8
+ end
+ end or break
+ end
+
+ lexerror(str,"utf8 and nonascii intermixed") if utf8 and nonascii and @encoding!=:utf8
+
+ #encoding is source encoding unless \u escape is found
+ str.utf8! if utf8
+
+ #maybe assign string fragments encodings if running under >=1.9?
+
+ return str
+ end
+
+ #-----------------------------------
+ def regex(ch=nil)
+ result=super
+ named_brs=[]
+ if result.elems.size==1 and String===result.elems.first
+ elem=result.elems.first
+ index=0
+ while index=elem.index(/(#{EVEN_BS_S})( \(\?[<'] | \(\?\# | \[ )/xo,index)
+ index+=$1.size
+ case $2
+ when "(?<"
+ index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)>/o,index)
+ break lexerror(result, "malformed named backreference") unless index
+ index+=$&.size
+ named_brs<<$1
+ when "(?'"
+ index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)'/o,index)
+ break lexerror(result, "malformed named backreference") unless index
+ index+=$&.size
+ named_brs<<$1
+ when "(?#"
+ index+=3
+ index=elem.index(/#{EVEN_BS_S}\)/o,index)
+ break lexerror(result, "unterminated regexp comment") unless index
+ index+=$&.size
+ when "["
+ index+=1
+ paren_ctr=1
+ loop do
+ index=elem.index(/#{EVEN_BS_S}(&&\[\^|\])/o,index)
+ break lexerror(result, "unterminated character class") unless index
+ index+=$&.size
+ if $1==']'
+ paren_ctr-=1
+ break if paren_ctr==0
+ else
+ paren_ctr+=1
+ end
+ end
+ break unless index
+
+ end
+ end
+ result.lvars= named_brs unless named_brs.empty?
+ end
+ return result
+ end
+
+ def build_method_operators
+ /#{RUBYSYMOPERATORREX}|\A![=~@]?|\A`/o
+ end
+
+ include RubyLexer::NestedContexts
+
+ def semicolon_in_block_param_list?
+ ParenedParamListLhsContext===@parsestack.last ||
+ BlockParamListLhsContext===@parsestack.last
+ end
+
+ def is__ENCODING__keyword?(name)
+ "__ENCODING__"==name
+ end
+
+ #-----------------------------------
+ def colon_operator tok
+ if TernaryContext===@parsestack.last
+ tok.ternary=true
+ @parsestack.pop #should be in the context's see handler
+ end
+ end
+
+ def maybe_end_stabby_block_param_list(tokch)
+ stabby_params_just_ended=false
+ (@parsestack.size-1).downto(1){|i|
+ case @parsestack[i]
+ when ParamListContextNoParen,AssignmentRhsContext
+ #do nothing yet... see if inside a UnparenedParamListLhsContext
+ when UnparenedParamListLhsContext #stabby proc
+ @moretokens<<tokch
+ (@parsestack.size-1).downto(i){|j|
+ @moretokens.unshift @parsestack[j].endtoken(input_position-1)
+ }
+ @parsestack[i..-1]=[]
+ tokch=@moretokens.shift
+ stabby_params_just_ended=true
+ break
+ else break
+ end
+ }
+ return stabby_params_just_ended,tokch
+ end
+ end #module RubyLexer1_9
+
+ def semicolon_in_block_param_list?; end
+ def is__ENCODING__keyword?(name); end
+
def _keyword_funclike(str,offset,result)
if @last_operative_token===/^(\.|::)$/
result=yield MethNameToken.new(str) #should pass a methname token here
else
tok=KeywordToken.new(str)
@@ -1490,11 +1950,11 @@
#-----------------------------------
def block_param_list_lookahead starter=?|, ctx_type=BlockParamListLhsContext
safe_recurse{ |la|
- set_last_token KeywordToken.new( ';' )
+ set_last_token KeywordToken.new( ';' )
a=ignored_tokens
if eat_next_if(starter)
mycontext=ctx_type.new(@linenum)
a<< KeywordToken.new(mycontext.starter, input_position-1)
@@ -1538,11 +1998,11 @@
end
end
elsif starter==?(
ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
@parsestack.push ctx_type.new(@linenum)
- a<<KwParamListStartToken.new( input_position )
+ a<<ImplicitParamListStartToken.new( input_position )
end
set_last_token KeywordToken.new( ';' )
#a.concat ignored_tokens
@@ -1620,11 +2080,11 @@
def method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
listend=nil
set_last_token KeywordToken.new( ',' )#hack
nextvar=nil
loop do
- expect_name=(@last_operative_token===',' and
+ expect_name=(/^[,;]$/===@last_operative_token.ident and
normal_comma_level==@parsestack.size)
expect_name and @defining_lvar||=true
result << tok=get1token
break lexerror(tok, "unexpected eof in def header") if EoiToken===tok
@@ -1695,11 +2155,11 @@
assert('*&'[ch])
want_unary=unary_op_expected?(ch) ||
(@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
result=quadriop(ch)
if want_unary
- #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
+ #readahead(2)[1..1][/[#@@WSCHARS#\\]/o] or #not needed?
assert OperatorToken===result
result.tag=:unary #result should distinguish unary+binary *&
WHSPLF[nextchar.chr] or
@moretokens << NoWsToken.new(input_position)
cill=comma_in_lvalue_list?
@@ -1722,17 +2182,19 @@
#-----------------------------------
#handle ? in ruby code. is it part of ?..: or a character literal?
def char_literal_or_op(ch)
if colon_quote_expected? ch
getchar
- if @rubyversion >= 1.9
- StringToken.new getchar_maybe_escape
- else
+# if @rubyversion >= 1.9
+# assign_encoding! StringToken.new getchar_maybe_escape
+# else
ch=getchar_maybe_escape[0]
ch=ch.ord if ch.respond_to? :ord
- NumberToken.new ch
- end
+ result=NumberToken.new ch
+ result.char_literal=true
+ return result
+# end
else
@parsestack.push TernaryContext.new(@linenum)
KeywordToken.new getchar #operator
end
end
@@ -1745,11 +2207,11 @@
@parsestack.pop
op=true
end
if !op and after_nonid_op?{
- !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}]
+ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[#@@WSCHARS=]}o]
} || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
return regex(ch)
else #/ is operator
result=getchar
if eat_next_if(?=)
@@ -1770,20 +2232,20 @@
s=tok.to_s
case s
when /^[@$]/; true
when /^<</; HerePlaceholderToken===tok
when /(?!#@@LETTER_DIGIT).$/o; false
-# when /^#@@LCLETTER/o; localvars===s or VARLIKE_KEYWORDS===s
+# when /^#@@LCLETTER/o; localvars===s or @VARLIKE_KEYWORDS===s
when /^#@@LETTER/o; VarNameToken===tok
else raise "not var or method name: #{s}"
end
end
#-----------------------------------
def colon_quote_expected?(ch) #yukko hack
assert ':?'[ch]
- readahead(2)[/^(\?[^#{WHSPLF}]|:[^\s\r\n\t\f\v :])$/o] or return false
+ readahead(2)[/^(\?[^#{WHSPLF}]|:[^#@@WSCHARS :])$/o] or return false
after_nonid_op? {
#possible func-call as operator
not is_var_name? and
@@ -1802,68 +2264,70 @@
qe= colon_quote_expected?(ch)
lastchar=prevchar
eat_next_if(ch[0]) or raise "needed: "+ch
- if nextchar==?( and @enable_macro
+ if nextchar==?( and @enable_macro #factored
result= OperatorToken.new(':', startpos)
result.unary=true
return result
end
#handle quoted symbols like :"foobar", :"[]"
- qe and return symbol(':')
+ if qe
+ return symbol(':')
+ elsif eat_next_if(?:)
+ #we definately found a ::
- #look for another colon; return single : if not found
- unless eat_next_if(?:)
+ colon2=KeywordToken.new( '::',startpos)
+ lasttok=@last_operative_token
+ assert !(String===lasttok)
+ if (VarNameToken===lasttok or MethNameToken===lasttok) and
+ lasttok===/^(?:[$@]|#@@LETTER)/o and !WHSPCHARS[lastchar]
+ then
+ @moretokens << colon2
+ result= NoWsToken.new(startpos)
+ else
+ result=colon2
+ end
+ dot_rhs(colon2)
+ return result
+
+ #return single : token
+ else
#cancel implicit contexts...
- @moretokens.push(*abort_noparens!(':'))
+ @moretokens.push(*abort_noparens!(':')) #special treatment not needed in 1.9 mode?
@moretokens.push tok=KeywordToken.new(':',startpos)
- case @parsestack.last
- when TernaryContext
- tok.ternary=true
- @parsestack.pop #should be in the context's see handler
- when ExpectDoOrNlContext #should be in the context's see handler
- if @rubyversion<1.9
- @parsestack.pop
- assert @parsestack.last.starter[/^(while|until|for)$/]
- tok.as=";"
- end
- when ExpectThenOrNlContext,WhenParamListContext
- if @rubyversion<1.9
- #should be in the context's see handler
- @parsestack.pop
- tok.as="then"
- end
- when RescueSMContext
- tok.as=";"
- end or
+ colon_operator(tok) or
fail ": not expected in #{@parsestack.last.class}->#{@parsestack.last.starter}"
-
#end ternary context, if any
@parsestack.last.see self,:colon
return @moretokens.shift
end
- #we definately found a ::
+ end
- colon2=KeywordToken.new( '::',startpos)
- lasttok=@last_operative_token
- assert !(String===lasttok)
- if (VarNameToken===lasttok or MethNameToken===lasttok) and
- lasttok===/^(?:[$@]|#@@LETTER)/o and !WHSPCHARS[lastchar]
- then
- @moretokens << colon2
- result= NoWsToken.new(startpos)
- else
- result=colon2
- end
- dot_rhs(colon2)
- return result
+ #-----------------------------------
+ def colon_operator tok
+ case @parsestack.last
+ when TernaryContext
+ tok.ternary=true
+ @parsestack.pop #should be in the context's see handler
+ when ExpectDoOrNlContext #should be in the context's see handler
+ @parsestack.pop
+ assert @parsestack.last.starter[/^(while|until|for)$/]
+ tok.as=";"
+ when ExpectThenOrNlContext,WhenParamListContext
+ #should be in the context's see handler
+ @parsestack.pop
+ tok.as="then"
+ when RescueSMContext
+ tok.as=";"
+ end
end
#-----------------------------------
def symbol(notbare,couldbecallsite=!notbare)
assert !couldbecallsite
@@ -1881,17 +2345,18 @@
double_quote('"')
when ?' #'
assert notbare
open=":'"; close="'"
single_quote("'")
- when ?` then read(1) #`
+# when ?` then read(1) #`
when ?@ then at_identifier.to_s
when ?$ then dollar_identifier.to_s
when ?_,?a..?z,NONASCII then identifier_as_string(?:)
when ?A..?Z then
result=identifier_as_string(?:)
if @last_operative_token==='::'
+ fail #i think this can't happen anymore now
assert klass==MethNameToken
/#@@LETTER_DIGIT$/o===result and klass=VarNameToken
end
result
else
@@ -1917,17 +2382,17 @@
#look for operators
opmatches=readahead(3)[@method_operators]
return [read(opmatches.size), start] if opmatches
case nc=nextchar
- when ?` #`
- return [read(1),start]
+# when ?` #`
+# return [read(1),start]
when ?_,?a..?z,?A..?Z,NONASCII
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
return [identifier_as_string(context), start]
when ?(
- return [nil,start] if @enable_macro or @rubyversion>=1.9
+ return [nil,start] if @enable_macro or @rubyversion>=1.9 #factored
end
set_last_token KeywordToken.new(';')
lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
return [nil, start]
@@ -1940,20 +2405,21 @@
dash=eat_next_if(?-)
quote=eat_next_if( /['"`]/)
if quote
ender=til_charset(/[#{quote}]/)
(quote==getchar) or
- return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
+ return lexerror(res=HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
quote_real=true
else
quote='"'
ender=@file.scan(/#@@LETTER_DIGIT+/o)
ender.length >= 1 or
- return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
+ return lexerror(res=HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
end
res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
+ res.line=linenum
if true
res.open=["<<",dash,quote,ender,quote].join
procrastinated=til_charset(/[\n]/)#+readnl
unless @base_file
@base_file=@file
@@ -1980,18 +2446,19 @@
#one or two already read characters are overwritten here,
#in order to keep offsets correct in the long term
#(at present, offsets and line numbers between
#here header and its body will be wrong. but they should re-sync thereafter.)
- newpos=input_position_raw-nl.size
+ newpos=input_position_raw
#unless procrastinated.empty?
- @file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input
+ @file.modify(newpos,0,procrastinated) #vomit procrastinated text back onto input
#end
+ #@offset_adjust2=-1 #nice idea, but crashes 1.9.2 and causes more warnings than it fixes... :(
input_position_set newpos
#line numbers would be wrong within the procrastinated section
- @linenum-=1
+ @linenum=res.line #was: @linenum-=1
#be nice to get the here body token at the right place in input, too...
@pending_here_bodies<< body
@offset_adjust-=bodysize#+nl.size
@@ -2036,10 +2503,12 @@
#the action continues in newline, where
#the rest of the here token is read after a
#newline has been seen and res.affix is eventually called
end
+ ensure
+ assign_encoding!(res.string) if res
end
#-----------------------------------
def lessthan(ch) #match quadriop('<') or here doc or spaceship op
case readahead(3)
@@ -2071,17 +2540,17 @@
if @base_file and indices=@file.instance_eval{@start_pos} and
(indices[-2]..indices[-1])===@file.pos
@base_file.pos=@file.pos
@file=@base_file
@base_file=nil
- result="\n"
+# result="\n"
end
@offset_adjust=@min_offset_adjust
@moretokens.push( *optional_here_bodies )
ln=@linenum
- @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln-1), error),
+ @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln), error),
FileAndLineToken.new(@filename,ln,input_position)
start_of_line_directives
return @moretokens.shift
@@ -2089,21 +2558,24 @@
#-----------------------------------
def optional_here_bodies
result=[]
if true
- #handle here bodies queued up by previous line
- pos=input_position
- while body=@pending_here_bodies.shift
+ #handle here bodies queued up by previous line
+ pos=input_position
+ while body=@pending_here_bodies.shift
#body.offset=pos
- result.push EscNlToken.new("\n",body.offset-1,@filename,nil)
- result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
+ result.push EscNlToken.new("\n",body.offset-1,@filename,@linenum)
+ result.push FileAndLineToken.new(@filename,@linenum,body.offset)
result.push body
#result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
#result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
- body.headtok.line=@linenum-1
- end
+ @linenum+=body.linecount
+ body.endline=@linenum-1
+ # body.startline=@linenum-1-body.linecount
+ end
+
else
#...(we should be more compatible with dos/mac style newlines...)
while tofill=@incomplete_here_tokens.shift
result.push(
here_body(tofill),
@@ -2120,10 +2592,11 @@
def here_body(tofill)
close="\n"
tofill.string.offset= input_position
linecount=1 #for terminator
assert("\n"==prevchar)
+ startline=@linenum
loop {
assert("\n"==prevchar)
#here body terminator?
oldpos= input_position_raw
@@ -2135,12 +2608,14 @@
lexerror tofill.string, "unterminated here body"
break
end
if read(tofill.ender.size)==tofill.ender
crs=til_charset(/[^\r]/)||''
- if nl=readnl
- close+=tofill.ender+crs+nl
+ nl=nextchar
+ if !nl or nl==?\n
+ close+=tofill.ender+crs
+ close+="\n" if nl
break
end
end
input_position_set oldpos
@@ -2221,35 +2696,39 @@
result=tofill.bodyclass.new(tofill,linecount)
result.open=str.open=""
tofill.close=close
result.close=str.close=close[1..-1]
result.offset=str.offset
+ result.endline=@linenum-1
+ result.startline=startline
assert str.open
assert str.close
return result
end
#-----------------------------------
+ def want_hard_nl?
+ NewlineToken===@last_operative_token || #hack
+ (KeywordToken===@last_operative_token and
+ @last_operative_token.ident=="rescue" and
+ !@last_operative_token.infix?) ||
+ !after_nonid_op?{false}
+ end
+
+ #-----------------------------------
def newline(ch)
assert("\r\n"[nextchar.chr])
#ordinary newline handling (possibly implicitly escaped)
assert("\r\n"[nextchar.chr])
assert !@parsestack.empty?
assert @moretokens.empty?
- pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
- pre.allow_ooo_offset=true
+ hard=want_hard_nl?
- hard=NewlineToken===@last_operative_token || #hack
- (KeywordToken===@last_operative_token and
- @last_operative_token.ident=="rescue" and
- !@last_operative_token.infix?) ||
- !after_nonid_op?{false}
+# hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
- hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
-
if hard
@offset_adjust=@min_offset_adjust
a= abort_noparens!
case @parsestack.last #these should be in the see:semi handler
when ExpectDoOrNlContext; @parsestack.pop
@@ -2257,17 +2736,19 @@
end
assert !@parsestack.empty?
@parsestack.last.see self,:semi
a << rulexer_newline(ch)
- @moretokens.replace a+@moretokens
+ a+=@moretokens
+ @moretokens.replace a
else
@offset_adjust=@min_offset_adjust
offset= input_position
nl=readnl
- @moretokens.push EscNlToken.new(nl,offset,@filename,@linenum-1),
- FileAndLineToken.new(@filename,@linenum,input_position)
+ a=[EscNlToken.new(nl,offset,@filename,@linenum),
+ FileAndLineToken.new(@filename,@linenum,input_position)]
+ @moretokens.push( *a )
end
#optimization: when thru with regurgitated text from a here document,
#revert back to original unadorned Sequence instead of staying in the list.
if @base_file and indices=@file.instance_eval{@start_pos} and
@@ -2282,27 +2763,17 @@
@offset_adjust=@min_offset_adjust
@moretokens.unshift(*optional_here_bodies)
- #adjust line count in fal to account for newlines in here bodys
- i=@moretokens.size-1
- while(i>=0)
- #assert FileAndLineToken===@moretokens[i]
- i-=1 if FileAndLineToken===@moretokens[i]
- break unless HereBodyToken===@moretokens[i]
- pre_fal=true
- fal.line-=@moretokens[i].linecount
+ #adjust line #s to account for newlines in here bodys
+ l=@linenum
+ a.reverse_each{|implicit|
+ implicit.endline=l
+ l-=1 if EscNlToken===implicit or NewlineToken===implicit
+ }
- i-=1
- end
-
- if pre_fal
- result=@moretokens.first
- pre.offset=result.offset
- @moretokens.unshift pre
- end
start_of_line_directives
result=@moretokens.shift
return result
end
@@ -2315,10 +2786,11 @@
ENDMARKER=/^__END__[\r\n]?\Z/
ENDMARKERLENGTH=8
def start_of_line_directives
#handle =begin...=end (at start of a line)
while EQBEGIN===readahead(EQBEGINLENGTH)
+ startline=@linenum
startpos= input_position
more= read(EQBEGINLENGTH-1) #get =begin
begin
eof? and raise "eof before =end"
@@ -2335,12 +2807,14 @@
# newls= more.scan(/\r\n?|\n\r?/)
# @linenum+= newls.size
#inject the fresh comment into future token results
- @moretokens.push IgnoreToken.new(more,startpos),
- FileAndLineToken.new(@filename,@linenum,input_position)
+ comment=IgnoreToken.new(more,startpos)
+ comment.startline=startline
+ comment.endline=@linenum
+ @moretokens.push comment, FileAndLineToken.new(@filename,@linenum,input_position)
end
#handle __END__
if ENDMARKER===readahead(ENDMARKERLENGTH)
assert !(ImplicitContext===@parsestack.last)
@@ -2351,16 +2825,18 @@
#-----------------------------------
#used to resolve the ambiguity of
- # unary ops (+, -, *, &, ~ !) in ruby
+ # unary ops (+, -, *, &, (and ^ if macros enabled) ) in ruby
#returns whether current token is to be the start of a literal
IDBEGINCHAR=/^(?:#@@LETTER|[$@])/o
def unary_op_expected?(ch) #yukko hack
- '*&='[readahead(2)[1..1]] and return false
+ #not unary if its anything followed by = or &/* followed by themselves
+ return false if /^(?:.=|([&*])\1)$/===readahead(2)
+
return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
after_nonid_op? {
#possible func-call as operator
@@ -2395,25 +2871,25 @@
#returns what block yields if last token was a method name.
#used to resolve the ambiguity of
# <<, %, /, ?, :, and newline (among others) in ruby
def after_nonid_op?
- #this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
+ #this is how it should be, I think, and then no handlers for methnametoken and @FUNCLIKE_KEYWORDS are needed
# if ImplicitParamListStartToken===@last_token_including_implicit
# huh return true
# end
case @last_operative_token
- when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
+ when VarNameToken , MethNameToken, @FUNCLIKE_KEYWORDS.token_pat
#VarNameToken should really be left out of this case...
#should be in next branch instread
#callers all check for last token being not a variable if they pass anything
#but {false} in the block
#(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
return yield
when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
%r{^(
- end|self|true|false|nil|->|
+ end|self|true|false|nil|
__FILE__|__LINE__|__ENCODING__|[\})\]]
)$}x.token_pat
#dunno about def/undef
#maybe class/module shouldn't he here either?
#for is also in NewlineToken branch, below.
@@ -2423,11 +2899,11 @@
#regexs above must match whole string
#assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
return true if OperatorToken===@last_operative_token || KeywordToken===@last_operative_token
when NewlineToken, nil, #nil means we're still at beginning of file
/^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
- while|until|begin|for|in|case|when|ensure|defined\?)$
+ while|until|begin|for|in|case|when|ensure|defined\?|->)$
/x.token_pat
return true
when KeywordToken
return true if /^(alias|undef)$/===@last_operative_token.ident #is this ever actually true???
when IgnoreToken
@@ -2476,11 +2952,11 @@
end
#-----------------------------------
def caret(ch) #match /^=?/ (^ or ^=) (maybe unary ^ too)
- if @enable_macro and (@last_token_maybe_implicit and
+ if @enable_macro and (@last_token_maybe_implicit and #factored
@last_token_maybe_implicit.ident=='(') || unary_op_expected?(ch)
result=OperatorToken.new(read(1),input_position)
result.unary=true
result
else
@@ -2531,19 +3007,19 @@
if unary_op_expected?(ch) or
KeywordToken===@last_operative_token &&
/^(return|break|next)$/===@last_operative_token.ident
if (?0..?9)===readahead(2)[1]
result= number(ch)
- elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
- @file.pos+=2
- #push down block context
- localvars.start_block
- @parsestack.push ctx=BlockContext.new(@linenum)
- ctx.wanting_stabby_block_body=true
- #read optional proc params
- block_param_list_lookahead ?(, ParenedParamListLhsContext
- result=KeywordToken.new('->',pos)
+# elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
+# @file.pos+=2
+# #push down block context
+# localvars.start_block
+# @parsestack.push ctx=BlockContext.new(@linenum)
+# ctx.wanting_stabby_block_body=true
+# #read optional proc params
+# block_param_list_lookahead ?(, ParenedParamListLhsContext
+# result=KeywordToken.new('->',pos)
else #unary operator
result=getchar
WHSPLF[nextchar.chr] or
@moretokens << NoWsToken.new(input_position)
@@ -2579,24 +3055,24 @@
@moretokens.unshift result
@moretokens.unshift( *abort_noparens!("=>"))
result=@moretokens.shift
end
@parsestack.last.see self,:arrow
- when '~' # =~... after regex, maybe?
- last=last_operative_token
-
- if @rubyversion>=1.9 and StringToken===last and last.lvars
- #ruby delays adding lvars from regexps to known lvars table
- #for several tokens in some cases. not sure why or if on purpose
- #i'm just going to add them right away
- last.lvars.each{|lvar| localvars[lvar]=true }
- end
+# when '~' # =~... after regex, maybe?
+# last=last_operative_token
+#
+# if @rubyversion>=1.9 and StringToken===last and last.lvars
+# #ruby delays adding lvars from regexps to known lvars table
+# #for several tokens in some cases. not sure why or if on purpose
+# #i'm just going to add them right away
+# last.lvars.each{|lvar| localvars[lvar]=true }
+# end
when '' #plain assignment: record local variable definitions
last_context_not_implicit.lhs=false
@last_operative_token=result
@moretokens.push( *ignored_tokens(true).map{|x|
- NewlineToken===x ? EscNlToken.new(x.ident,x.offset,@filename,@linenum) : x
+ NewlineToken===x ? EscNlToken.new(x.ident,x.offset,x.filename,x.linenum) : x
} )
@parsestack.push AssignmentRhsContext.new(@linenum)
@moretokens.push AssignmentRhsListStartToken.new( input_position)
if eat_next_if ?*
tok=OperatorToken.new('*', input_position-1)
@@ -2619,41 +3095,43 @@
result+=k
elsif eof? or WHSPLF[nextchar.chr] #do nothing
else
@moretokens << NoWsToken.new(input_position)
end
- ty= @rubyversion>=1.9 ? OperatorToken : KeywordToken
+ ty=OperatorToken
result=ty.new(result, input_position-result.size)
result.unary=!k #result should distinguish unary !
return result
end
-
#-----------------------------------
def dot(ch)
str=''
eat_next_if(?.) or raise "lexer confusion"
#three lumps of sugar or two?
eat_next_if(?.) and
- return KeywordToken.new(eat_next_if(?.)? "..." : "..")
+ return OperatorToken.new(eat_next_if(?.)? "..." : "..")
#else saw just single .
#match a valid ruby id after the dot
- result= KeywordToken.new( ".")
+ result= KeywordToken.new( "." )
dot_rhs(result)
return result
end
#-----------------------------------
def dot_rhs(prevtok)
safe_recurse { |a|
set_last_token prevtok
aa= ignored_tokens
was=after_nonid_op?{true}
tok,pos=callsite_symbol(prevtok)
- tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was))
+ if tok
+ toks=var_or_meth_name(tok,prevtok,pos,was)
+ aa.push(*toks)
+ end
a.unshift(*aa)
}
end
#-----------------------------------
@@ -2690,12 +3168,24 @@
#}
return IgnoreToken.new(result)
end
end
+
#-----------------------------------
+ def method_params?
+ lasttok=last_token_maybe_implicit #last_operative_token
+ VarNameToken===lasttok or
+ MethNameToken===lasttok or
+ lasttok===@FUNCLIKE_KEYWORDS or
+ (@enable_macro and lasttok and lasttok.ident==')') #factored
+ end
+
+ #-----------------------------------
def open_brace(ch)
+ #there are 3 distinct cases here; this method should be divided in 3
+
assert((ch!='[' or !want_op_name))
assert(@moretokens.empty?)
lastchar=prevchar
ch=eat_next_if(/[({\[]/)or raise "lexer confusion"
tokch=KeywordToken.new(ch, input_position-1)
@@ -2705,30 +3195,23 @@
case tokch.ident
when '['
# in contexts expecting an (operator) method name, we
# would want to match [] or []= at this point
#but control never comes this way in those cases... goes
- #to custom parsers for alias, undef, and def in #parse_keywords
+ #to custom parsers for alias, undef, and def in #special_identifier?
tokch.set_infix! unless after_nonid_op?{WHSPLF[lastchar]}
@parsestack.push ListImmedContext.new(ch,@linenum)
lasttok=last_operative_token
#could be: lasttok===/^#@@LETTER/o
if (VarNameToken===lasttok or ImplicitParamListEndToken===lasttok or
- MethNameToken===lasttok or lasttok===FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
+ MethNameToken===lasttok or lasttok===@FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
@moretokens << (tokch)
tokch= NoWsToken.new(input_position-1)
end
when '('
- lasttok=last_token_maybe_implicit #last_operative_token
#could be: lasttok===/^#@@LETTER/o
- method_params= (
- VarNameToken===lasttok or
- MethNameToken===lasttok or
- lasttok===FUNCLIKE_KEYWORDS or
- (@enable_macro and lasttok and lasttok.ident==')')
- )
- if method_params
+ if method_params?
unless WHSPCHARS[lastchar]
@moretokens << tokch
tokch= NoWsToken.new(input_position-1)
end
@parsestack.push ParamListContext.new(@linenum)
@@ -2751,17 +3234,23 @@
when '{'
#check if we are in a hash literal or string inclusion (#{}),
#in which case below would be bad.
if !(UnparenedParamListLhsContext===@parsestack.last) and
after_nonid_op?{false} || @last_operative_token.has_no_block?
- @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
+ if @file.readbehind(2)=='#{'
+ @parsestack.push StringInclusionContext.new(@linenum)
+ else
+ @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
+ end
else
#abort_noparens!
tokch.set_infix!
tokch.as="do"
#if (perhaps deep) inside a stabby block param list context, end it
+ stabby_params_just_ended,tokch=maybe_end_stabby_block_param_list(tokch)
+=begin was
if @rubyversion>=1.9
stabby_params_just_ended=false
(@parsestack.size-1).downto(1){|i|
case @parsestack[i]
when ParamListContextNoParen,AssignmentRhsContext
@@ -2777,10 +3266,11 @@
break
else break
end
}
end
+=end
# 'need to find matching callsite context and end it if implicit'
lasttok=last_operative_token
if !(lasttok===')' and lasttok.callsite?) and !stabby_params_just_ended #or ParamListContextNoParen===parsestack.last
@moretokens.push( *(abort_1_noparen!(1).push tokch) )
@@ -2798,10 +3288,15 @@
end
return (tokch)
end
#-----------------------------------
+ def maybe_end_stabby_block_param_list(tokch)
+ return false,tokch
+ end
+
+ #-----------------------------------
def close_brace(ch)
ch==eat_next_if(/[)}\]]/) or raise "lexer confusion"
@moretokens.concat abort_noparens!(ch)
@parsestack.last.see self,:semi #hack
@moretokens<< kw=KeywordToken.new( ch, input_position-1)
@@ -2844,16 +3339,24 @@
return(endoffile_detected result)
end
#-----------------------------------
def endoffile_detected(s='')
- @moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s)))
+ @linenum+=1 #optional_here_bodies expects to be called after a newline was seen and @linenum bumped
+ #in this case, there is no newline, but we need to pretend there is. otherwise optional_here_bodies
+ #makes tokens with wrong line numbers
+
+ @moretokens.concat optional_here_bodies
+ @linenum-=1 #now put it back
+ @moretokens.concat abort_noparens!
+ @moretokens.push rulexer_endoffile_detected(s)
if @progress_thread
@progress_thread.kill
@progress_thread=nil
end
result= @moretokens.shift
+ assert @pending_here_bodies.empty?
balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
result
end
#-----------------------------------
@@ -2877,12 +3380,15 @@
# AssignmentRhsContext
#]===@parsestack
while AssignmentRhsContext===@parsestack[-1]
pop=
case @parsestack[-2]
- when ParamListContext,ParamListContextNoParen,WhenParamListContext,
- ListImmedContext,AssignmentRhsContext; true
+ when ParamListContext,ParamListContextNoParen,
+ WhenParamListContext,ListImmedContext,AssignmentRhsContext,
+ ParenedParamListLhsContext,UnparenedParamListLhsContext,
+ BlockParamListLhsContext,KnownNestedLhsParenContext
+ true
when RescueSMContext; @parsestack[-2].state==:rescue
when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens?
else false
end
break unless pop
@@ -2902,11 +3408,11 @@
end
#-----------------------------------
def semicolon(ch)
assert @moretokens.empty?
- @moretokens.push(*abort_noparens!)
+ @moretokens.push(*abort_noparens!(';',0))
@parsestack.last.see self,:semi
case @parsestack.last #should be in context's see:semi handler
when ExpectThenOrNlContext
@parsestack.pop
when ExpectDoOrNlContext
@@ -2930,11 +3436,56 @@
end
#-----------------------------------
#tokenify_results_of :identifier
save_offsets_in(*CHARMAPPINGS.values.uniq-[
- :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret,:plusminus
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:caret,:plusminus
])
+ save_linenums_in :symbol_or_op,:open_brace,:whitespace,:exclam,:caret,:plusminus
#save_offsets_in :symbol
end
+
+#defense against my class being redefined by a certain other project.
+module Kernel
+ eval %w[require load].map{|name| <<-END }.join
+ #{name}__without_rubylexer_protection=instance_method :#{name}
+ define_method(:#{name}) do |file|
+ if /\\Aruby_(lexer|parser)(\\.rb)?\\z/i===File.basename(file)
+ warn "Uh-oh, you're trying to use ruby_parser and rubylexer at the same time."
+ warn "ruby_parser causes a namespace conflict with rubylexer"
+ warn "because ruby_parser redefines the class RubyLexer"
+ warn "in a way which is incompatible with standard RubyLexer."
+ warn "The rubylexer gem owns the namespace ::RubyLexer,"
+ warn "and claimed it at least 2 years before ruby_parser existed."
+ warn "Attempt to redefine RubyLexer in an incompatible way disabled."
+ else
+ begin
+ #{name}__without_rubylexer_protection.bind(self).call file
+ rescue Exception=>e
+ e.backtrace.delete_if{|x| /\\A\#{__FILE__}:\#{__LINE__-2}:/o===x }
+ raise e
+ end
+ end
+ end
+ END
+end
+
+eval %w[class\ Module module\ Kernel].map{|ctx| <<END }.join
+ #{ctx}
+ autoload__without_rubylexer_protection=instance_method :autoload
+ define_method(:autoload) do |mod,file|
+ if /\\Aruby_(lexer|parser)(\\.rb)?\\z/i===File.basename(file)
+ warn "Uh-oh, you're trying to use ruby_parser and rubylexer at the same time."
+ warn "ruby_parser causes a namespace conflict with rubylexer"
+ warn "because ruby_parser redefines the class RubyLexer"
+ warn "in a way which is incompatible with standard RubyLexer."
+ warn "The rubylexer gem owns the namespace ::RubyLexer,"
+ warn "and claimed it at least 2 years before ruby_parser existed."
+ warn "Attempt to redefine RubyLexer in an incompatible way disabled."
+ else
+ autoload__without_rubylexer_protection.bind(self).call mod,file
+ end
+ end
+ end
+END