rubylexer.rb in rubylexer-0.7.7

- old
+ new

@@ -107,21 +107,26 @@
          ?: => :symbol_or_op,
          ?\n => :newline, #implicitly escaped after op
          #?\r => :newline, #implicitly escaped after op
 
          ?\\ => :escnewline,
-         ?\x00 => :eof,
-         ?\x04 => :eof,
-         ?\x1a => :eof,
 
          "[({" => :open_brace,
          "])}" => :close_brace,
 
 
          ?# => :comment,
 
-         NONASCII => :identifier,
+         ?\x00 => :eof,
+         ?\x04 => :eof,
+         ?\x1a => :eof,
+
+         ?\x01..?\x03 => :illegal_char,
+         ?\x05..?\x08 => :illegal_char,
+         ?\x0E..?\x19 => :illegal_char,
+         ?\x1b..?\x1F => :illegal_char,
+         ?\x7F => :illegal_char,
    }
 
    attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
 
    UCLETTER=@@UCLETTER="[A-Z]"
@@ -136,28 +141,32 @@
    LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]"
    eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| "
      def #{n}; #{n}; end
      def self.#{n}; @@#{n}; end
      " 
-   }.to_s
+   }.join
 
    NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
-   NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu']  #chars that begin NEVERSTARTPARAMLIST
+   if ?A.is_a? String #ruby >= 1.9
+     NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
+   else
+     NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu']  #chars that begin NEVERSTARTPARAMLIST
+   end
    NEVERSTARTPARAMLISTMAXLEN=7     #max len of a NEVERSTARTPARAMLIST
 
 =begin
    require 'jcode'
    utf8=String::PATTERN_UTF8 #or euc, or sjis...
    LCLETTER_U="(?>[a-z_]|#{utf8})"
    LETTER_U="(?>[A-Za-z_]|#{utf8})"
-   IDENTCHAR_U="(?>[A-Za-z_0-9]|#{utf8})"
+   LETTER_DIGIT_U="(?>[A-Za-z_0-9]|#{utf8})"
 =end
 
    #-----------------------------------
-   def initialize(filename,file,linenum=1,offset_adjust=0,options={:rubyversion=>1.8})
+   def initialize(filename,file,linenum=1,offset_adjust=0,options={})
       @offset_adjust=0 #set again in next line
-      super(filename,file, linenum,offset_adjust)
+      rulexer_initialize(filename,file, linenum,offset_adjust)
       @start_linenum=linenum
       @parsestack=[TopLevelContext.new]
       @incomplete_here_tokens=[] #not used anymore
       @pending_here_bodies=[]
       @localvars_stack=[SymbolTable.new]
@@ -166,20 +175,21 @@
       @last_operative_token=nil
       @last_token_maybe_implicit=nil
       @enable_macro=nil
       @base_file=nil
       @progress_thread=nil
-      @rubyversion=options[:rubyversion]
+      @rubyversion=options[:rubyversion]||1.8
       @encoding=options[:encoding]||:detect
       @method_operators=if @rubyversion>=1.9 
-                          /#{RUBYSYMOPERATORREX}|\A![=~]?\Z/o
+                          /#{RUBYSYMOPERATORREX}|\A![=~@]?/o
                         else
                           RUBYSYMOPERATORREX
                         end
 
-      @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
+      @toptable=CharHandler.new(self, :identifier, CHARMAPPINGS)
 
+      extend RubyLexer1_9 if @rubyversion>=1.9
       read_leading_encoding
       start_of_line_directives
       progress_printer
    end
 
@@ -201,15 +211,15 @@
    }
    ENCODINGS=%w[ascii binary utf8 euc sjis]
    def read_leading_encoding
      return unless @encoding==:detect
      @encoding=:ascii
-     @encoding=:utf8 if @file.skip( /\xEF\xBB\xBF/ )   #bom
+     @encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" )   #bom
      if @file.skip( /\A#!/ )
        loop do
          til_charset( /[\s\v]/ )
-         break if @file.match( / ([^-\s\v]|--[\s\v])/,4 )
+         break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 )
          if @file.skip( /.-K(.)/ )
            case $1
            when 'u'; @encoding=:utf8
            when 'e'; @encoding=:euc
            when 's'; @encoding=:sjis
@@ -241,12 +251,13 @@
    
    def localvars;
      @localvars_stack.last
    end
 
+   attr_accessor :localvars_stack
+
    attr_accessor :in_def
-   attr :localvars_stack	
    attr :offset_adjust
    attr_writer :pending_here_bodies
    attr :rubyversion
 
    #-----------------------------------
@@ -254,11 +265,11 @@
      @last_operative_token=@last_token_maybe_implicit=tok
    end
 
    #-----------------------------------
    def get1token
-      result=super  #most of the action's here
+      result=rulexer_get1token  #most of the action's here
 
       if ENV['PROGRESS']
       @last_cp_pos||=0
       @start_time||=Time.now
       if result.offset-@last_cp_pos>100000
@@ -298,16 +309,16 @@
       end
    end
 
    #-----------------------------------
    def eof?
-     super or EoiToken===@last_operative_token
+     rulexer_eof? or EoiToken===@last_operative_token
    end
 
    #-----------------------------------
    def input_position
-     super+@offset_adjust
+     rulexer_input_position+@offset_adjust
    end
 
    #-----------------------------------
    def input_position_raw 
      @file.pos
@@ -349,10 +360,11 @@
    #-----------------------------------
    def inside_method_def?
      return true if (defined? @in_def) and @in_def
      @parsestack.reverse_each{|ctx|
        ctx.starter=='def' and ctx.state!=:saw_def and return true
+       ctx.starter=='class' || ctx.starter=='module' and return false
      }
      return false
    end
 
    #-----------------------------------
@@ -387,11 +399,11 @@
     @moretokens.empty? or return result
     loop do
       unless @moretokens.empty?
         case @moretokens.first
         when StillIgnoreToken
-        when NewlineToken: allow_eol or break
+        when NewlineToken; allow_eol or break
         else break
         end 
       else
       
         break unless ch=nextchar
@@ -465,16 +477,13 @@
       assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
 
       if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
         @moretokens.push SymbolToken.new(str,oldpos), KeywordToken.new("=>",input_position-1)
       else
-        @moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
-          #if not a keyword, decide if it should be var or method
-          case str
-            when FUNCLIKE_KEYWORDS; except=tok
-            when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
-          end
+        @moretokens.unshift(*parse_keywords(str,oldpos) do |tok,except|
+          #most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val
+
           was_last=@last_operative_token
           @last_operative_token=tok if tok
           normally=safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
           (Array===normally ? normally[0]=except : normally=except) if except
           normally
@@ -545,10 +554,11 @@
    #-----------------------------------
   #presumably answers "is a local variable being defined here?" (going by
   #the name -- confirm against callers). the result is @defining_lvar when
   #that is truthy; otherwise it depends on the class of the last @parsestack entry:
   # * UnparenedParamListLhsContext (new version only): whether lasttok.ident
   #   is exactly '->', ',' or ';'
   # * RescueSMContext: whether lasttok.ident is "=>" and the upcoming input
   #   in @file is optional whitespace followed by one of : ; # newline,
   #   or by 'then' not followed by a letter/digit
   #any other context falls out of the case with nil (there is no else branch).
   #lasttok defaults to @last_operative_token.
   def in_lvar_define_state lasttok=@last_operative_token
     #@defining_lvar is a hack
     @defining_lvar or case ctx=@parsestack.last
       #when ForSMContext; ctx.state==:for
+       when UnparenedParamListLhsContext;  /^(->|,|;)$/===lasttok.ident
       when RescueSMContext
         lasttok.ident=="=>" and @file.match?( /\A[\s\v]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
       #when BlockParamListLhsContext; true
     end 
   end
@@ -565,10 +575,11 @@
   #in general, operators in ruby are disambiguated by the before-but-not-after rule.
    #an otherwise ambiguous operator is disambiguated by the surrounding whitespace:
    #whitespace before but not after the 'operator' indicates it is to be considered a
    #value token instead. otherwise it is a binary operator. (unary (prefix) ops count 
    #as 'values' here.)
+   #this is by far the ugliest method in RubyLexer.
    def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
      #look for call site if not a keyword or keyword is function-like
      #look for and ignore local variable names
 
      assert String===name
@@ -577,11 +588,11 @@
      #maybe_local really means 'maybe local or constant'
      maybe_local=case name
        when /(?!#@@LETTER_DIGIT).$/o #do nothing
        when /^#@@LCLETTER/o  
          (localvars===name or 
-          VARLIKE_KEYWORDS===name or 
+          #VARLIKE_KEYWORDS===name or 
           was_in_lvar_define_state
          ) and not lasttok===/^(\.|::)$/
        when /^#@@UCLETTER/o
          is_const=true
          not lasttok==='.'  #this is the right algorithm for constants... 
@@ -615,12 +626,13 @@
      end
      
      #if next op is assignment (or comma in lvalue list)
      #then omit implicit parens
      assignment_coming=case nc=nextchar
-       when ?=;  not /^=[>=~]$/===readahead(2)
+       when ?=;  not( /^=[>=~]$/===readahead(2) )
        when ?,; comma_in_lvalue_list? 
+       when (?; if @rubyversion>=1.9); ParenedParamListLhsContext===@parsestack.last
        when ?); last_context_not_implicit.lhs
        when ?i; /^in(?!#@@LETTER_DIGIT)/o===readahead(3) and 
                   ForSMContext===last_context_not_implicit
        when ?>,?<; /^(.)\1=$/===readahead(3)
        when ?*,?&; /^(.)\1?=/===readahead(3)
@@ -643,11 +655,11 @@
      if assignment_coming
        @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
        IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
      else
      case nc
-       when nil: 2
+       when nil; 2
        when ?!; /^![=~]$/===readahead(2) ? 2 : 1
        when ?d; 
          if /^do((?!#@@LETTER_DIGIT)|$)/o===readahead(3)
            if maybe_local and expecting_do?
              ty=VarNameToken 
@@ -759,11 +771,11 @@
      when 1,3;
        if /^(break|next|return)$/===name and
                 !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident)
          #only 1 param in list
          result.unshift ImplicitParamListStartToken.new(oldpos)
-         @parsestack.push ParamListContextNoParen.new(@linenum)
+         @parsestack.push KWParamListContextNoParen.new(@linenum)
        else
          arr,pass=*param_list_coming_with_2_or_more_params?
          result.push( *arr )
          unless pass
            #only 1 param in list
@@ -844,18 +856,18 @@
   def abort_noparens_for_rescue!(str='')
     #assert @moretokens.empty?
     result=[]
     ctx=@parsestack.last
     while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
-      break if AssignmentRhsContext===ctx && !ctx.multi_assign? 
-      if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
-        result.push ImplicitParamListEndToken.new(input_position-str.length),
-                    AssignmentRhsListEndToken.new(input_position-str.length)
-          @parsestack.pop
-          @parsestack.pop
-        break
-      end
+#      break if AssignmentRhsContext===ctx && !ctx.multi_assign? 
+#      if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
+#        result.push ImplicitParamListEndToken.new(input_position-str.length),
+#                    AssignmentRhsListEndToken.new(input_position-str.length)
+#          @parsestack.pop
+#          @parsestack.pop
+#        break
+#      end
       result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
       break if RescueSMContext===ctx #why is this here?
       @parsestack.pop 
       ctx=@parsestack.last
     end
@@ -864,18 +876,30 @@
 
   #-----------------------------------
   CONTEXT2ENDTOK_FOR_DO={
     AssignmentRhsContext=>AssignmentRhsListEndToken, 
     ParamListContextNoParen=>ImplicitParamListEndToken,
+    UnparenedParamListLhsContext=>KwParamListEndToken,
     ExpectDoOrNlContext=>1,
     #WhenParamListContext=>KwParamListEndToken,
     #RescueSMContext=>KwParamListEndToken
   }
   def abort_noparens_for_do!(str='')
     #assert @moretokens.empty?
     result=[]
     while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
+      if klass==AssignmentRhsListEndToken
+        i=@parsestack.size
+        end_the_assign=false
+        while AssignmentRhsContext===@parsestack[i-=1]
+          if CONTEXT2ENDTOK_FOR_DO[@parsestack[i-1].class] and 
+             @parsestack[i-1].class!=AssignmentRhsContext
+               break end_the_assign=true
+          end
+        end
+        break unless end_the_assign
+      end
       break if klass==1
       result << klass.new(input_position-str.length)
       @parsestack.pop 
     end
     return result
@@ -915,44 +939,67 @@
    public :enable_macros!
 
 
    #-----------------------------------
    @@SPACES=/[\ \t\v\f\v]/
-   @@WSTOK=/\r?\n|\r*#@@SPACES+(?:#@@SPACES|\r(?!\n))*|\#[^\n]*\n|\\\r?\n|
-            ^=begin(?:[\s].*)?\n(?:(?!=end).*\n)*=end[\s\n].*\n/x
-   @@WSTOKS=/(?!=begin)#@@WSTOK+/o
-   def divide_ws(ws,offset)
+   @@WSTOK=/(?>
+               (?>\r?)\n|
+               (?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)|
+               \#(?>[^\n]*)\n|
+               \\(?>\r?)\n|
+               ^=begin(?>(?>#@@SPACES.*)?)\n
+                 (?>(?:(?!=end)(?>.*)\n))*
+               =end(?>(?>#@@SPACES.*)?)\n
+            )/x
+   @@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o
+   def divide_ws(ws0,offset)
     #cuts the input string into tokens by matching @@WSTOK repeatedly, each
     #match anchored (\G) where the previous one ended. offset is added to
     #the start of each match to give that token's offset.
     #returns the array of tokens built.
     result=[]
-     ws.scan(/\G#@@WSTOK/o){|ws|
+     ws0.scan(/\G#@@WSTOK/o){|ws|
       #position of this piece within the scanned string
       incr= $~.begin(0)
       #a piece starting with # or = becomes a CommentToken (old) / IgnoreToken (new);
       #any other piece ending in a newline becomes an EscNlToken; the rest are WsTokens
-       klass=case ws
-       when /\A[\#=]/; CommentToken
-       when /\n\Z/; EscNlToken
-       else WsToken
+       tok=case ws
+       when /\A[\#=]/; IgnoreToken.new(ws,offset+incr)
+       when /\n\Z/; EscNlToken.new(ws,offset+incr,@filename,@linenum)
+       else WsToken.new(ws,offset+incr)
       end
-       result << klass.new(ws,offset+incr)
+       result << tok
       #new version only: advance @linenum by the newlines contained in this piece
+       @linenum+=ws.count "\n"
     }
     #merge each run of consecutive WsTokens into the first one of the run
     result.each_with_index{|ws,i|
       if WsToken===ws
         ws.ident << result.delete_at(i+1).ident while WsToken===result[i+1]
       end
     }
     return result
   end
    
+   #-----------------------------------
+   #lex tokens until a predefined end token is found.
+   #returns a list of tokens seen.
+   def read_arbitrary_expression(&endcondition)
     #endcondition is called before each read with two arguments: the token
     #read last (nil the first time) and the slice of @parsestack from index
     #oldsize+1 onward ([] when there is none). reading stops once it returns
     #a true value while @parsestack is back at the size it had on entry.
+     result=[]
+     oldsize=@parsestack.size
+     safe_recurse{
+       tok=nil
+       until endcondition[tok,@parsestack[oldsize+1..-1]||[]] and @parsestack.size==oldsize
+         tok=get1token
+         result<<tok
         #running into end of input is reported through lexerror and ends the loop
+         EoiToken===tok and break lexerror( tok, "unexpected eof" )
+       end
+     }
+     result
+   end
 
-
    #-----------------------------------
    #parse keywords now, to prevent confusion over bare symbols
    #and match end with corresponding preceding def or class or whatever.
    #if arg is not a keyword, the block is called
    def parse_keywords(str,offset,&block)
       assert @moretokens.empty?
       assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident)
       result=[KeywordToken.new(str,offset)]
 
-      m="keyword_#{str}"
+      m=:"keyword_#{str}"
       respond_to?(m) ? (send m,str,offset,result,&block) : block[MethNameToken.new(str)]
    end
    public #these have to be public so respond_to? can see them (sigh)
    def keyword_end(str,offset,result)
          result.unshift(*abort_noparens!(str))
@@ -975,33 +1022,43 @@
    end
 
   #handler for the 'module' keyword (reached via parse_keywords, which calls
   #keyword_<str>). marks the keyword token in result with has_end!, pushes a
   #WantsEndContext, then reads the module name straight from @file, queuing
   #its tokens on @moretokens and finishing with an EndHeaderToken. a new
   #SymbolTable is pushed on @localvars_stack. returns result.
   #old version: scans an optional leading '::' and then Constant(::Constant)* in a loop.
   #new version: first tries a plain constant name followed by whitespace, a
   #comment, ';', newline or '::'; if that does not match, the name is read with
   #read_arbitrary_expression instead. '::'-separated constants that follow
   #are then consumed in the while loop.
   def keyword_module(str,offset,result) 
         result.first.has_end!
         @parsestack.push WantsEndContext.new(str,@linenum)
-         @localvars_stack.push SymbolTable.new 
         offset=input_position
-         @file.scan(/\A(#@@WSTOKS)?(::)?/o) 
-         md=@file.last_match
-         all,ws,dc=*md
-         fail if all.empty?
-         @moretokens.concat divide_ws(ws,offset) if ws
-         @moretokens.push KeywordToken.new('::',offset+md.end(0)-2) if dc
-         loop do
-           offset=input_position
-           @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(::)?/o)
+         assert @moretokens.empty?
+         tokens=[]
+         if @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(?=[#{WHSP}]+(?:[^(])|[#;\n]|::)/o) 
           md=@file.last_match
-           all,ws,name,dc=*md
-           if ws
-             @moretokens.concat divide_ws(ws,offset)
-             incr=ws.size
+           all,ws,name=*md
+           tokens.concat divide_ws(ws,md.begin(1)) if ws
+           tokens.push VarNameToken.new(name,md.begin(2))
+         end
+         tokens.push( *read_arbitrary_expression{|tok,extra_contexts|
+           #@file.check /\A(\n|;|::|end(?!#@@LETTER_DIGIT)|(#@@UCLETTER#@@LETTER_DIGIT*)(?!(#@@WSTOKS)?::))/o
+           @file.check( /\A(\n|;|end(?!#@@LETTER_DIGIT))/o ) or 
+             @file.check("::") && extra_contexts.all?{|ctx| ImplicitParamListContext===ctx } &&
+               @moretokens.push(*abort_noparens!)
+         } ) if !name #or @file.check /#@@WSTOKS?::/o
         #the tokens gathered so far go in front of anything already queued
+         @moretokens[0,0]=tokens
+         @localvars_stack.push SymbolTable.new
+         while @file.check( /\A::/ )
+               #VarNameToken===@moretokens.last or 
+               #KeywordToken===@moretokens.last && @moretokens.last.ident=="::"
+           @file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break
+           md=@file.last_match
+           all,ws1,dc,ws2,name=*md
+           if ws1
+             @moretokens.concat divide_ws(ws1,md.begin(1))
+             incr=ws1.size
           else
             incr=0
           end
-           @moretokens.push VarNameToken.new(name,offset+incr)
-           break unless dc
-           @moretokens.push NoWsToken.new(offset+md.end(0)-2)
-           @moretokens.push KeywordToken.new('::',offset+md.end(0)-2)
+           @moretokens.push NoWsToken.new(md.begin(2)) if dc
+           @moretokens.push KeywordToken.new('::',md.begin(2)) if dc
+           @moretokens.concat divide_ws(ws2,md.begin(3)) if ws2
+           @moretokens.push VarNameToken.new(name,md.begin(4))
         end
         @moretokens.push EndHeaderToken.new(input_position)
         return result
   end        
         
@@ -1069,12 +1126,11 @@
             assert WantsEndContext===@parsestack.last
             result.last.as=";"
          else
             result.last.has_end!
             if BlockContext===ctx and ctx.wanting_stabby_block_body
-              ctx.wanting_stabby_block_body=false
-              ctx.starter,ctx.ender="do","end"
+              @parsestack[-1]= WantsEndContext.new(str,@linenum)            
             else
               @parsestack.push WantsEndContext.new(str,@linenum)            
               localvars.start_block
               block_param_list_lookahead
             end
@@ -1105,22 +1161,22 @@
                 end
                 EoiToken===tok and lexerror tok, "eof in def header"
                 result << tok
               end until  parencount==0 #@parsestack.size==old_size
               @localvars_stack.push SymbolTable.new
-           else #no parentheses, all tail
-             set_last_token KeywordToken.new(".") #hack hack
+            else #no parentheses, all tail
+              set_last_token KeywordToken.new(".") #hack hack
               tokindex=result.size
               result << tok=symbol(false,false)
               name=tok.to_s
               assert !in_lvar_define_state
      
               #maybe_local really means 'maybe local or constant'
               maybe_local=case name
                 when /(?!#@@LETTER_DIGIT).$/o; #do nothing
                 when /^[@$]/; true
-                when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
+                when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
                 when /^#@@LCLETTER/o;  localvars===name 
                 when /^#@@UCLETTER/o; is_const=true  #this is the right algorithm for constants... 
               end
               result.push(  *ignored_tokens(false,false)  )
               nc=nextchar
@@ -1162,10 +1218,11 @@
 
                #look for start of parameter list
                nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
                if state==:expect_op and /^(?:#@@LETTER|[(&*])/o===nc
                   ctx.state=:def_param_list
+                  ctx.has_parens= '('==nc
                   list,listend=def_param_list
                   result.concat list
                   end_index=result.index(listend)
                   ofs=listend.offset
                   if endofs
@@ -1269,14 +1326,23 @@
          return result
    end
 
    def keyword_rescue(str,offset,result)
          unless after_nonid_op? {false}
+           result.replace []
            #rescue needs to be treated differently when in operator context... 
            #i think no RescueSMContext should be pushed on the stack...
-           result.first.set_infix!            #plus, the rescue token should be marked as infix
-           result.unshift(*abort_noparens_for_rescue!(str))  
+           tok=OperatorToken.new(str,offset)
+           tok.unary=false           #plus, the rescue token should be marked as infix
+           if AssignmentRhsContext===@parsestack.last
+             tok.as="rescue3"
+             @parsestack.pop #end rhs context
+             result.push AssignmentRhsListEndToken.new(offset) #end rhs token
+           else
+             result.concat abort_noparens_for_rescue!(str)
+           end
+           result.push tok
          else         
            result.push KwParamListStartToken.new(offset+str.length)
            #corresponding EndToken emitted by abort_noparens! on leaving rescue context
            @parsestack.push RescueSMContext.new(@linenum)
 #           result.unshift(*abort_noparens!(str))  
@@ -1347,16 +1413,35 @@
            }
          end
          return result
    end
 
+   def keyword___FILE__(str,offset,result)
     #handler for the __FILE__ keyword: stores @filename as the value of
     #the last token in result, then returns result.
+     result.last.value=@filename
+     return result
+   end
 
+   def keyword___LINE__(str,offset,result)
     #handler for the __LINE__ keyword: stores the current @linenum as the
     #value of the last token in result, then returns result.
+     result.last.value=@linenum
+     return result
+   end
+ 
+   module RubyLexer1_9
     #keyword handlers specific to ruby 1.9. initialize extends the lexer
     #instance with this module when @rubyversion>=1.9.
+     def keyword___ENCODING__(str,offset,result)
       #no value is attached to the token yet (see the commented-out line); result is returned as is
+       #result.last.value=huh
+       return result
+     end
+
     #in this module 'not' is routed to _keyword_funclike
+     def keyword_not(*args,&block) _keyword_funclike(*args,&block) end
+   end
+
   #shared handler for function-like keywords (aliased to keyword_<kw> for the
   #entries of FUNCLIKE_KEYWORDLIST other than END, return, break and next).
   #the incoming result is discarded and replaced by what the block returns.
   #when the last operative token is '.' or '::' the block receives a
   #MethNameToken for str; otherwise it receives a KeywordToken. in the new
   #version that keyword token is passed twice, so the block's second
   #parameter is set as well.
   def _keyword_funclike(str,offset,result)
         if @last_operative_token===/^(\.|::)$/
           result=yield MethNameToken.new(str) #should pass a methname token here
         else
-           result=yield KeywordToken.new(str)
+           tok=KeywordToken.new(str)
+           result=yield tok,tok
         end
         return result
   end
    for kw in FUNCLIKE_KEYWORDLIST-["END","return","break","next"] do
      alias_method "keyword_#{kw}".to_sym, :_keyword_funclike
@@ -1364,14 +1449,16 @@
  
   #shared handler for variable-like keywords: returns result untouched.
   def _keyword_varlike(str,offset,result)
         #do nothing
         return result
   end
   #alias keyword_<kw> to _keyword_varlike for every entry of VARLIKE_KEYWORDLIST
   #plus "defined?" and "not". the new version leaves out __FILE__ and __LINE__,
   #which have keyword___FILE__ / keyword___LINE__ handlers of their own.
-   for kw in VARLIKE_KEYWORDLIST+["defined?", "not"] do
+   for kw in VARLIKE_KEYWORDLIST-["__FILE__","__LINE__"]+["defined?", "not"] do
     alias_method "keyword_#{kw}".to_sym, :_keyword_varlike
   end
 
+   
+
    private
 
    #-----------------------------------
    def parsestack_lastnonassign_is?(obj)
      @parsestack.reverse_each{|ctx|
@@ -1451,10 +1538,11 @@
            end
 end
          elsif starter==?(
            ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
            @parsestack.push ctx_type.new(@linenum)
+           a<<KwParamListStartToken.new( input_position )
          end
 
          set_last_token KeywordToken.new( ';' )
          #a.concat ignored_tokens
 
@@ -1491,20 +1579,49 @@
 
 
             #parsestack was changed by get1token above...
             normal_comma_level+=1
             assert(normal_comma_level==@parsestack.size)
-            endingblock=proc{|tok| tok===')' }
+            endingblock=proc{|tok2| tok2===')' }
          else
-            endingblock=proc{|tok| tok===';' or NewlineToken===tok}
+            endingblock=proc{|tok2| tok2===';' or NewlineToken===tok2}
          end
          class << endingblock
             alias === call
          end
 
+         listend=method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
+
+         @defining_lvar=false
+         @parsestack.last.see self,:semi
+
+         assert(@parsestack.size <= old_parsestack_size)
+
+         #hack: force next token to look like start of a
+         #new stmt, if the last ignored_tokens
+         #call above did not find a newline
+         #(just in case the next token parsed
+         #happens to call quote_expected? or after_nonid_op)
+         result.concat ignored_tokens
+#         if  !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
+#             !(NewlineToken===@last_operative_token) and
+#             !(/^(end|;)$/===@last_operative_token)
+           #result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
+           set_last_token KeywordToken.new( ';' )
+           result<< get1token
+#         end
+      }
+
+      return result,listend
+   end
+
+
+   #-----------------------------------
+   #read local parameter names in method definition
+   def method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
+         listend=nil
          set_last_token KeywordToken.new( ',' )#hack
-         #read local parameter names
          nextvar=nil
          loop do
             expect_name=(@last_operative_token===',' and
                          normal_comma_level==@parsestack.size)
             expect_name and @defining_lvar||=true
@@ -1531,11 +1648,11 @@
                   @defining_lvar=false
                   assert((not @last_operative_token===','))
 #                  assert !nextvar
                   nextvar=tok.ident
                   localvars[nextvar]=false #remove nextvar from list of local vars for now
-                when /^[&*]$/.token_pat #unary form...
+                when /^[&*(]$/.token_pat #unary form...
                   #a NoWsToken is also expected... read it now
                   result.concat maybe_no_ws_token #not needed?
                   set_last_token KeywordToken.new( ',' )
                 else 
                   lexerror tok,"unfamiliar var name '#{tok}'"
@@ -1551,36 +1668,13 @@
                 localvars[nextvar]=true #now, finally add nextvar back to local vars
                 nextvar
               end
             end
          end
-         
-         @defining_lvar=false
-         @parsestack.last.see self,:semi
-
-         assert(@parsestack.size <= old_parsestack_size)
-         assert(endingblock[tok] || ErrorToken===tok)
-
-         #hack: force next token to look like start of a
-         #new stmt, if the last ignored_tokens
-         #call above did not find a newline
-         #(just in case the next token parsed
-         #happens to call quote_expected? or after_nonid_op)
-         result.concat ignored_tokens
-#         if  !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
-#             !(NewlineToken===@last_operative_token) and
-#             !(/^(end|;)$/===@last_operative_token)
-           #result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
-           set_last_token KeywordToken.new( ';' )
-           result<< get1token
-#         end
-      }
-
-      return result,listend
+         return listend
    end
 
-
    #-----------------------------------
    #handle % in ruby code. is it part of fancy quote or a modulo operator?
    def percent(ch)
      if AssignmentContext===@parsestack.last
        @parsestack.pop
@@ -1628,11 +1722,17 @@
    #-----------------------------------
    #handle ? in ruby code. is it part of ?..: or a character literal?
   def char_literal_or_op(ch)
      #colon_quote_expected? decides between the two readings of '?'.
      #character literal: the '?' is consumed, then the (possibly escaped)
      #character after it. the old version wraps that directly in a NumberToken.
      #the new version makes a StringToken when @rubyversion>=1.9; otherwise it
      #takes element [0] of what was read, converts it with ord when the
      #object responds to ord, and wraps that in a NumberToken.
      #operator: a TernaryContext is pushed and the '?' becomes a KeywordToken.
      if colon_quote_expected? ch
         getchar
-         NumberToken.new getchar_maybe_escape
+         if @rubyversion >= 1.9
+           StringToken.new getchar_maybe_escape
+         else
+           ch=getchar_maybe_escape[0]
+           ch=ch.ord if ch.respond_to? :ord
+           NumberToken.new ch
+         end
      else
         @parsestack.push TernaryContext.new(@linenum)
         KeywordToken.new getchar   #operator
      end
   end
@@ -1823,11 +1923,11 @@
            return [read(1),start] 
          when ?_,?a..?z,?A..?Z,NONASCII
            context=merge_assignment_op_in_setter_callsites? ? ?: : nc
            return [identifier_as_string(context), start]
          when ?(
-           return [nil,start] if @enable_macro
+           return [nil,start] if @enable_macro or @rubyversion>=1.9
      end
 
      set_last_token KeywordToken.new(';')
      lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
      return [nil, start]
@@ -1851,11 +1951,11 @@
           return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
       end
 
       res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
 if true
-      res.open=["<<",dash,quote,ender,quote].to_s
+      res.open=["<<",dash,quote,ender,quote].join
       procrastinated=til_charset(/[\n]/)#+readnl
       unless @base_file
         @base_file=@file
         @file=Sequence::List.new([@file])
         @file.pos=@base_file.pos
@@ -1977,11 +2077,11 @@
       end
       
       @offset_adjust=@min_offset_adjust
       @moretokens.push( *optional_here_bodies )
       ln=@linenum
-      @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
+      @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln-1), error),
                        FileAndLineToken.new(@filename,ln,input_position)
 
       start_of_line_directives
 
       return @moretokens.shift
@@ -1993,11 +2093,11 @@
 if true
       #handle here bodies queued up by previous line
       pos=input_position
       while body=@pending_here_bodies.shift
         #body.offset=pos
-        result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
+        result.push EscNlToken.new("\n",body.offset-1,@filename,nil)
         result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
         result.push body
         #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
         #result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
         body.headtok.line=@linenum-1
@@ -2144,29 +2244,29 @@
            (KeywordToken===@last_operative_token and
             @last_operative_token.ident=="rescue" and
             !@last_operative_token.infix?)  ||
            !after_nonid_op?{false}
 
-      hard=false if @rubyversion>=1.9 and @file.check /\A\n(?:#@@WSTOKS)?\.[^.]/o
+      hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
 
       if hard
         @offset_adjust=@min_offset_adjust
         a= abort_noparens!
         case @parsestack.last  #these should be in the see:semi handler
-          when ExpectDoOrNlContext: @parsestack.pop
-          when ExpectThenOrNlContext: @parsestack.pop        
+          when ExpectDoOrNlContext; @parsestack.pop
+          when ExpectThenOrNlContext; @parsestack.pop        
         end
         assert !@parsestack.empty?
         @parsestack.last.see self,:semi
 
-        a << super(ch)
+        a << rulexer_newline(ch)
         @moretokens.replace a+@moretokens
       else
         @offset_adjust=@min_offset_adjust
         offset= input_position
         nl=readnl
-        @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
+        @moretokens.push EscNlToken.new(nl,offset,@filename,@linenum-1),
            FileAndLineToken.new(@filename,@linenum,input_position)
       end
 
       #optimization: when thru with regurgitated text from a here document,
       #revert back to original unadorned Sequence instead of staying in the list.
@@ -2220,19 +2320,19 @@
          startpos= input_position
          more= read(EQBEGINLENGTH-1)   #get =begin
 
          begin
            eof? and raise "eof before =end"
-           more<< til_charset(/[\r\n]/)
+           more<< til_charset(/\n/)
            eof? and raise "eof before =end"
            more<< readnl
          end until readahead(EQENDLENGTH)==EQEND
 
          #read rest of line after =end
-         more << til_charset(/[\r\n]/)  
-         assert((eof? or ?\r===nextchar or ?\n===nextchar))
-         assert !(/[\r\n]/===more[-1,1])
+         more << til_charset(/\n/)  
+         assert((eof? or ?\n===nextchar))
+         assert !(/\n/===more[-1,1])
          more<< readnl unless eof?
 
 #         newls= more.scan(/\r\n?|\n\r?/)
 #         @linenum+= newls.size
 
@@ -2309,12 +2409,12 @@
          #but {false} in the block 
          #(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
             return yield
          when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
               %r{^(
-                end|self|true|false|nil|  
-                __FILE__|__LINE__|[\})\]]
+                end|self|true|false|nil|->|  
+                __FILE__|__LINE__|__ENCODING__|[\})\]]
               )$}x.token_pat
             #dunno about def/undef
             #maybe class/module shouldn't he here either?  
             #for is also in NewlineToken branch, below.
             #what about rescue?
@@ -2397,11 +2497,11 @@
          result << ?=
       end
       result= operator_or_methname_token( result)
       result.offset=oldpos
       return result
-   end
+    end
 
    #-----------------------------------
    def tilde(ch) #match ~
       assert(ch=='~')
       result=getchar
@@ -2424,24 +2524,26 @@
    #-----------------------------------
    #match /[+\-]=?/ (+ or +=)
    #could be beginning of number, too
    #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
    def plusminus(ch)
+      pos=input_position
       assert(/^[+\-]$/===ch)
       if unary_op_expected?(ch) or 
          KeywordToken===@last_operative_token && 
          /^(return|break|next)$/===@last_operative_token.ident
         if (?0..?9)===readahead(2)[1]
-          return number(ch)
+          result= number(ch)
         elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
+          @file.pos+=2
           #push down block context
           localvars.start_block
           @parsestack.push ctx=BlockContext.new(@linenum)
           ctx.wanting_stabby_block_body=true
           #read optional proc params
           block_param_list_lookahead ?(, ParenedParamListLhsContext
-
+          result=KeywordToken.new('->',pos)
           
         else #unary operator
           result=getchar
           WHSPLF[nextchar.chr] or
             @moretokens << NoWsToken.new(input_position)
@@ -2454,10 +2556,11 @@
          if eat_next_if(?=)
             result << ?=
          end
          result=(operator_or_methname_token result)
       end
+      result.offset=pos
       return result
    end
 
    #-----------------------------------
    def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
@@ -2483,27 +2586,28 @@
         
         if @rubyversion>=1.9 and StringToken===last and last.lvars
           #ruby delays adding lvars from regexps to known lvars table
           #for several tokens in some cases. not sure why or if on purpose
           #i'm just going to add them right away
-          localvars.concat last.lvars
+          last.lvars.each{|lvar| localvars[lvar]=true }
         end
       when '' #plain assignment: record local variable definitions
         last_context_not_implicit.lhs=false
+        @last_operative_token=result
         @moretokens.push( *ignored_tokens(true).map{|x| 
-          NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x 
+          NewlineToken===x ? EscNlToken.new(x.ident,x.offset,@filename,@linenum) : x 
         } )
         @parsestack.push AssignmentRhsContext.new(@linenum)
+        @moretokens.push AssignmentRhsListStartToken.new( input_position)
         if eat_next_if ?* 
           tok=OperatorToken.new('*', input_position-1)
           tok.tag=:unary
           @moretokens.push tok
           WHSPLF[nextchar.chr] or
             @moretokens << NoWsToken.new(input_position)
           comma_in_lvalue_list? #is this needed?
         end
-        @moretokens.push AssignmentRhsListStartToken.new( input_position)
       end
       return result
    end
 
    #-----------------------------------
@@ -2511,17 +2615,19 @@
       assert nextchar==?!
       result=getchar
       k=eat_next_if(/[~=]/)
       if k
         result+=k
-      elsif eof?: #do nothing
+      elsif eof? or WHSPLF[nextchar.chr] #do nothing
       else
-        WHSPLF[nextchar.chr] or
-          @moretokens << NoWsToken.new(input_position)
+        @moretokens << NoWsToken.new(input_position)
       end
-      return KeywordToken.new(result, input_position-result.size)
-      #result should distinguish unary !
+      ty= @rubyversion>=1.9 ? OperatorToken : KeywordToken
+      result=ty.new(result, input_position-result.size)
+      result.unary=!k #result should distinguish unary !
+
+      return result
    end
 
 
    #-----------------------------------
    def dot(ch)
@@ -2563,11 +2669,11 @@
 if false
    #-----------------------------------
    def comment(str)
      result=""
      #loop{
-       result<< super(nil).to_s
+       result<< rulexer_comment(nil).to_s
 
        if /^\#.*\#$/===result #if comment was ended by a crunch
 
          #that's not a legal comment end in ruby, so just keep reading
          assert(result.to_s[-1]==?#)
@@ -2643,24 +2749,45 @@
         end
 
       when '{'
       #check if we are in a hash literal or string inclusion (#{}),
       #in which case below would be bad.
-      if after_nonid_op?{false} or @last_operative_token.has_no_block?
+      if  !(UnparenedParamListLhsContext===@parsestack.last) and 
+          after_nonid_op?{false} || @last_operative_token.has_no_block?
         @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
       else
         #abort_noparens!
         tokch.set_infix!
         tokch.as="do"
-#=begin not needed now, i think
+
+        #if (perhaps deep) inside a stabby block param list context, end it
+        if @rubyversion>=1.9     
+          stabby_params_just_ended=false
+          (@parsestack.size-1).downto(1){|i|
+            case @parsestack[i]
+            when ParamListContextNoParen,AssignmentRhsContext
+              #do nothing yet... see if inside a UnparenedParamListLhsContext
+            when UnparenedParamListLhsContext #stabby proc
+              @moretokens<<tokch
+              (@parsestack.size-1).downto(i){|j|
+                @moretokens.unshift @parsestack[j].endtoken(input_position-1)
+              }
+              @parsestack[i..-1]=[]
+              tokch=@moretokens.shift
+              stabby_params_just_ended=true
+              break
+            else break
+            end
+          }
+        end
+
         # 'need to find matching callsite context and end it if implicit'
         lasttok=last_operative_token
-        if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
+        if !(lasttok===')' and lasttok.callsite?) and !stabby_params_just_ended #or ParamListContextNoParen===parsestack.last
           @moretokens.push( *(abort_1_noparen!(1).push tokch) )
           tokch=@moretokens.shift
         end
-#=end
 
         if BlockContext===@parsestack.last and @parsestack.last.wanting_stabby_block_body
           @parsestack.last.wanting_stabby_block_body=false
         else
           localvars.start_block
@@ -2717,11 +2844,11 @@
      return(endoffile_detected result)
    end
 
    #-----------------------------------
    def endoffile_detected(s='')
-     @moretokens.push( *(abort_noparens!.push super(s)))
+     @moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s)))
      if @progress_thread
        @progress_thread.kill
        @progress_thread=nil
      end
      result= @moretokens.shift
@@ -2729,36 +2856,41 @@
      result
    end
 
   #-----------------------------------
   def single_char_token(ch)
-    KeywordToken.new super(ch), input_position-1
+    KeywordToken.new rulexer_single_char_token(ch), input_position-1
   end
 
   #-----------------------------------
   def comma(ch)
     @moretokens.push token=single_char_token(ch)
 
-    #if assignment rhs seen inside method param list, when param list, array or hash literal,
-    #       rescue where comma is expected, or method def param list
-    #          then end the assignment rhs now
-       #+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext|
+    case @parsestack[-1]
+    when AssignmentRhsContext; 
+       token.tag=:rhs
+      #if assignment rhs seen inside method param list, when param list, 
+      #       array or hash literal, rescue where comma is expected, method def param list,
+      #       or another right hand side
+      #          then end the assignment rhs now
+       #+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext|AssignmentRhsContext|
        #      (RescueSMContext&-{:state=>:rescue})|(DefContext&-{:in_body=>FalseClass|nil}),
        #  AssignmentRhsContext
        #]===@parsestack
-    if AssignmentRhsContext===@parsestack[-1] and
-       ParamListContext===@parsestack[-2] || 
-       ParamListContextNoParen===@parsestack[-2] ||
-       WhenParamListContext===@parsestack[-2] ||
-       ListImmedContext===@parsestack[-2] ||
-       (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
-       (DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
+       while AssignmentRhsContext===@parsestack[-1]
+         pop=
+           case @parsestack[-2]
+           when ParamListContext,ParamListContextNoParen,WhenParamListContext,
+                ListImmedContext,AssignmentRhsContext; true
+           when RescueSMContext; @parsestack[-2].state==:rescue
+           when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens?
+           else false
+           end
+         break unless pop
          @parsestack.pop
-         @moretokens.unshift AssignmentRhsListEndToken.new(input_position)
-    end
-    case @parsestack[-1]
-    when AssignmentRhsContext; token.tag=:rhs
+         @moretokens.unshift AssignmentRhsListEndToken.new(input_position-1)
+       end
     when ParamListContext,ParamListContextNoParen; #:call
     when ListImmedContext; #:array
     when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc
     when ParenedParamListLhsContext; #stabby proc or method def'n?
     when KnownNestedLhsParenContext; #:nested
@@ -2798,10 +2930,10 @@
   end
 
   #-----------------------------------
   #tokenify_results_of  :identifier
   save_offsets_in(*CHARMAPPINGS.values.uniq-[
-    :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret
+    :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret,:plusminus
   ])
   #save_offsets_in :symbol
 
 end