=begin copyright
  rubylexer - a ruby lexer written in ruby
  Copyright (C) 2004,2005 Caleb Clausen

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
=end

require 'rubylexer/rulexer' #must be 1st!!!
require 'rubylexer/version'
require 'rubylexer/token'
require 'rubylexer/charhandler'
require 'rubylexer/symboltable'
#require "io.each_til_charset"
require 'rubylexer/context'
require 'rubylexer/tokenprinter'


#-----------------------------------
class RubyLexer
  include NestedContexts

  RUBYSYMOPERATORREX= %r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}
    # (nasty beastie, eh?)
    #these are the overridable operators
    #does not match flow-control operators like: || && ! or and if not
    #or op= ops like: += -= ||=
    #or .. ... ?:
    #for that use:
  RUBYNONSYMOPERATORREX= %r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
  RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
  UNSYMOPS=/^[~!]$/ #always unary
  UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
  WHSPCHARS=WHSPLF+"\\#"
  OPORBEGINWORDS="(if|unless|while|until)"
  BEGINWORDS=/^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
  FUNCLIKE_KEYWORDS=/^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
  VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
  INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
  BINOPWORDS="(and|or)"
  NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
  NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
  NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST

  RUBYKEYWORDS=%r{
    ^(alias|#{BINOPWORDS}|not|undef|end|
      #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
      #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
    )$
  }xo
  #__END__ should not be in this set... its handled in start_of_line_directives

  CHARMAPPINGS = {
    ?$ => :dollar_identifier,
    ?@ => :at_identifier,
    ?a..?z => :identifier,
    ?A..?Z => :identifier,
    ?_ => :identifier,
    ?0..?9 => :number,
    %{"'} => :double_quote,
    ?` => :back_quote,

    WHSP => :whitespace, #includes \r
    ?, => :comma,
    ?; => :semicolon,

    ?^ => :biop,
    ?~ => :tilde,
    ?= => :equals,
    ?! => :exclam,
    ?. => :dot,

    #these ones could signal either an op or a term
    ?/ => :regex_or_div,
    "|" => :conjunction_or_goalpost,
    ">" => :quadriop,
    "*&" => :star_or_amp, #could be unary
    "+-" => :plusminus, #could be unary
    ?< => :lessthan,
    ?% => :percent,
    ?? => :char_literal_or_op, #single-char int literal
    ?: => :symbol_or_op,
    ?\n => :newline, #implicitly escaped after op
    #?\r => :newline, #implicitly escaped after op
    ?\\ => :escnewline,
    ?\0 => :eof,

    "[({" => :open_brace,
    "])}" => :close_brace,
    ?# => :comment
  }

  attr_reader :incomplete_here_tokens, :parsestack

  #-----------------------------------
  def initialize(filename,file,linenum=1)
    super(filename,file, linenum)
    @start_linenum=linenum
    @parsestack=[TopLevelContext.new]
    @incomplete_here_tokens=[]
    @localvars_stack=[SymbolTable.new]
    @defining_lvar=nil
    @in_def_name=false

    @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)

    start_of_line_directives
  end

  def localvars; @localvars_stack.last end

  #-----------------------------------
  def get1token
    result=super #most of the action's here

    #now cleanup and housekeeping

    #check for bizarre token types
    case result
    when StillIgnoreToken#,nil
      result
    when Token#,String
      @last_operative_token=result
      assert !(IgnoreToken===@last_operative_token)
      result
    else
      raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
    end
  end

  #-----------------------------------
  def balanced_braces?
    #@parsestack.empty?
    @parsestack.size==1 and TopLevelContext===@parsestack.first
  end

  #-----------------------------------
  def dollar_identifier(ch=nil)
    s=eat_next_if(?$) or return nil

    if t=((identifier_as_string(?$) or special_global))
      s<<t
    end

=begin
        "#" => :comment,
        "\n" => :newline,
        "\\" => :escnewline,
        "\s\t\v\r\f" => :whitespace
    )
    #tok=nil
    while tok=@whsphandler.go((nextchar or return result))
      block_given? and NewlineToken===tok and yield tok
      result << tok
    end
=end
    return result
  end

  #-----------------------------------
  def safe_recurse
    old_moretokens=@moretokens
    #old_parsestack=@parsestack.dup
    @moretokens=[]
    result= yield @moretokens
    #assert @incomplete_here_tokens.empty?
    #assert @parsestack==old_parsestack
    @moretokens= old_moretokens.concat @moretokens
    return result
    #need to do something with @last_operative_token?
  end

  #-----------------------------------
  def special_global #handle $-a and friends
    assert prevchar=='$'
    result = ((
      #order matters here, but it shouldn't
      #(but til_charset must be last)
      eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
      (eat_next_if('-') and ("-"+getchar)) or
      (?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
    ))
  end

  #-----------------------------------
  def identifier(context=nil)
    oldpos= input_position
    str=identifier_as_string(context)

    #skip keyword processing if 'escaped' as it were, by def, . or ::
    #or if in a non-bare context
    #just asserts because those contexts are never encountered.
    #control goes through symbol(<...>,nil)
    assert( /^[a-z_]$/i===context)
    assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)

    @moretokens.unshift(*parse_keywords(str,oldpos) do
      #if not a keyword,
      case str
      when FUNCLIKE_KEYWORDS; #do nothing
      when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
      end
      safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
    end)
    return @moretokens.shift
  end

  #-----------------------------------
  def identifier_as_string(context)
    #must begin w/ letter or underscore
    str=eat_next_if(/[_a-z]/i) or return nil

    #equals, question mark, and exclamation mark
    #might be allowed at the end in some contexts.
    #(in def headers and symbols)
    #otherwise, =,?, and ! are to be considered
    #separate tokens. confusing, eh?
    #i hope i've captured all right conditions....
    #context should always be ?: right after def, ., and :: now
    maybe_eq,maybe_qm,maybe_ex = case context
      when ?@,?$ then [nil,nil,nil]
      when ?:    then [?=, ??, ?!]
      else            [nil,??, ?!]
    end
    @in_def_name and maybe_eq= ?=

    str<<til_charset(/[^a-z0-9_]/i)

        back1char
      else str << b
      end
    else back1char
    end

    return str
  end

  #-----------------------------------
  #contexts in which comma may appear in ruby:
  #multiple lhs (terminated by assign op)
  #multiple rhs (in implicit context)
  #method actual param list (in ( or implicit context)
  #method formal param list (in ( or implicit context)
  #block formal param list (in | context)
  #nested multiple rhs
  #nested multiple lhs
  #nested block formal list
  #element reference/assignment (in [] or []= method actual parameter context)
  #hash immediate (in imm{ context)
  #array immediate (in imm[ context)
  #list between 'for' and 'in'
  #list after rescue
  #list after when
  #list after undef

  #note: comma in parens not around a param list or lhs or rhs is illegal

  #-----------------------------------
  #a comma has been seen. are we in an
  #lvalue list or some other construct that uses commas?
  def comma_in_lvalue_list?
    @parsestack.last.lhs= (not ListContext===@parsestack.last)
  end

  #-----------------------------------
  def in_lvar_define_state
    #@defining_lvar is a hack
    @defining_lvar or case ctx=@parsestack.last
      when ForSMContext; ctx.state==:for
      when RescueSMContext; ctx.state==:arrow
      #when BlockParamListLhsContext; true
    end
  end

  #-----------------------------------
  #determine if an alphabetic identifier refers to a variable
  #or method name. generates implicit parenthes(es) if it is a
  #call site and no explicit parens are present. starts an implicit param list
  #if appropriate. adds tok to the
  #local var table if its a local var being defined for the first time.

  #note: what we here call variables (rather, constants) following ::
  #might actually be methods at runtime, but that's immaterial to tokenization.

  #note: this routine should determine the correct token type for name and
  #create the appropriate token. currently this is not done because callers
  #typically have done it (perhaps incorrectly) already.
  def var_or_meth_name(name,lasttok,pos)
    #look for call site if not a keyword or keyword is function-like
    #look for and ignore local variable names

    assert String===name

    #maybe_local really means 'maybe local or constant'
    maybe_local=case name
      when /[^a-z_0-9]$/i; #do nothing
      when /^[a-z_]/; (localvars===name or
        VARLIKE_KEYWORDS===name or
        in_lvar_define_state) and
        not lasttok===/^(\.|::)$/
      when /^[A-Z]/; is_const=true;not lasttok==='.' #this is the right algorithm for constants...
    end

    assert(@moretokens.empty?)

    oldlast=@last_operative_token

    tok=@last_operative_token=VarNameToken.new(name,pos)

    oldpos= input_position
    sawnl=false
    result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
    if sawnl || eof?
      if maybe_local then
        if in_lvar_define_state
          if /^[a-z_][a-zA-Z_0-9]*$/===name
            assert !(lasttok===/^(\.|::)$/)
            localvars[name]=true
          else
            lexerror tok,"not a valid variable name: #{name}"
          end
          return result.unshift(tok)
        end
        return result.unshift(tok) #if is_const
      else
        return result.unshift(
          MethNameToken.new(name,pos), #insert implicit parens right after tok
          ImplicitParamListStartToken.new( oldpos),
          ImplicitParamListEndToken.new( oldpos)
        )
      end
    end

    #if next op is assignment (or comma in lvalue list)
    #then omit implicit parens
    assignment_coming=case nc=nextchar
      when ?=; not /^=[>=~]$/===readahead(2)
      when ?,; comma_in_lvalue_list?
      when ?); last_context_not_implicit.lhs
      when ?>,?<; /^(.)\1=$/===readahead(3)
      when ?*,?&; /^(.)\1?=/===readahead(3)
      when ?|; /^\|\|?=/===readahead(3) or
        #is it a goalpost?
        BlockParamListLhsContext===last_context_not_implicit &&
        readahead(2)[1] != ?|
      when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
    end

    if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
      tok=VarNameToken.new(name,pos)
      if /[^a-z_0-9]$/i===name
        lexerror tok,"not a valid variable name: #{name}"
      elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
        localvars[name]=true
      end
      return result.unshift(tok)
    end

    implicit_parens_to_emit=
      if assignment_coming
        @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
        0
      else
        case nc
        when nil: 2
        when ?!; readahead(2)=='!=' ? 2 : 1
        when NEVERSTARTPARAMLISTFIRST
          (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
        when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
        when ?{
          maybe_local=false
          x=2
          x-=1 if /\A(return|break|next)\Z/===name and
            !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
          x
        when ?(; maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
        when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
        when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
        when ?:,??; next2=readahead(2); WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
        # when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
        when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
        when ?[; ws_toks.empty? ? 2 : 3
        when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
        else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<>"}"
        end
      end

    if is_const and implicit_parens_to_emit==3 then implicit_parens_to_emit=1 end

    tok=if maybe_local and implicit_parens_to_emit>=2
      implicit_parens_to_emit=0
      VarNameToken
    else
      MethNameToken
    end.new(name,pos)

    case implicit_parens_to_emit
    when 2;
      result.unshift ImplicitParamListStartToken.new(oldpos),
                     ImplicitParamListEndToken.new(oldpos)
    when 1,3;
      arr,pass=*param_list_coming_with_2_or_more_params?
      result.push( *arr )
      unless pass
        result.unshift ImplicitParamListStartToken.new(oldpos)
        @parsestack.push ParamListContextNoParen.new(@linenum)
      end
    when 0; #do nothing
    else raise 'invalid value of implicit_parens_to_emit'
    end

    return result.unshift(tok)

    # 'ok:'
    # 'if unless while until {'
    # '\n (unescaped) and or'
    # 'then else elsif rescue ensure (illegal in value context)'
    # 'need to pop noparen from parsestack on these tokens: (in operator context)'
    # 'not ok:'
    # 'not (but should it be?)'
  end

  #-----------------------------------
  def param_list_coming_with_2_or_more_params?
    WHSPCHARS[prevchar] && (?(==nextchar) or return [[],false]
    basesize=@parsestack.size
    result=[get1token]
    pass=loop{
      tok=get1token
      result<<tok

  CONTEXT2ENDTOK={
    AssignmentRhsContext=>AssignmentRhsListEndToken,
    ParamListContextNoParen=>ImplicitParamListEndToken,
    WhenParamListContext=>KwParamListEndToken,
    RescueSMContext=>KwParamListEndToken
  }
  def abort_noparens!(str='')
    #assert @moretokens.empty?
    result=[]
    while klass=CONTEXT2ENDTOK[@parsestack.last.class]
      result << klass.new(input_position-str.length)
      break if RescueSMContext===@parsestack.last
      @parsestack.pop
    end
    return result
  end

  if false #no longer used
  #-----------------------------------
  def abort_1_noparen!(offs=0)
    assert @moretokens.empty?
    result=[]
    while AssignmentRhsContext===@parsestack.last
      @parsestack.pop
      result << AssignmentRhsListEndToken.new(input_position-offs)
    end
    ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
    @parsestack.pop
    result << ImplicitParamListEndToken.new(input_position-offs)
    return result
  end
  end

  #-----------------------------------
  #parse keywords now, to prevent confusion over bare symbols
  #and match end with corresponding preceding def or class or whatever.
  #if arg is not a keyword, the block is called
  def parse_keywords(str,offset)
    assert @moretokens.empty?
    result=[KeywordToken.new(str,offset)]
    case str
    when "end"
      result.unshift(*abort_noparens!(str))
      @parsestack.last.see self,:semi #sorta hacky... should make an :end event instead?

=begin not needed?
      if ExpectDoOrNlContext===@parsestack.last
        @parsestack.pop
        assert @parsestack.last.starter[/^(while|until|for)$/]
      end
=end

      WantsEndContext===@parsestack.last or lexerror result.last, 'unbalanced end'
      ctx=@parsestack.pop
      start,line=ctx.starter,ctx.linenum
      BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
      /^(do)$/===start and localvars.end_block
      /^(class|module|def)$/===start and @localvars_stack.pop

    when "class","module"
      result.first.has_end!
      @parsestack.push WantsEndContext.new(str,@linenum)
      @localvars_stack.push SymbolTable.new

    when "if","unless" #could be infix form without end
      if after_nonid_op?{false} #prefix form
        result.first.has_end!
        @parsestack.push WantsEndContext.new(str,@linenum)
      else #infix form
        result.unshift(*abort_noparens!(str))
      end

    when "begin","case"
      result.first.has_end!
      @parsestack.push WantsEndContext.new(str,@linenum)

    when "while","until" #could be infix form without end
      if after_nonid_op?{false} #prefix form
        result.first.has_end!
        @parsestack.push WantsEndContext.new(str,@linenum)
        expect_do_or_end_or_nl! str
      else #infix form
        result.unshift(*abort_noparens!(str))
      end

    when "for"
      result.first.has_end!
      result.push KwParamListStartToken.new(offset+str.length)
      # corresponding EndToken emitted leaving ForContext ("in" branch, below)
      @parsestack.push WantsEndContext.new(str,@linenum)
      #expect_do_or_end_or_nl! str #handled by ForSMContext now
      @parsestack.push ForSMContext.new(@linenum)

    when "do"
      result.unshift(*abort_noparens!(str))
      if ExpectDoOrNlContext===@parsestack.last
        @parsestack.pop
        assert WantsEndContext===@parsestack.last
      else
        result.last.has_end!
        @parsestack.push WantsEndContext.new(str,@linenum)
        localvars.start_block
        block_param_list_lookahead
      end

    when "def"
      result.first.has_end!
      @parsestack.push WantsEndContext.new("def",@linenum)
      @localvars_stack.push SymbolTable.new
      safe_recurse { |aa|
        @last_operative_token=KeywordToken.new "def" #hack
        result.concat ignored_tokens

        #read an expr like a.b.c or a::b::c
        #or (expr).b.c
        if nextchar==?( #look for optional parenthesised head
          old_size=@parsestack.size
          parencount=0
          begin
            tok=get1token
            case tok
            when /^\($/.token_pat then parencount+=1
            when /^\)$/.token_pat then parencount-=1
            end
            EoiToken===tok and lexerror tok, "eof in def header"
            result<<tok

          old_parsestack_size>=@parsestack.size and break

          #next token is a local var name
          #(or the one after that if unary ops present)
          #result.concat ignored_tokens
          if expect_name
            case tok
            when IgnoreToken #, /^[A-Z]/
              #do nothing
            when /^,$/.token_pat #hack
            when VarNameToken
              assert @defining_lvar
              @defining_lvar=false
              assert((not @last_operative_token===','))
            when /^[&*]$/.token_pat #unary form...
              #a NoWsToken is also expected... read it now
              result.concat maybe_no_ws_token #not needed?
              @last_operative_token=KeywordToken.new ','
            else
              lexerror tok,"unfamiliar var name '#{tok}'"
            end
          elsif /^,$/.token_pat===tok and
                normal_comma_level+1==@parsestack.size and
                AssignmentRhsContext===@parsestack.last
            #seeing comma here should end implicit rhs started within the param list
            result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
            @parsestack.pop
          end
        end

        @defining_lvar=false

        assert(@parsestack.size <= old_parsestack_size)
        assert(endingblock[tok])

        #hack: force next token to look like start of a
        #new stmt, if the last ignored_tokens
        #call above did not find a newline
        #(just in case the next token parsed
        #happens to call quote_expected? or after_nonid_op)
        result.concat ignored_tokens
        if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
           !(NewlineToken===@last_operative_token) and
           !(/^(end|;)$/===@last_operative_token)
          @last_operative_token=KeywordToken.new ';'
          result<< get1token
        end
      }
    return result
  end

  #-----------------------------------
  #handle % in ruby code. is it part of fancy quote or a modulo operator?
  def percent(ch)
    if quote_expected? ch
      fancy_quote ch
    else
      biop ch
    end
  end

  #-----------------------------------
  #handle * & in ruby code. is unary or binary operator?
  def star_or_amp(ch)
    assert('*&'[ch])
    want_unary=unary_op_expected? ch
    result=(quadriop ch)
    if want_unary
      #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
      assert OperatorToken===result
      result.unary=true #result should distinguish unary+binary *&
      WHSPLF[nextchar.chr] or @moretokens << NoWsToken.new(input_position)
    end
    result
  end

  #-----------------------------------
  #handle ? in ruby code. is it part of ?..: or a character literal?
  def char_literal_or_op(ch)
    if colon_quote_expected? ch
      getchar
      NumberToken.new getchar_maybe_escape
    else
      @parsestack.push TernaryContext.new(@linenum)
      KeywordToken.new getchar #operator
    end
  end

  #-----------------------------------
  def regex_or_div(ch)
    #space after slash always means / operator, rather than regex start
    if after_nonid_op?{ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/\s}] }
      return regex(ch)
    else #/ is operator
      result=getchar
      if eat_next_if(?=)
        result << '='
      end
      return(operator_or_methname_token result)
    end
  end

  #-----------------------------------
  #return true if last tok corresponds to a variable or constant,
  #false if its for a method, nil for something else
  #we assume it is a valid token with a correctly formed name.
  #...should really be called was_var_name
  def is_var_name?(tok=@last_operative_token)
    s=tok.to_s
    case s
    when /[^a-z_0-9]$/i; false
    when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
    when /^[A-Z]/; VarNameToken===tok
    when /^[@$<]/; true
    else raise "not var or method name: #{s}"
    end
  end

  #-----------------------------------
  def colon_quote_expected?(ch) #yukko hack
    assert ':?'[ch]
    readahead(2)[/^(\?[^#{WHSPLF}]|:[^\s\r\n\t\f\v :])$/o] or return false

    after_nonid_op? {
      #possible func-call as operator
      not is_var_name? and
        if ch==':'
          not TernaryContext===@parsestack.last
        else
          !readahead(3)[/^\?[a-z0-9_]{2}/i]
        end
    }
  end

  #-----------------------------------
  def symbol_or_op(ch)
    startpos= input_position
    qe= colon_quote_expected?(ch)
    lastchar=prevchar
    eat_next_if(ch[0]) or raise "needed: "+ch

    #handle quoted symbols like :"foobar", :"[]"
    qe and return symbol(':')

    #look for another colon; return single : if not found
    unless eat_next_if(?:)
      #cancel implicit contexts...
      @moretokens.push(*abort_noparens!(':'))

      #end ternary context, if any
      @parsestack.last.see self,:colon

      TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler

      if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
        @parsestack.pop
        assert @parsestack.last.starter[/^(while|until|for)$/]
      end

      @moretokens.push KeywordToken.new(':',startpos)
      return @moretokens.shift
    end

    #we definately found a ::

    colon2=KeywordToken.new( '::',startpos)
    lasttok=@last_operative_token
    assert !(String===lasttok)
    if (VarNameToken===lasttok or MethNameToken===lasttok) and
        lasttok===/^[$@a-zA-Z_]/ and !WHSPCHARS[lastchar]
    then
      @moretokens << colon2
      result= NoWsToken.new(startpos)
    else
      result=colon2
    end
    dot_rhs(colon2)
    return result
  end

  #-----------------------------------
  def symbol(notbare,couldbecallsite=!notbare)
    assert !couldbecallsite
    start= input_position
    notbare and start-=1
    klass=(notbare ? SymbolToken : MethNameToken)

    #look for operators
    opmatches=readahead(3)[RUBYSYMOPERATORREX]
    result= opmatches ? read(opmatches.size) :
      case nc=nextchar
      when ?" then assert notbare;double_quote('"')
      when ?' then assert notbare;double_quote("'")
      when ?` then read(1)
      when ?@ then at_identifier.to_s
      when ?$ then dollar_identifier.to_s
      when ?_,?a..?z then identifier_as_string(?:)
      when ?A..?Z then
        result=identifier_as_string(?:)
        if @last_operative_token==='::'
          assert klass==MethNameToken
          /[A-Z_0-9]$/i===result and klass=VarNameToken
        end
        result
      else error= "unexpected char starting symbol: #{nc.chr}"
      end

    return lexerror(klass.new(result,start),error)
  end

  def merge_assignment_op_in_setter_callsites?
    false
  end

  #-----------------------------------
  def callsite_symbol(tok_to_errify)
    start= input_position

    #look for operators
    opmatches=readahead(3)[RUBYSYMOPERATORREX]
    return [opmatches ? read(opmatches.size) :
      case nc=nextchar
      when ?` then read(1)
      when ?_,?a..?z,?A..?Z then
        context=merge_assignment_op_in_setter_callsites? ? ?: : nc
        identifier_as_string(context)
      else
        @last_operative_token=KeywordToken.new(';')
        lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
        nil
      end, start
    ]
  end

  #-----------------------------------
  def here_header
    read(2)=='<<' or raise "parser insanity"

    dash=eat_next_if(?-)
    quote=eat_next_if( /['"`]/)
    if quote
      ender=til_charset(/[#{quote}]/)
      (quote==getchar) or
        return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
    else
      quote='"'
      ender=til_charset(/[^a-zA-Z0-9_]/)
      ender.length >= 1 or
        return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
    end

    res= HerePlaceholderToken.new( dash, quote, ender )
    @incomplete_here_tokens.push res

    #hack: normally this should just be in get1token
    #this fixup is necessary because the call the get1token below
    #makes a recursion.
    @last_operative_token=res

    safe_recurse { |a|
      assert(a.object_id==@moretokens.object_id)
      toks=[]
      begin
        #yech.
        #handle case of here header in a string inclusion, but
        #here body outside it.
        cnt=0
        1.upto @parsestack.size do |i|
          case @parsestack[-i]
          when AssignmentRhsContext,ParamListContextNoParen,TopLevelContext
          else cnt+=1
          end
        end
        if nextchar==?} and cnt==1
          res.bodyclass=OutlinedHereBodyToken
          break
        end
        tok=get1token
        assert(a.equal?( @moretokens))
        toks<<tok

  #-----------------------------------
  def lessthan(ch)
    if readahead(3)=="<=>" then operator_or_methname_token read(3)
    else quadriop(ch)
    end
  end

  #-----------------------------------
  def escnewline(ch)
    assert ch == '\\'

    pos= input_position
    result=getchar
    if nl=readnl
      result+=nl
    else
      error='illegal escape sequence'
    end

    @moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
    optional_here_bodies

    lexerror EscNlToken.new(@filename,ln-1,result,pos), error
  end

  #-----------------------------------
  def optional_here_bodies
    #handle here bodies queued up by previous line
    #(we should be more compatible with dos/mac style newlines...)
    while tofill=@incomplete_here_tokens.shift
      tofill.string.offset= input_position
      loop {
        assert("\r\n"[prevchar])

        #here body terminator?
        oldpos= input_position
        if tofill.dash
          til_charset(/[^#{WHSP}]/o)
        end
        break if eof?
        break if read(tofill.ender.size)==tofill.ender and readnl
        input_position_set oldpos

        if tofill.quote=="'"
          line=til_charset(/[\r\n]/)+readnl
          line.gsub! "\\\\", "\\"
          tofill.append line
          assert(line[-1..-1][/[\r\n]/])
        else
          back1char #-1 to make newline char the next to read
          @linenum-=1

          #retr evrything til next nl
          line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
          #(you didn't know all_quote could take a regex, did you?)

          #get rid of fals that otherwise appear to be in the middle of
          #a string (and are emitted out of order)
          fal=@moretokens.pop
          assert FileAndLineToken===fal || fal.nil?

          back1char
          @linenum-=1
          assert("\r\n"[nextchar.chr])
          tofill.append_token line
          tofill.append readnl
        end
      }

      assert(eof? || "\r\n"[prevchar])
      tofill.unsafe_to_use=false
      tofill.line=@linenum-1

      @moretokens.push \
        tofill.bodyclass.new(tofill),
        FileAndLineToken.new(@filename,@linenum,input_position)
    end
  end

  #-----------------------------------
  def newline(ch)
    assert("\r\n"[nextchar.chr])

    #ordinary newline handling (possibly implicitly escaped)
    assert("\r\n"[nextchar.chr])
    assert !@parsestack.empty?
    assert @moretokens.empty?
    result=if NewlineToken===@last_operative_token or #hack
        @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
        !after_nonid_op?{false}
      then #hack-o-rama: probly cases left out above
        a= abort_noparens!
        ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
        assert !@parsestack.empty?
        @parsestack.last.see self,:semi

        a << super(ch)
        @moretokens.replace a+@moretokens
        @moretokens.shift
      else
        offset= input_position
        nl=readnl
        @moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
        EscNlToken.new(@filename,@linenum-1,nl,offset)
        #WsToken.new ' ' #why?
        #should be "\\\n" ?
      end

    optional_here_bodies
    start_of_line_directives

    return result
  end

  #-----------------------------------
  EQBEGIN=%r/^=begin[ \t\v\r\n\f]$/
  EQBEGINLENGTH=7
  EQEND='=end'
  EQENDLENGTH=4
  ENDMARKER=/^__END__[\r\n]?\Z/
  ENDMARKERLENGTH=8
  def start_of_line_directives
    #handle =begin...=end (at start of a line)
    while EQBEGIN===readahead(EQBEGINLENGTH)
      startpos= input_position
      more= read(EQBEGINLENGTH-1) #get =begin

      begin
and raise "eof before =end" more<].include?(ch)) result=getchar + (eat_next_if(ch)or'') if eat_next_if(?=) result << ?= end return operator_or_methname_token(result) end #----------------------------------- def biop(ch) #match /%=?/ (% or %=) assert(ch[/^[%^~]$/]) result=getchar if eat_next_if(?=) result <|~|==?)?/ (= or == or =~ or === or =>) offset= input_position str=getchar assert str=='=' c=(eat_next_if(/[~=>]/)or'') str << c result= operator_or_methname_token( str,offset) case c when '=': str<< (eat_next_if(?=)or'') when '>': unless ParamListContextNoParen===@parsestack.last @moretokens.unshift result @moretokens.unshift( *abort_noparens!("=>")) result=@moretokens.shift end @parsestack.last.see self,:arrow when '': #record local variable definitions @parsestack.push AssignmentRhsContext.new(@linenum) @moretokens.unshift AssignmentRhsListStartToken.new( offset+1) end return result end #----------------------------------- def exclam(ch) #match /![~=]?/ (! or != or !~) assert nextchar==?! result=getchar k=eat_next_if(/[~=]/) if k result+=k else WHSPLF[nextchar.chr] or @moretokens << NoWsToken.new(input_position) end return KeywordToken.new(result, input_position-result.size) #result should distinguish unary ! end #----------------------------------- def dot(ch) str='' eat_next_if(?.) or raise "lexer confusion" #three lumps of sugar or two? eat_next_if(?.) and return KeywordToken.new(eat_next_if(?.)? "..." : "..") #else saw just single . #match a valid ruby id after the dot result= KeywordToken.new( ".") dot_rhs(result) return result end #----------------------------------- def dot_rhs(prevtok) safe_recurse { |a| @last_operative_token=prevtok aa= ignored_tokens tok,pos=callsite_symbol(prevtok) tok and aa.push(*var_or_meth_name(tok,prevtok,pos)) a.unshift(*aa) } end #----------------------------------- def back_quote(ch=nil) if @last_operative_token===/^(def|::|\.)$/ oldpos= input_position MethNameToken.new(eat_next_if(?`), oldpos) else double_quote(ch) end end if false #----------------------------------- def comment(str) result="" #loop{ result<
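
  # Illustrative usage sketch (an addition, not from the original source): the
  # public surface shown above comes down to constructing a RubyLexer and
  # pulling tokens with get1token until an EoiToken appears.  The filename and
  # the `p tok` body below are hypothetical placeholders.
  #
  #   require 'rubylexer'
  #   lexer = RubyLexer.new("example.rb", File.open("example.rb"))
  #   until EoiToken===(tok=lexer.get1token)
  #     p tok
  #   end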