##
# Hand-written lexer that turns Ruby source text into a stream of
# tokens. The parser drives it through #advance / #yylex; after each
# call the token type is the return value and the associated semantic
# value is left in #yacc_value.
#
# Collaborators that are referenced here but defined outside this class:
# RubyParserStuff::StackState, RubyParserStuff::Keyword, RPStringScanner,
# Ruby18Parser and Symbol#is_argument.

class RubyLexer
  attr_accessor :command_start
  attr_accessor :cmdarg
  attr_accessor :cond
  attr_accessor :tern
  attr_accessor :nest

  # Matches one backslash escape sequence inside a simple double-quoted
  # string; capture 1 is the text following the backslash.
  ESC_RE = /\\([0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc])/

  ##
  # What version of ruby to parse. 18 and 19 are the only valid values
  # currently supported.

  attr_accessor :version

  # Additional context surrounding tokens that both the lexer and
  # grammar use.
  attr_reader :lex_state

  attr_accessor :lex_strterm

  attr_accessor :parser # HACK for very end of lexer... *sigh*

  # Stream of data that yylex examines.
  attr_reader :src

  # Last token read via yylex.
  attr_accessor :token

  attr_accessor :string_buffer

  # Value of last token which had a value associated with it.
  attr_accessor :yacc_value

  # What handles warnings
  attr_accessor :warnings

  attr_accessor :space_seen

  EOF = :eof_haha!

  # ruby constants for strings (should this be moved somewhere else?)
  STR_FUNC_BORING = 0x00
  STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
  STR_FUNC_EXPAND = 0x02
  STR_FUNC_REGEXP = 0x04
  STR_FUNC_QWORDS = 0x08
  STR_FUNC_SYMBOL = 0x10
  STR_FUNC_INDENT = 0x20 # <<-HEREDOC

  STR_SQUOTE = STR_FUNC_BORING
  STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
  STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
  STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
  STR_SSYM   = STR_FUNC_SYMBOL
  STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND

  # Maps the text of simple punctuation operators to their token type.
  TOKENS = {
    "!"   => :tBANG,
    "!="  => :tNEQ,
    "!~"  => :tNMATCH,
    ","   => :tCOMMA,
    ".."  => :tDOT2,
    "..." => :tDOT3,
    "="   => :tEQL,
    "=="  => :tEQ,
    "===" => :tEQQ,
    "=>"  => :tASSOC,
    "=~"  => :tMATCH,
    "->"  => :tLAMBDA,
  }

  ##
  # How the parser advances to the next token. Stores the token in
  # #token and raises if yylex returned nil.
  #
  # @return true if not at end of file (EOF).

  def advance
    r = yylex
    self.token = r

    raise "yylex returned nil" unless r

    return RubyLexer::EOF != r
  end

  ##
  # Emits the "ambiguous first argument" warning through #warning.

  def arg_ambiguous
    self.warning("Ambiguous first argument. make sure.")
  end

  ##
  # Returns all comment text collected so far as a single string and
  # empties the internal comment buffer.

  def comments
    c = @comments.join
    @comments.clear
    c
  end

  ##
  # Pushes +false+ on the cond and cmdarg stacks, switches to
  # :expr_beg and records +val+ as the current semantic value.

  def expr_beg_push val
    cond.push false
    cmdarg.push false
    self.lex_state = :expr_beg
    self.yacc_value = val
  end

  ##
  # After an operator: :expr_arg when the operator was lexed as a method
  # name (state :expr_fname or :expr_dot), otherwise :expr_beg.

  def fix_arg_lex_state
    self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
                       :expr_arg
                     else
                       :expr_beg
                     end
  end

  ##
  # Lexes the next piece of a heredoc body.
  #
  # +here+ is the [:heredoc, eos, func, last_line] tuple stored in
  # #lex_strterm by #heredoc_identifier. Returns :tSTRING_END once the
  # terminator line is reached, :tSTRING_DVAR / :tSTRING_DBEG at an
  # interpolation start, and :tSTRING_CONTENT otherwise. Raises (via
  # #rb_compile_error) when the input ends before the terminator.

  def heredoc here
    _, eos, func, last_line = here

    indent  = (func & STR_FUNC_INDENT) != 0
    expand  = (func & STR_FUNC_EXPAND) != 0
    eos_re  = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
    err_msg = "can't match #{eos_re.inspect} anywhere in "

    rb_compile_error err_msg if src.eos?

    if src.beginning_of_line? && src.scan(eos_re) then
      # Put back the remainder of the line that introduced the heredoc.
      src.unread_many last_line # TODO: figure out how to remove this
      self.yacc_value = eos
      return :tSTRING_END
    end

    self.string_buffer = []

    if expand then
      case
      when src.scan(/#[$@]/) then
        src.pos -= 1 # FIX omg stupid
        self.yacc_value = src.matched
        return :tSTRING_DVAR
      when src.scan(/#[{]/) then
        self.yacc_value = src.matched
        return :tSTRING_DBEG
      when src.scan(/#/) then
        string_buffer << '#'
      end

      until src.check(eos_re) do
        c = tokadd_string func, "\n", nil

        rb_compile_error err_msg if c == RubyLexer::EOF

        if c != "\n" then
          # Stopped before an interpolation: hand back what we have.
          self.yacc_value = string_buffer.join.delete("\r")
          return :tSTRING_CONTENT
        else
          string_buffer << src.scan(/\n/)
        end

        rb_compile_error err_msg if src.eos?
      end
    else
      until src.check(eos_re) do
        string_buffer << src.scan(/.*(\n|\z)/)

        rb_compile_error err_msg if src.eos?
      end
    end

    self.lex_strterm = [:heredoc, eos, func, last_line]
    self.yacc_value = string_buffer.join.delete("\r")

    return :tSTRING_CONTENT
  end

  ##
  # Called right after "<<" has been consumed. Tries to read a heredoc
  # identifier (optionally preceded by "-" and optionally quoted).
  #
  # On success stores a [:heredoc, ...] tuple in #lex_strterm and
  # returns :tXSTRING_BEG (backtick-quoted) or :tSTRING_BEG. Returns nil
  # when what follows is not a heredoc identifier.

  def heredoc_identifier
    term, func = nil, STR_FUNC_BORING
    self.string_buffer = []

    case
    when src.scan(/(-?)(['"`])(.*?)\2/) then
      term = src[2]
      unless src[1].empty? then
        func |= STR_FUNC_INDENT
      end
      func |= case term
              when "\'" then
                STR_SQUOTE
              when '"' then
                STR_DQUOTE
              else
                STR_XQUOTE
              end
      string_buffer << src[3]
    when src.scan(/-?(['"`])(?!\1*\Z)/) then
      rb_compile_error "unterminated here document identifier"
    when src.scan(/(-?)(\w+)/) then
      term = '"'
      func |= STR_DQUOTE
      unless src[1].empty? then
        func |= STR_FUNC_INDENT
      end
      string_buffer << src[2]
    else
      return nil
    end

    # Remember the rest of the introducing line so it can be re-read
    # once the heredoc body has been consumed.
    if src.scan(/.*\n/) then
      # TODO: think about storing off the char range instead
      line = src.matched
      src.extra_lines_added += 1
    else
      line = nil
    end

    self.lex_strterm = [:heredoc, string_buffer.join, func, line]

    if term == '`' then
      self.yacc_value = "`"
      return :tXSTRING_BEG
    else
      self.yacc_value = "\""
      return :tSTRING_BEG
    end
  end

  ##
  # @param v Ruby version to lex (see #version); defaults to 18.

  def initialize v = 18
    self.version = v
    self.cond = RubyParserStuff::StackState.new(:cond)
    self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
    self.tern = RubyParserStuff::StackState.new(:tern)
    self.nest = 0
    @comments = []

    reset
  end

  ##
  # Converts the last scanner match to an Integer in the given +base+,
  # stores it in #yacc_value and returns :tINTEGER. A doubled underscore
  # in the literal is a compile error.

  def int_with_base base
    rb_compile_error "Invalid numeric format" if src.matched =~ /__/
    self.yacc_value = src.matched.to_i(base)
    return :tINTEGER
  end

  ##
  # Sets the lexer state; only Symbols are accepted.

  def lex_state= o
    # warn "wtf lex_state = #{o.inspect}"
    raise "wtf\?" unless Symbol === o
    @lex_state = o
  end

  attr_writer :lineno

  ##
  # Current line number, memoized from the scanner until it is reset by
  # assigning nil through #lineno=.

  def lineno
    @lineno ||= src.lineno
  end

  ##
  # Parse a number from the input stream (the scanner must be positioned
  # at the start of the literal). Sets #lex_state to :expr_end and stores
  # the numeric value in #yacc_value.
  #
  # @return A token type symbol: :tINTEGER or :tFLOAT.

  def parse_number
    self.lex_state = :expr_end

    case
    when src.scan(/[+-]?0[xbd]\b/) then
      rb_compile_error "Invalid numeric format"
    when src.scan(/[+-]?0x[a-f0-9_]+/i) then
      int_with_base(16)
    when src.scan(/[+-]?0b[01_]+/) then
      int_with_base(2)
    when src.scan(/[+-]?0d[0-9_]+/) then
      int_with_base(10)
    when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
      rb_compile_error "Illegal octal digit."
    when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
      int_with_base(8)
    when src.scan(/[+-]?[\d_]+_(e|\.)/) then
      rb_compile_error "Trailing '_' in number."
    when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
      number = src.matched
      if number =~ /__/ then
        rb_compile_error "Invalid numeric format"
      end
      self.yacc_value = number.to_f
      :tFLOAT
    when src.scan(/[+-]?0\b/) then
      int_with_base(10)
    when src.scan(/[+-]?[\d_]+\b/) then
      int_with_base(10)
    else
      rb_compile_error "Bad number format"
    end
  end

  ##
  # Called after "%" has been consumed in a position where a percent
  # literal may start. Reads the optional type letter and the opening
  # delimiter, stores a [:strterm, ...] tuple in #lex_strterm and returns
  # the matching *_BEG token type.

  def parse_quote
    beg, nnd, short_hand, c = nil, nil, false, nil

    if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
      rb_compile_error "unknown type of %string" if src.matched_size == 2
      c, beg, short_hand = src.matched, src.getch, false
    else                               # Short-hand (e.g. %{, %., %!, etc)
      c, beg, short_hand = 'Q', src.getch, true
    end

    if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
      rb_compile_error "unterminated quoted string meets end of file"
    end

    # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
    nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
    nnd, beg = beg, "\0" if nnd.nil?

    token_type, self.yacc_value = nil, "%#{c}#{beg}"
    token_type, string_type = case c
                              when 'Q' then
                                ch = short_hand ? nnd : c + beg
                                self.yacc_value = "%#{ch}"
                                [:tSTRING_BEG,   STR_DQUOTE]
                              when 'q' then
                                [:tSTRING_BEG,   STR_SQUOTE]
                              when 'W' then
                                src.scan(/\s*/)
                                [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
                              when 'w' then
                                src.scan(/\s*/)
                                [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
                              when 'x' then
                                [:tXSTRING_BEG,  STR_XQUOTE]
                              when 'r' then
                                [:tREGEXP_BEG,   STR_REGEXP]
                              when 's' then
                                self.lex_state = :expr_fname
                                [:tSYMBEG,       STR_SSYM]
                              end

    rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
      token_type.nil?

    self.lex_strterm = [:strterm, string_type, nnd, beg]

    return token_type
  end

  ##
  # Lexes the next piece of a quoted literal described by +quote+, a
  # [:strterm, string_type, term, open] tuple.
  #
  # Returns :tSTRING_END / :tREGEXP_END at the terminator, :tSPACE
  # between elements of a word list, :tSTRING_DVAR / :tSTRING_DBEG at an
  # interpolation start, and :tSTRING_CONTENT (text in #yacc_value)
  # otherwise.

  def parse_string(quote)
    _, string_type, term, open = quote

    func = string_type

    # A word list sets quote[1] to nil once its terminator has been
    # consumed (see below); the following call closes the literal.
    # Checked before the flag tests so they never run against nil.
    unless func then
      self.lineno = nil
      return :tSTRING_END
    end

    space = false # FIX: remove these
    paren = open
    term_re = Regexp.escape term

    qwords = (func & STR_FUNC_QWORDS) != 0
    regexp = (func & STR_FUNC_REGEXP) != 0
    expand = (func & STR_FUNC_EXPAND) != 0

    space = true if qwords and src.scan(/\s+/)

    if self.nest == 0 && src.scan(/#{term_re}/) then
      if qwords then
        quote[1] = nil
        return :tSPACE
      elsif regexp then
        self.yacc_value = self.regx_options
        self.lineno = nil
        return :tREGEXP_END
      else
        self.yacc_value = term
        self.lineno = nil
        return :tSTRING_END
      end
    end

    if space then
      return :tSPACE
    end

    self.string_buffer = []

    if expand
      case
      when src.scan(/#(?=[$@])/) then
        return :tSTRING_DVAR
      when src.scan(/#[{]/) then
        return :tSTRING_DBEG
      when src.scan(/#/) then
        string_buffer << '#'
      end
    end

    if tokadd_string(func, term, paren) == RubyLexer::EOF then
      rb_compile_error "unterminated string meets end of file"
    end

    self.yacc_value = string_buffer.join

    return :tSTRING_CONTENT
  end

  ##
  # Raises SyntaxError with +msg+, extended with the current line number
  # and the rest of the current input line.

  def rb_compile_error msg
    msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
    raise SyntaxError, msg
  end

  ##
  # Reads one escape sequence; the scanner must be positioned just after
  # the backslash. Returns the string the escape denotes. Raises (via
  # #rb_compile_error) on a malformed escape or at end of input.

  def read_escape
    case
    when src.scan(/\\/) then                  # Backslash
      '\\'
    when src.scan(/n/) then                   # newline
      "\n"
    when src.scan(/t/) then                   # horizontal tab
      "\t"
    when src.scan(/r/) then                   # carriage-return
      "\r"
    when src.scan(/f/) then                   # form-feed
      "\f"
    when src.scan(/v/) then                   # vertical tab
      "\13"
    when src.scan(/a/) then                   # alarm(bell)
      "\007"
    when src.scan(/e/) then                   # escape
      "\033"
    when src.scan(/b/) then                   # backspace
      "\010"
    when src.scan(/s/) then                   # space
      " "
    when src.scan(/[0-7]{1,3}/) then          # octal constant
      src.matched.to_i(8).chr
    when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
      src[1].to_i(16).chr
    when src.check(/M-\\[\\MCc]/) then
      src.scan(/M-\\/) # eat it
      c = self.read_escape
      c[0] = (c[0].ord | 0x80).chr
      c
    when src.scan(/M-(.)/) then
      c = src[1]
      c[0] = (c[0].ord | 0x80).chr
      c
    when src.check(/(C-|c)\\[\\MCc]/) then
      src.scan(/(C-|c)\\/) # eat it
      c = self.read_escape
      c[0] = (c[0].ord & 0x9f).chr
      c
    when src.scan(/C-\?|c\?/) then
      127.chr
    when src.scan(/(C-|c)(.)/) then
      c = src[2]
      c[0] = (c[0].ord & 0x9f).chr
      c
    when src.scan(/[McCx0-9]/) || src.eos? then
      rb_compile_error("Invalid escape character syntax")
    else
      src.getch
    end
  end

  ##
  # Reads the option letters following a regexp terminator and returns
  # them as a string. Raises (via #rb_compile_error) if any letter is
  # not one of i, x, m, o, n, e, s, u.

  def regx_options
    good, bad = [], []

    if src.scan(/[a-z]+/) then
      good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
    end

    unless bad.empty? then
      rb_compile_error("unknown regexp option%s - %s" %
                       [(bad.size > 1 ? "s" : ""), bad.join.inspect])
    end

    return good.join
  end

  ##
  # Puts the lexer back into its initial state and drops the source.

  def reset
    self.command_start = true
    self.lex_strterm = nil
    self.token = nil
    self.yacc_value = nil

    @src = nil
    @lex_state = nil
  end

  ##
  # True when the attached parser is a Ruby18Parser.

  def ruby18
    Ruby18Parser === parser
  end

  ##
  # Sets the source text to lex; must be a String. It is wrapped in an
  # RPStringScanner.

  def src= src
    raise "bad src: #{src.inspect}" unless String === src
    @src = RPStringScanner.new(src)
  end

  ##
  # Copies one backslash escape verbatim (backslash included) from the
  # scanner into #string_buffer; a backslash-newline pair is dropped.
  # Used for regexp bodies. +term+ is only forwarded on recursion.

  def tokadd_escape term
    case
    when src.scan(/\\\n/) then
      # just ignore
    when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
      self.string_buffer << src.matched
    when src.scan(/\\([MC]-|c)(?=\\)/) then
      self.string_buffer << src.matched
      self.tokadd_escape term
    when src.scan(/\\([MC]-|c)(.)/) then
      self.string_buffer << src.matched
    when src.scan(/\\[McCx]/) then
      rb_compile_error "Invalid escape character syntax"
    when src.scan(/\\(.)/m) then
      self.string_buffer << src.matched
    else
      rb_compile_error "Invalid escape character syntax"
    end
  end

  ##
  # Appends literal content to #string_buffer until the terminator, an
  # interpolation start, or (for word lists) whitespace is reached.
  #
  # @param func  bit mask of STR_FUNC_* flags.
  # @param term  closing delimiter.
  # @param paren opening delimiter used for nesting, or nil.
  # @return the last chunk that was handled, or RubyLexer::EOF when the
  #         input is exhausted.

  def tokadd_string(func, term, paren)
    qwords = (func & STR_FUNC_QWORDS) != 0
    escape = (func & STR_FUNC_ESCAPE) != 0
    expand = (func & STR_FUNC_EXPAND) != 0
    regexp = (func & STR_FUNC_REGEXP) != 0
    symbol = (func & STR_FUNC_SYMBOL) != 0

    paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
    term_re  = Regexp.new(Regexp.escape(term))

    until src.eos? do
      c = nil
      handled = true
      case
      when self.nest == 0 && src.scan(term_re) then
        # Un-read the terminator so the caller can see it.
        src.pos -= 1
        break
      when paren_re && src.scan(paren_re) then
        self.nest += 1
      when src.scan(term_re) then
        self.nest -= 1
      when qwords && src.scan(/\s/) then
        src.pos -= 1
        break
      when expand && src.scan(/#(?=[\$\@\{])/) then
        src.pos -= 1
        break
      when expand && src.scan(/#(?!\n)/) then
        # do nothing
      when src.check(/\\/) then
        case
        when qwords && src.scan(/\\\n/) then
          string_buffer << "\n"
          next
        when qwords && src.scan(/\\\s/) then
          c = ' '
        when expand && src.scan(/\\\n/) then
          next
        when regexp && src.check(/\\/) then
          self.tokadd_escape term
          next
        when expand && src.scan(/\\/) then
          c = self.read_escape
        when src.scan(/\\\n/) then
          # do nothing
        when src.scan(/\\\\/) then
          string_buffer << '\\' if escape
          c = '\\'
        when src.scan(/\\/) then
          unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
            string_buffer << "\\"
          end
        else
          handled = false
        end
      else
        handled = false
      end # case

      unless handled then
        # Plain text: grab the longest run without special characters.
        t = Regexp.escape term
        x = Regexp.escape(paren) if paren && paren != "\000"
        re = if qwords then
               /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
             else
               /[^#{t}#{x}\#\0\\]+|./
             end

        src.scan re
        c = src.matched

        rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
      end # unless handled

      c ||= src.matched
      string_buffer << c
    end # until

    c ||= src.matched
    c = RubyLexer::EOF if src.eos?

    return c
  end

  ##
  # Translates the body of an escape sequence (the text after the
  # backslash) into the string it denotes. Unknown escapes are returned
  # unchanged.

  def unescape s
    r = {
      "a"    => "\007",
      "b"    => "\010",
      "e"    => "\033",
      "f"    => "\f",
      "n"    => "\n",
      "r"    => "\r",
      "s"    => " ",
      "t"    => "\t",
      "v"    => "\13",
      "\\"   => '\\',
      "\n"   => "",
      "C-\?" => 127.chr,
      "c\?"  => 127.chr,
    }[s]

    return r if r

    case s
    when /^[0-7]{1,3}/ then
      $&.to_i(8).chr
    when /^x([0-9a-fA-F]{1,2})/ then
      $1.to_i(16).chr
    when /^M-(.)/ then
      ($1[0].ord | 0x80).chr
    when /^(C-|c)(.)/ then
      ($2[0].ord & 0x9f).chr
    when /^[McCx0-9]/ then
      rb_compile_error("Invalid escape character syntax")
    else
      s
    end
  end

  ##
  # Warning hook; currently ignores +s+.

  def warning s
    # do nothing for now
  end

  ##
  # Returns the next token. Also sets #yacc_value when the token has a
  # value.
  #
  # @return the token type (normally a Symbol), or RubyLexer::EOF at the
  #         end of the input.

  def yylex
    c = ''
    self.space_seen = false
    command_state = false
    src = self.src

    self.token = nil
    self.yacc_value = nil

    # In the middle of a string/heredoc: delegate.
    return yylex_string if lex_strterm

    command_state = self.command_start
    self.command_start = false

    last_state = lex_state

    loop do # START OF CASE
      if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
        self.space_seen = true
        next
      elsif src.check(/[^a-zA-Z]/) then
        if src.scan(/\n|#/) then
          self.lineno = nil
          c = src.matched
          if c == '#' then
            src.pos -= 1

            while src.scan(/\s*#.*(\n+|\z)/) do
              @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
            end

            if src.eos? then
              return RubyLexer::EOF
            end
          end

          # Replace a string of newlines with a single one
          src.scan(/\n+/)

          if [:expr_beg, :expr_fname,
              :expr_dot, :expr_class].include? lex_state then
            next
          end

          self.command_start = true
          self.lex_state = :expr_beg
          return :tNL
        elsif src.scan(/[\]\)\}]/) then
          cond.lexpop
          cmdarg.lexpop
          self.lex_state = :expr_end
          self.yacc_value = src.matched
          result = {
            ")" => :tRPAREN,
            "]" => :tRBRACK,
            "}" => :tRCURLY
          }[src.matched]

          self.tern.lexpop if [:tRBRACK, :tRCURLY].include?(result)

          return result
        elsif src.scan(/\.\.\.?|,|![=~]?/) then
          self.lex_state = :expr_beg
          tok = self.yacc_value = src.matched
          return TOKENS[tok]
        elsif src.check(/\./) then
          if src.scan(/\.\d/) then
            rb_compile_error "no . floating literal anymore put 0 before dot"
          elsif src.scan(/\./) then
            self.lex_state = :expr_dot
            self.yacc_value = "."
            return :tDOT
          end
        elsif src.scan(/\(/) then
          result = if ruby18 then
                     yylex_paren18
                   else
                     yylex_paren19
                   end

          self.expr_beg_push "("

          return result
        elsif src.check(/\=/) then
          if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
            self.fix_arg_lex_state
            tok = self.yacc_value = src.matched
            return TOKENS[tok]
          elsif src.scan(/\=begin(?=\s)/) then
            # @comments << '=' << src.matched
            @comments << src.matched

            unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
              @comments.clear
              rb_compile_error("embedded document meets end of file")
            end

            @comments << src.matched

            next
          else
            raise "you shouldn't be able to get here"
          end
        elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
          # Fast path: a complete double-quoted string with no
          # interpolation.
          self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
          self.lex_state = :expr_end
          return :tSTRING
        elsif src.scan(/\"/) then # FALLBACK
          self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
          self.yacc_value = "\""
          return :tSTRING_BEG
        elsif src.scan(/\@\@?\w*/) then
          self.token = src.matched

          rb_compile_error "`#{token}` is not allowed as a variable name" if
            token =~ /\@\d/

          return process_token(command_state)
        elsif src.scan(/\:\:/) then
          if is_beg? || lex_state == :expr_class || is_space_arg? then
            self.lex_state = :expr_beg
            self.yacc_value = "::"
            return :tCOLON3
          end

          self.lex_state = :expr_dot
          self.yacc_value = "::"
          return :tCOLON2
        elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
          self.yacc_value = src[1]
          self.lex_state = :expr_end
          return :tSYMBOL
        elsif src.scan(/\:/) then
          # ?: / then / when
          if (lex_state == :expr_end || lex_state == :expr_endarg||
              src.check(/\s/)) then
            self.lex_state = :expr_beg
            self.yacc_value = ":"
            return :tCOLON
          end

          case
          when src.scan(/\'/) then
            self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
          when src.scan(/\"/) then
            self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
          end

          self.lex_state = :expr_fname
          self.yacc_value = ":"
          return :tSYMBEG
        elsif src.check(/[0-9]/) then
          return parse_number
        elsif src.scan(/\[/) then
          result = src.matched

          if lex_state == :expr_fname || lex_state == :expr_dot then
            self.lex_state = :expr_arg
            case
            when src.scan(/\]\=/) then
              self.yacc_value = "[]="
              return :tASET
            when src.scan(/\]/) then
              self.yacc_value = "[]"
              return :tAREF
            else
              rb_compile_error "unexpected '['"
            end
          elsif lex_state == :expr_beg || lex_state == :expr_mid then
            self.tern.push false
            result = :tLBRACK
          elsif lex_state.is_argument && space_seen then
            self.tern.push false
            result = :tLBRACK
          else
            result = :tLBRACK2
          end

          self.expr_beg_push "["

          return result
        elsif src.scan(/\'(\\.|[^\'])*\'/) then
          self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
          self.lex_state = :expr_end
          return :tSTRING
        elsif src.check(/\|/) then
          if src.scan(/\|\|\=/) then
            self.lex_state = :expr_beg
            self.yacc_value = "||"
            return :tOP_ASGN
          elsif src.scan(/\|\|/) then
            self.lex_state = :expr_beg
            self.yacc_value = "||"
            return :tOROP
          elsif src.scan(/\|\=/) then
            self.lex_state = :expr_beg
            self.yacc_value = "|"
            return :tOP_ASGN
          elsif src.scan(/\|/) then
            self.fix_arg_lex_state
            self.yacc_value = "|"
            return :tPIPE
          end
        elsif src.scan(/\{/) then
          if defined?(@hack_expects_lambda) && @hack_expects_lambda
            @hack_expects_lambda = false
            self.lex_state = :expr_beg
            return :tLAMBEG
          end

          result = if lex_state.is_argument || lex_state == :expr_end then
                     :tLCURLY      #  block (primary)
                   elsif lex_state == :expr_endarg then
                     :tLBRACE_ARG  #  block (expr)
                   else
                     self.tern.push false
                     :tLBRACE      #  hash
                   end

          self.expr_beg_push "{"
          self.command_start = true unless result == :tLBRACE

          return result
        elsif src.scan(/->/) then
          @hack_expects_lambda = true
          self.lex_state = :expr_arg
          return :tLAMBDA
        elsif src.scan(/[+-]/) then
          sign = src.matched
          utype, type = if sign == "+" then
                          [:tUPLUS, :tPLUS]
                        else
                          [:tUMINUS, :tMINUS]
                        end

          if lex_state == :expr_fname || lex_state == :expr_dot then
            self.lex_state = :expr_arg
            if src.scan(/@/) then
              self.yacc_value = "#{sign}@"
              return utype
            else
              self.yacc_value = sign
              return type
            end
          end

          if src.scan(/\=/) then
            self.lex_state = :expr_beg
            self.yacc_value = sign
            return :tOP_ASGN
          end

          if (lex_state == :expr_beg || lex_state == :expr_mid ||
              (lex_state.is_argument && space_seen && !src.check(/\s/))) then
            if lex_state.is_argument then
              arg_ambiguous
            end

            self.lex_state = :expr_beg
            self.yacc_value = sign

            if src.check(/\d/) then
              if utype == :tUPLUS then
                return self.parse_number
              else
                return :tUMINUS_NUM
              end
            end

            return utype
          end

          self.lex_state = :expr_beg
          self.yacc_value = sign
          return type
        elsif src.check(/\*/) then
          if src.scan(/\*\*=/) then
            self.lex_state = :expr_beg
            self.yacc_value = "**"
            return :tOP_ASGN
          elsif src.scan(/\*\*/) then
            self.yacc_value = "**"
            self.fix_arg_lex_state
            return :tPOW
          elsif src.scan(/\*\=/) then
            self.lex_state = :expr_beg
            self.yacc_value = "*"
            return :tOP_ASGN
          elsif src.scan(/\*/) then
            result = if lex_state.is_argument && space_seen && src.check(/\S/) then
                       warning("`*' interpreted as argument prefix")
                       :tSTAR
                     elsif lex_state == :expr_beg || lex_state == :expr_mid then
                       :tSTAR
                     else
                       :tSTAR2
                     end

            self.yacc_value = "*"
            self.fix_arg_lex_state

            return result
          end
        elsif src.check(/\</) then
          if src.scan(/\<\=\>/) then
            self.fix_arg_lex_state
            self.yacc_value = "<=>"
            return :tCMP
          elsif src.scan(/\<\=/) then
            self.fix_arg_lex_state
            self.yacc_value = "<="
            return :tLEQ
          elsif src.scan(/\<\<\=/) then
            self.fix_arg_lex_state
            self.lex_state = :expr_beg
            self.yacc_value = "\<\<"
            return :tOP_ASGN
          elsif src.scan(/\<\</) then
            # NOTE(review): this guard, the tLSHFT fall-through and the
            # tLT branch below were restored after the text was lost from
            # the file; confirm the heredoc guard against the grammar's
            # expectations.
            if (! [:expr_end,    :expr_dot,
                   :expr_endarg, :expr_class].include?(lex_state) &&
                (!lex_state.is_argument || space_seen)) then
              tok = self.heredoc_identifier
              if tok then
                return tok
              end
            end

            self.fix_arg_lex_state
            self.yacc_value = "\<\<"
            return :tLSHFT
          elsif src.scan(/\</) then
            self.fix_arg_lex_state
            self.yacc_value = "<"
            return :tLT
          end
        elsif src.check(/\>/) then
          if src.scan(/\>\=/) then
            self.fix_arg_lex_state
            self.yacc_value = ">="
            return :tGEQ
          elsif src.scan(/\>\>=/) then
            self.fix_arg_lex_state
            self.lex_state = :expr_beg
            self.yacc_value = ">>"
            return :tOP_ASGN
          elsif src.scan(/\>\>/) then
            self.fix_arg_lex_state
            self.yacc_value = ">>"
            return :tRSHFT
          elsif src.scan(/\>/) then
            self.fix_arg_lex_state
            self.yacc_value = ">"
            return :tGT
          end
        elsif src.scan(/\`/) then
          self.yacc_value = "`"
          case lex_state
          when :expr_fname then
            self.lex_state = :expr_end
            return :tBACK_REF2
          when :expr_dot then
            self.lex_state = if command_state then
                               :expr_cmdarg
                             else
                               :expr_arg
                             end
            return :tBACK_REF2
          end
          self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
          return :tXSTRING_BEG
        elsif src.scan(/\?/) then
          if lex_state == :expr_end || lex_state == :expr_endarg then
            self.lex_state = :expr_beg
            self.tern.push true
            self.yacc_value = "?"
            return :tEH
          end

          if src.eos? then
            rb_compile_error "incomplete character syntax"
          end

          if src.check(/\s|\v/) then
            unless lex_state.is_argument then
              c2 = { " " => 's',
                    "\n" => 'n',
                    "\t" => 't',
                    "\v" => 'v',
                    "\r" => 'r',
                    "\f" => 'f' }[src.matched]

              if c2 then
                warning("invalid character syntax; use ?\\" + c2)
              end
            end

            # ternary
            self.lex_state = :expr_beg
            self.tern.push true
            self.yacc_value = "?"
            return :tEH
          elsif src.check(/\w(?=\w)/) then # ternary, also
            self.lex_state = :expr_beg
            self.tern.push true
            self.yacc_value = "?"
            return :tEH
          end

          # Character literal (?x).
          c = if src.scan(/\\/) then
                self.read_escape
              else
                src.getch
              end
          self.lex_state = :expr_end

          if version == 18 then
            self.yacc_value = c[0].ord & 0xff
            return :tINTEGER
          else
            self.yacc_value = c
            return :tSTRING
          end
        elsif src.check(/\&/) then
          if src.scan(/\&\&\=/) then
            self.yacc_value = "&&"
            self.lex_state = :expr_beg
            return :tOP_ASGN
          elsif src.scan(/\&\&/) then
            self.lex_state = :expr_beg
            self.yacc_value = "&&"
            return :tANDOP
          elsif src.scan(/\&\=/) then
            self.yacc_value = "&"
            self.lex_state = :expr_beg
            return :tOP_ASGN
          elsif src.scan(/&/) then
            result = if lex_state.is_argument && space_seen &&
                         !src.check(/\s/) then
                       warning("`&' interpreted as argument prefix")
                       :tAMPER
                     elsif lex_state == :expr_beg || lex_state == :expr_mid then
                       :tAMPER
                     else
                       :tAMPER2
                     end

            self.fix_arg_lex_state
            self.yacc_value = "&"
            return result
          end
        elsif src.scan(/\//) then
          if lex_state == :expr_beg || lex_state == :expr_mid then
            self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
            self.yacc_value = "/"
            return :tREGEXP_BEG
          end

          if src.scan(/\=/) then
            self.yacc_value = "/"
            self.lex_state = :expr_beg
            return :tOP_ASGN
          end

          if lex_state.is_argument && space_seen then
            unless src.scan(/\s/) then
              arg_ambiguous
              self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
              self.yacc_value = "/"
              return :tREGEXP_BEG
            end
          end

          self.fix_arg_lex_state
          self.yacc_value = "/"

          return :tDIVIDE
        elsif src.scan(/\^=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "^"
          return :tOP_ASGN
        elsif src.scan(/\^/) then
          self.fix_arg_lex_state
          self.yacc_value = "^"
          return :tCARET
        elsif src.scan(/\;/) then
          self.command_start = true
          self.lex_state = :expr_beg
          self.yacc_value = ";"
          return :tSEMI
        elsif src.scan(/\~/) then
          if lex_state == :expr_fname || lex_state == :expr_dot then
            src.scan(/@/)
          end

          self.fix_arg_lex_state
          self.yacc_value = "~"

          return :tTILDE
        elsif src.scan(/\\/) then
          if src.scan(/\n/) then
            self.lineno = nil
            self.space_seen = true
            next
          end
          rb_compile_error "bare backslash only allowed before newline"
        elsif src.scan(/\%/) then
          if lex_state == :expr_beg || lex_state == :expr_mid then
            return parse_quote
          end

          if src.scan(/\=/) then
            self.lex_state = :expr_beg
            self.yacc_value = "%"
            return :tOP_ASGN
          end

          if lex_state.is_argument && space_seen && ! src.check(/\s/) then
            return parse_quote
          end

          self.fix_arg_lex_state
          self.yacc_value = "%"

          return :tPERCENT
        elsif src.check(/\$/) then
          if src.scan(/(\$_)(\w+)/) then
            self.lex_state = :expr_end
            self.token = src.matched
            return process_token(command_state)
          elsif src.scan(/\$_/) then
            self.lex_state = :expr_end
            self.token = src.matched
            self.yacc_value = src.matched
            return :tGVAR
          elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
            self.lex_state = :expr_end
            self.yacc_value = src.matched
            return :tGVAR
          elsif src.scan(/\$([\&\`\'\+])/) then
            self.lex_state = :expr_end
            # Explicit reference to these vars as symbols...
            if last_state == :expr_fname then
              self.yacc_value = src.matched
              return :tGVAR
            else
              self.yacc_value = src[1].to_sym
              return :tBACK_REF
            end
          elsif src.scan(/\$([1-9]\d*)/) then
            self.lex_state = :expr_end
            if last_state == :expr_fname then
              self.yacc_value = src.matched
              return :tGVAR
            else
              self.yacc_value = src[1].to_i
              return :tNTH_REF
            end
          elsif src.scan(/\$0/) then
            self.lex_state = :expr_end
            self.token = src.matched
            return process_token(command_state)
          elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
            self.lex_state = :expr_end
            self.yacc_value = "$"
            return "$"
          elsif src.scan(/\$\w+/)
            self.lex_state = :expr_end
            self.token = src.matched
            return process_token(command_state)
          end
        elsif src.check(/\_/) then
          if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
            self.lineno = nil
            return RubyLexer::EOF
          elsif src.scan(/\_\w*/) then
            self.token = src.matched
            return process_token(command_state)
          end
        end
      end # END OF CASE

      if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
        return RubyLexer::EOF
      else # alpha check
        if src.scan(/\W/) then
          rb_compile_error "Invalid char #{src.matched.inspect} in expression"
        end
      end

      self.token = src.matched if self.src.scan(/\w+/)

      return process_token(command_state)
    end
  end

  ##
  # Chooses the token type for "(" when lexing for Ruby 1.8.

  def yylex_paren18
    self.command_start = true
    result = :tLPAREN2

    if lex_state == :expr_beg || lex_state == :expr_mid then
      result = :tLPAREN
    elsif space_seen then
      if lex_state == :expr_cmdarg then
        result = :tLPAREN_ARG
      elsif lex_state == :expr_arg then
        self.tern.push false
        warning "don't put space before argument parentheses"
      end
    else
      self.tern.push false
    end

    result
  end

  ##
  # True in one of the "end of expression" lexer states.

  def is_end?
    (lex_state == :expr_end ||
     lex_state == :expr_endarg ||
     lex_state == :expr_endfn)
  end

  ##
  # True in one of the argument lexer states.

  def is_arg?
    lex_state == :expr_arg || lex_state == :expr_cmdarg
  end

  ##
  # True in one of the "beginning of expression" lexer states.

  def is_beg?
    (lex_state == :expr_beg ||
     lex_state == :expr_mid ||
     lex_state == :expr_value ||
     lex_state == :expr_class)
  end

  ##
  # True when in an argument state, whitespace was just seen, and +c+
  # is not itself whitespace.

  def is_space_arg? c = "x"
    is_arg? and space_seen and c !~ /\s/
  end

  ##
  # Chooses the token type for "(" when lexing for Ruby 1.9.

  def yylex_paren19
    if is_beg? then
      result = :tLPAREN
    elsif is_space_arg? then
      result = :tLPAREN_ARG
    else
      self.tern.push false
      result = :tLPAREN2
    end

    # p :wtf_paren => [lex_state, space_seen, result]

    # HACK paren_nest++;

    # HACK: this is a mess, but it makes the tests pass, so suck it
    # (stolen from the 1.8 side)
    if lex_state == :expr_beg || lex_state == :expr_mid then
      # do nothing
    elsif space_seen then
      if lex_state == :expr_arg then
        self.tern.push false
      end
    else
      self.tern.push false
    end

    result
  end

  ##
  # Classifies the word currently held in #token (variable, constant,
  # identifier, label or keyword), updates #lex_state and #yacc_value
  # and returns the token type.
  #
  # @param command_state whether the word was read at the start of a
  #        command (value of #command_start when yylex was entered).

  def process_token(command_state)
    token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)

    result = nil
    last_state = lex_state

    case token
    when /^\$/ then
      self.lex_state, result = :expr_end, :tGVAR
    when /^@@/ then
      self.lex_state, result = :expr_end, :tCVAR
    when /^@/ then
      self.lex_state, result = :expr_end, :tIVAR
    else
      if token =~ /[!?]$/ then
        result = :tFID
      else
        if lex_state == :expr_fname then
          # ident=, not =~ => == or followed by =>
          # TODO test lexing of a=>b vs a==>b
          if src.scan(/=(?:(?![~>=])|(?==>))/) then
            result = :tIDENTIFIER
            token << src.matched
          end
        end

        result ||= if token =~ /^[A-Z]/ then
                     :tCONSTANT
                   else
                     :tIDENTIFIER
                   end
      end

      # Hash labels ("name:") are only recognised outside Ruby 1.8 mode.
      unless self.tern.is_in_state
        if (lex_state == :expr_beg && (ruby18 || !command_state)) ||
            lex_state == :expr_arg || lex_state == :expr_cmdarg then
          colon = src.scan(/:/)

          if colon && src.peek(1) != ":"
            src.unscan
            self.lex_state = :expr_beg
            src.scan(/:/)
            self.yacc_value = [token, src.lineno]
            return :tLABEL
          end

          src.unscan if colon
        end
      end unless ruby18

      unless lex_state == :expr_dot then
        # See if it is a reserved word.
        keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
                    RubyParserStuff::Keyword.keyword18 token
                  else
                    RubyParserStuff::Keyword.keyword19 token
                  end

        if keyword then
          state           = lex_state
          self.lex_state  = keyword.state
          self.yacc_value = [token, src.lineno]

          if state == :expr_fname then
            self.yacc_value = keyword.name
            return keyword.id0
          end

          if keyword.id0 == :kDO then
            self.command_start = true
            return :kDO_COND  if cond.is_in_state
            return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
            return :kDO_BLOCK if state == :expr_endarg
            if defined?(@hack_expects_lambda) && @hack_expects_lambda
              @hack_expects_lambda = false
              return :kDO_LAMBDA
            end
            return :kDO
          end

          return keyword.id0 if state == :expr_beg or state == :expr_value

          self.lex_state = :expr_beg if keyword.id0 != keyword.id1

          return keyword.id1
        end
      end

      if (lex_state == :expr_beg || lex_state == :expr_mid ||
          lex_state == :expr_dot || lex_state == :expr_arg ||
          lex_state == :expr_cmdarg) then
        if command_state then
          self.lex_state = :expr_cmdarg
        else
          self.lex_state = :expr_arg
        end
      else
        self.lex_state = :expr_end
      end
    end

    self.yacc_value = token

    self.lex_state = :expr_end if
      last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar

    return result
  end

  ##
  # Continues lexing the string or heredoc described by #lex_strterm and
  # clears that state once the literal's end token is produced.

  def yylex_string
    token = if lex_strterm[0] == :heredoc then
              self.heredoc lex_strterm
            else
              self.parse_string lex_strterm
            end

    if token == :tSTRING_END || token == :tREGEXP_END then
      self.lineno      = nil
      self.lex_strterm = nil
      self.lex_state   = :expr_end
    end

    return token
  end
end