require_relative 'irb/slex' module YARD module Parser::Ruby::Legacy # Legacy lexical tokenizer module. module RubyToken EXPR_BEG = :EXPR_BEG EXPR_MID = :EXPR_MID EXPR_END = :EXPR_END EXPR_ARG = :EXPR_ARG EXPR_FNAME = :EXPR_FNAME EXPR_DOT = :EXPR_DOT EXPR_CLASS = :EXPR_CLASS # Represents a token in the Ruby lexer class Token # @return [Integer] the line number in the file/stream the token is # located. attr_reader :line_no # @return [Integer] the character number in the file/stream the token # is located. attr_reader :char_no # @return [String] the token text value attr_reader :text # @return [Symbol] the lexical state at the token attr_accessor :lex_state # @private NO_TEXT = "??".freeze # Creates a new Token object # @param [Integer] line_no the line number to initialize the token to # @param [Integer] char_no the char number to initialize the token to def initialize(line_no, char_no) @line_no = line_no @char_no = char_no @text = NO_TEXT end # Chainable way to sets the text attribute # # @param [String] text the new text # @return [Token] this token object def set_text(text) @text = text self end end # Represents a block class TkBlockContents < Token def text; '...' end end # Represents an end statement class TkStatementEnd < Token def text; '' end end class TkNode < Token attr :node end # Represents whitespace class TkWhitespace < Token end # Represents a Ruby identifier class TkId < Token def initialize(line_no, char_no, name) super(line_no, char_no) @name = name end attr :name end # Represents a Ruby keyword class TkKW < TkId end # Represents a Ruby value class TkVal < Token def initialize(line_no, char_no, value = nil) super(line_no, char_no) set_text(value) end end class TkOp < Token def name self.class.op_name end end class TkOPASGN < TkOp def initialize(line_no, char_no, op) super(line_no, char_no) op = TkReading2Token[op] unless op.is_a?(Symbol) @op = op end attr :op end class TkUnknownChar < Token def initialize(line_no, char_no, _id) super(line_no, char_no) @name = char_no > 255 ? '?' : char_no.chr end attr :name end class TkError < Token end # @private def set_token_position(line, char) @prev_line_no = line @prev_char_no = char end # @private def Token(token, value = nil) # rubocop:disable Style/MethodName tk = nil case token when String, Symbol source = token.is_a?(String) ? TkReading2Token : TkSymbol2Token if (tk = source[token]).nil? raise "no key #{token}" end tk = Token(tk[0], value) else if token tk = if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty? token.new(@prev_line_no, @prev_char_no) else token.new(@prev_line_no, @prev_char_no, value) end end end tk end # @private TokenDefinitions = [ [:TkCLASS, TkKW, "class", EXPR_CLASS], [:TkMODULE, TkKW, "module", EXPR_BEG], [:TkDEF, TkKW, "def", EXPR_FNAME], [:TkUNDEF, TkKW, "undef", EXPR_FNAME], [:TkBEGIN, TkKW, "begin", EXPR_BEG], [:TkRESCUE, TkKW, "rescue", EXPR_MID], [:TkENSURE, TkKW, "ensure", EXPR_BEG], [:TkEND, TkKW, "end", EXPR_END], [:TkIF, TkKW, "if", EXPR_BEG, :TkIF_MOD], [:TkUNLESS, TkKW, "unless", EXPR_BEG, :TkUNLESS_MOD], [:TkTHEN, TkKW, "then", EXPR_BEG], [:TkELSIF, TkKW, "elsif", EXPR_BEG], [:TkELSE, TkKW, "else", EXPR_BEG], [:TkCASE, TkKW, "case", EXPR_BEG], [:TkWHEN, TkKW, "when", EXPR_BEG], [:TkWHILE, TkKW, "while", EXPR_BEG, :TkWHILE_MOD], [:TkUNTIL, TkKW, "until", EXPR_BEG, :TkUNTIL_MOD], [:TkFOR, TkKW, "for", EXPR_BEG], [:TkBREAK, TkKW, "break", EXPR_END], [:TkNEXT, TkKW, "next", EXPR_END], [:TkREDO, TkKW, "redo", EXPR_END], [:TkRETRY, TkKW, "retry", EXPR_END], [:TkIN, TkKW, "in", EXPR_BEG], [:TkDO, TkKW, "do", EXPR_BEG], [:TkRETURN, TkKW, "return", EXPR_MID], [:TkYIELD, TkKW, "yield", EXPR_END], [:TkSUPER, TkKW, "super", EXPR_END], [:TkSELF, TkKW, "self", EXPR_END], [:TkNIL, TkKW, "nil", EXPR_END], [:TkTRUE, TkKW, "true", EXPR_END], [:TkFALSE, TkKW, "false", EXPR_END], [:TkAND, TkKW, "and", EXPR_BEG], [:TkOR, TkKW, "or", EXPR_BEG], [:TkNOT, TkKW, "not", EXPR_BEG], [:TkIF_MOD, TkKW], [:TkUNLESS_MOD, TkKW], [:TkWHILE_MOD, TkKW], [:TkUNTIL_MOD, TkKW], [:TkALIAS, TkKW, "alias", EXPR_FNAME], [:TkDEFINED, TkKW, "defined?", EXPR_END], [:TklBEGIN, TkKW, "BEGIN", EXPR_END], [:TklEND, TkKW, "END", EXPR_END], [:Tk__LINE__, TkKW, "__LINE__", EXPR_END], [:Tk__FILE__, TkKW, "__FILE__", EXPR_END], [:TkIDENTIFIER, TkId], [:TkFID, TkId], [:TkGVAR, TkId], [:TkIVAR, TkId], [:TkCONSTANT, TkId], [:TkINTEGER, TkVal], [:TkFLOAT, TkVal], [:TkSYMBOL, TkVal], [:TkLABEL, TkVal], [:TkSTRING, TkVal], [:TkXSTRING, TkVal], [:TkREGEXP, TkVal], [:TkCOMMENT, TkVal], [:TkDSTRING, TkNode], [:TkDXSTRING, TkNode], [:TkDREGEXP, TkNode], [:TkNTH_REF, TkId], [:TkBACK_REF, TkId], [:TkUPLUS, TkOp, "+@"], [:TkUMINUS, TkOp, "-@"], [:TkPOW, TkOp, "**"], [:TkCMP, TkOp, "<=>"], [:TkEQ, TkOp, "=="], [:TkEQQ, TkOp, "==="], [:TkNEQ, TkOp, "!="], [:TkGEQ, TkOp, ">="], [:TkLEQ, TkOp, "<="], [:TkANDOP, TkOp, "&&"], [:TkOROP, TkOp, "||"], [:TkMATCH, TkOp, "=~"], [:TkNMATCH, TkOp, "!~"], [:TkDOT2, TkOp, ".."], [:TkDOT3, TkOp, "..."], [:TkAREF, TkOp, "[]"], [:TkASET, TkOp, "[]="], [:TkLSHFT, TkOp, "<<"], [:TkRSHFT, TkOp, ">>"], [:TkCOLON2, TkOp], [:TkCOLON3, TkOp], [:OPASGN, TkOp], # +=, -= etc. # [:TkASSOC, TkOp, "=>"], [:TkQUESTION, TkOp, "?"], #? [:TkCOLON, TkOp, ":"], #: [:TkSTAR], # *arg [:TkAMPER], # &arg # [:TkSYMBEG, TkId], [:TkGT, TkOp, ">"], [:TkLT, TkOp, "<"], [:TkPLUS, TkOp, "+"], [:TkMINUS, TkOp, "-"], [:TkMULT, TkOp, "*"], [:TkDIV, TkOp, "/"], [:TkMOD, TkOp, "%"], [:TkBITOR, TkOp, "|"], [:TkBITXOR, TkOp, "^"], [:TkBITAND, TkOp, "&"], [:TkBITNOT, TkOp, "~"], [:TkNOTOP, TkOp, "!"], [:TkBACKQUOTE, TkOp, "`"], [:TkASSIGN, Token, "="], [:TkDOT, Token, "."], [:TkLPAREN, Token, "("], # (exp) [:TkLBRACK, Token, "["], # [arry] [:TkLBRACE, Token, "{"], # {hash} [:TkRPAREN, Token, ")"], [:TkRBRACK, Token, "]"], [:TkRBRACE, Token, "}"], [:TkCOMMA, Token, ","], [:TkSEMICOLON, Token, ";"], [:TkSPACE, TkWhitespace], [:TkNL, TkWhitespace], [:TkEND_OF_SCRIPT, TkWhitespace], [:TkBACKSLASH, TkUnknownChar, "\\"], [:TkAT, TkUnknownChar, "@"], [:TkDOLLAR, TkUnknownChar, "\$"] ] # { reading => token_class } # { reading => [token_class, *opt] } TkReading2Token = {} TkSymbol2Token = {} # @private def self.def_token(token_n, super_token = Token, reading = nil, *opts) token_n = token_n.id2name unless token_n.is_a?(String) if RubyToken.const_defined?(token_n) # IRB.fail AlreadyDefinedToken, token_n end token_c = Class.new super_token RubyToken.const_set token_n, token_c # token_c.inspect if reading if TkReading2Token[reading] raise "duplicate #{token_n} #{reading}" end if opts.empty? TkReading2Token[reading] = [token_c] else TkReading2Token[reading] = [token_c].concat(opts) end end TkSymbol2Token[token_n.intern] = token_c if token_c <= TkOp token_c.class_eval %{ def self.op_name; "#{reading}"; end } end end for defs in TokenDefinitions def_token(*defs) end NEWLINE_TOKEN = TkNL.new(0, 0) NEWLINE_TOKEN.set_text("\n") end # Lexical analyzer for Ruby source # @private class RubyLex # Read an input stream character by character. We allow for unlimited # ungetting of characters just read. # # We simplify the implementation greatly by reading the entire input # into a buffer initially, and then simply traversing it using # pointers. # # We also have to allow for the here document diversion. This # little gem comes about when the lexer encounters a here # document. At this point we effectively need to split the input # stream into two parts: one to read the body of the here document, # the other to read the rest of the input line where the here # document was initially encountered. For example, we might have # # do_something(<<-A, <<-B) # stuff # for # A # stuff # for # B # # When the lexer encounters the <= @size ch = @content[@offset, 1] @offset += 1 @hwm = @offset if @hwm < @offset if @newline_pending @line_num += 1 @last_newline = @offset - 1 @newline_pending = false end if ch == "\n" @newline_pending = true end ch end def getc_already_read getc end def ungetc(_ch) raise "unget past beginning of file" if @offset <= 0 @offset -= 1 if @content[@offset] == ?\n @newline_pending = false end end def get_read res = @content[@read_back_offset...@offset] @read_back_offset = @offset res end def peek(at) pos = @offset + at if pos >= @size nil else @content[pos, 1] end end def peek_equal(str) @content[@offset, str.length] == str end def divert_read_from(reserve) @content[@offset, 0] = reserve @size = @content.size end end # end of nested class BufferedReader include RubyToken include IRB attr_reader :continue attr_reader :lex_state def self.debug? false end def initialize(content) lex_init @reader = BufferedReader.new(content) @exp_line_no = @line_no = 1 @base_char_no = 0 @indent = 0 @ltype = nil @quoted = nil @lex_state = EXPR_BEG @space_seen = false @continue = false @line = "" @skip_space = false @read_auto_clean_up = false @exception_on_syntax_error = true @colonblock_seen = false end attr_accessor :skip_space attr_accessor :read_auto_clean_up attr_accessor :exception_on_syntax_error attr :indent # io functions def line_no @reader.line_num end def char_no @reader.column end def get_read @reader.get_read end def getc @reader.getc end def getc_of_rests @reader.getc_already_read end def gets (c = getc) || return l = "" begin l.concat c unless c == "\r" break if c == "\n" end while c = getc # rubocop:disable Lint/Loop l end def ungetc(c = nil) @reader.ungetc(c) end def peek_equal?(str) @reader.peek_equal(str) end def peek(i = 0) @reader.peek(i) end def lex catch(:eof) do until ((tk = token).is_a?(TkNL) || tk.is_a?(TkEND_OF_SCRIPT)) && !@continue || tk.nil? end line = get_read if line == "" && tk.is_a?(TkEND_OF_SCRIPT) || tk.nil? nil else line end end end def token set_token_position(line_no, char_no) catch(:eof) do begin begin tk = @OP.match(self) @space_seen = tk.is_a?(TkSPACE) rescue SyntaxError abort if @exception_on_syntax_error tk = TkError.new(line_no, char_no) end end while @skip_space && tk.is_a?(TkSPACE) if @read_auto_clean_up get_read end # throw :eof unless tk p tk if $DEBUG tk.lex_state = lex_state if tk tk end end ENINDENT_CLAUSE = [ "case", "class", "def", "do", "for", "if", "module", "unless", "until", "while", "begin" ] #, "when" ACCEPTS_COLON = ["if", "for", "unless", "until", "while"] DEINDENT_CLAUSE = ["end"] #, "when" PERCENT_LTYPE = { "q" => "\'", "Q" => "\"", "x" => "\`", "r" => "/", "w" => "]", "W" => "]" } PERCENT_PAREN = { "{" => "}", "[" => "]", "<" => ">", "(" => ")" } Ltype2Token = { "\'" => TkSTRING, "\"" => TkSTRING, "\`" => TkXSTRING, "/" => TkREGEXP, "]" => TkDSTRING } Ltype2Token.default = TkSTRING DLtype2Token = { "\"" => TkDSTRING, "\`" => TkDXSTRING, "/" => TkDREGEXP } def lex_init() @OP = SLex.new @OP.def_rules("\0", "\004", "\032") do |chars, _io| Token(TkEND_OF_SCRIPT).set_text(chars) end @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, _io| @space_seen = true while (ch = getc) =~ /[ \t\f\r\13]/ chars << ch end ungetc Token(TkSPACE).set_text(chars) end @OP.def_rule("#") do |_op, _io| identify_comment end @OP.def_rule("=begin", proc { @prev_char_no == 0 && peek(0) =~ /\s/ }) do |op, _io| str = String.new(op) @ltype = "=" begin line = String.new begin ch = getc line << ch end until ch == "\n" str << line end until line =~ /^=end/ ungetc @ltype = nil if str =~ /\A=begin\s+rdoc/i str.sub!(/\A=begin.*\n/, '') str.sub!(/^=end.*/m, '') Token(TkCOMMENT).set_text(str) else Token(TkCOMMENT).set_text(str) end end @OP.def_rule("\n") do print "\\n\n" if RubyLex.debug? @colonblock_seen = false case @lex_state when EXPR_BEG, EXPR_FNAME, EXPR_DOT @continue = true else @continue = false @lex_state = EXPR_BEG end Token(TkNL).set_text("\n") end @OP.def_rules("*", "**", "!", "!=", "!~", "=", "==", "===", "=~", "<=>", "<", "<=", ">", ">=", ">>") do |op, _io| @lex_state = EXPR_BEG Token(op).set_text(op) end @OP.def_rules("<<") do |op, _io| tk = nil if @lex_state != EXPR_END && @lex_state != EXPR_CLASS && (@lex_state != EXPR_ARG || @space_seen) c = peek(0) tk = identify_here_document if /[-~\w\"\'\`]/ =~ c end if !tk @lex_state = EXPR_BEG tk = Token(op).set_text(op) end tk end @OP.def_rules("'", '"') do |op, _io| identify_string(op) end @OP.def_rules("`") do |op, _io| if @lex_state == EXPR_FNAME Token(op).set_text(op) else identify_string(op) end end @OP.def_rules('?') do |op, _io| if @lex_state == EXPR_END @lex_state = EXPR_BEG Token(TkQUESTION).set_text(op) else ch = getc if @lex_state == EXPR_ARG && ch !~ /\s/ ungetc @lex_state = EXPR_BEG Token(TkQUESTION).set_text(op) else str = String.new(op) str << ch if ch == '\\' #' str << read_escape end @lex_state = EXPR_END Token(TkINTEGER).set_text(str) end end end @OP.def_rules("&", "&&", "|", "||") do |op, _io| @lex_state = EXPR_BEG Token(op).set_text(op) end @OP.def_rules("+=", "-=", "*=", "**=", "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, _io| @lex_state = EXPR_BEG op =~ /^(.*)=$/ Token(TkOPASGN, $1).set_text(op) end @OP.def_rule("+@", proc { @lex_state == EXPR_FNAME }) do |op, _io| Token(TkUPLUS).set_text(op) end @OP.def_rule("-@", proc { @lex_state == EXPR_FNAME }) do |op, _io| Token(TkUMINUS).set_text(op) end @OP.def_rules("+", "-") do |op, _io| catch(:RET) do if @lex_state == EXPR_ARG if @space_seen && peek(0) =~ /[0-9]/ throw :RET, identify_number(op) else @lex_state = EXPR_BEG end elsif @lex_state != EXPR_END && peek(0) =~ /[0-9]/ throw :RET, identify_number(op) else @lex_state = EXPR_BEG end Token(op).set_text(op) end end @OP.def_rule(".") do @lex_state = EXPR_BEG if peek(0) =~ /[0-9]/ ungetc identify_number("") else # for obj.if @lex_state = EXPR_DOT Token(TkDOT).set_text(".") end end @OP.def_rules("..", "...") do |op, _io| @lex_state = EXPR_BEG Token(op).set_text(op) end lex_int2 end def lex_int2 @OP.def_rules("]", "}", ")") do |op, _io| @lex_state = EXPR_END @indent -= 1 Token(op).set_text(op) end @OP.def_rule(":") do if (@colonblock_seen && @lex_state != EXPR_BEG) || peek(0) =~ /\s/ @lex_state = EXPR_BEG tk = Token(TkCOLON) else @lex_state = EXPR_FNAME tk = Token(TkSYMBEG) end tk.set_text(":") end @OP.def_rule("::") do # p @lex_state.id2name, @space_seen if @lex_state == EXPR_BEG || @lex_state == EXPR_ARG && @space_seen @lex_state = EXPR_BEG tk = Token(TkCOLON3) else @lex_state = EXPR_DOT tk = Token(TkCOLON2) end tk.set_text("::") end @OP.def_rule("/") do |op, _io| if @lex_state == EXPR_BEG || @lex_state == EXPR_MID identify_string(op) elsif peek(0) == '=' getc @lex_state = EXPR_BEG Token(TkOPASGN, :/).set_text("/=") #") elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/ identify_string(op) else @lex_state = EXPR_BEG Token("/").set_text(op) end end @OP.def_rules("^") do @lex_state = EXPR_BEG Token("^").set_text("^") end # @OP.def_rules("^=") do # @lex_state = EXPR_BEG # Token(TkOPASGN, :^) # end @OP.def_rules(",", ";") do |op, _io| @colonblock_seen = false @lex_state = EXPR_BEG Token(op).set_text(op) end @OP.def_rule("~") do @lex_state = EXPR_BEG Token("~").set_text("~") end @OP.def_rule("~@", proc { @lex_state = EXPR_FNAME }) do @lex_state = EXPR_BEG Token("~").set_text("~@") end @OP.def_rule("(") do @indent += 1 # if @lex_state == EXPR_BEG || @lex_state == EXPR_MID # @lex_state = EXPR_BEG # tk = Token(TkfLPAREN) # else @lex_state = EXPR_BEG tk = Token(TkLPAREN) # end tk.set_text("(") end @OP.def_rule("[]", proc { @lex_state == EXPR_FNAME }) do Token("[]").set_text("[]") end @OP.def_rule("[]=", proc { @lex_state == EXPR_FNAME }) do Token("[]=").set_text("[]=") end @OP.def_rule("[") do @indent += 1 # if @lex_state == EXPR_FNAME # t = Token(TkfLBRACK) # else # if @lex_state == EXPR_BEG || @lex_state == EXPR_MID # t = Token(TkLBRACK) # elsif @lex_state == EXPR_ARG && @space_seen # else # t = Token(TkfLBRACK) # end # end t = Token(TkLBRACK) @lex_state = EXPR_BEG t.set_text("[") end @OP.def_rule("{") do @indent += 1 # if @lex_state != EXPR_END && @lex_state != EXPR_ARG # t = Token(TkLBRACE) # else # t = Token(TkfLBRACE) # end t = Token(TkLBRACE) @lex_state = EXPR_BEG t.set_text("{") end @OP.def_rule('\\') do #' if getc == "\n" @space_seen = true @continue = true Token(TkSPACE).set_text("\\\n") else ungetc Token("\\").set_text("\\") #" end end @OP.def_rule('%') do |_op, _io| if @lex_state == EXPR_BEG || @lex_state == EXPR_MID identify_quotation('%') elsif peek(0) == '=' getc Token(TkOPASGN, "%").set_text("%=") elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/ identify_quotation('%') else @lex_state = EXPR_BEG Token("%").set_text("%") end end @OP.def_rule('$') do #' identify_gvar end @OP.def_rule('@') do if peek(0) =~ /[@\w]/ ungetc identify_identifier else Token("@").set_text("@") end end # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do # |op, io| # @indent += 1 # @lex_state = EXPR_FNAME # # @lex_state = EXPR_END # # until @rests[0] == "\n" or @rests[0] == ";" # # rests.shift # # end # end @OP.def_rule("__END__", proc { @prev_char_no == 0 && peek(0) =~ /[\r\n]/ }) do throw :eof end @OP.def_rule("") do |op, io| printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug? if peek(0) =~ /[0-9]/ t = identify_number("") elsif peek(0) =~ /[\w]/ t = identify_identifier end printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug? t end p @OP if RubyLex.debug? end def identify_gvar @lex_state = EXPR_END str = String.new("$") tk = case ch = getc when %r{[~_*$?!@/\\;,=:<>".]} str << ch Token(TkGVAR, str) when "-" str << "-" << getc Token(TkGVAR, str) when "&", "`", "'", "+" str << ch Token(TkBACK_REF, str) when /[1-9]/ str << ch while (ch = getc) =~ /[0-9]/ str << ch end ungetc Token(TkNTH_REF) when /\w/ ungetc ungetc return identify_identifier else ungetc Token("$") end tk.set_text(str) end def identify_identifier token = "" token.concat getc if peek(0) =~ /[$@]/ token.concat getc if peek(0) == "@" while (ch = getc) =~ /\w|_/ print ":", ch, ":" if RubyLex.debug? token.concat ch end ungetc if ch == "!" || ch == "?" token.concat getc end # fix token # $stderr.puts "identifier - #{token}, state = #@lex_state" case token when /^\$/ return Token(TkGVAR, token).set_text(token) when /^\@/ @lex_state = EXPR_END return Token(TkIVAR, token).set_text(token) end if @lex_state != EXPR_DOT print token, "\n" if RubyLex.debug? token_c, *trans = TkReading2Token[token] if token_c # reserved word? if @lex_state != EXPR_BEG && @lex_state != EXPR_FNAME && trans[1] # modifiers token_c = TkSymbol2Token[trans[1]] @lex_state = trans[0] else if @lex_state != EXPR_FNAME if ENINDENT_CLAUSE.include?(token) @indent += 1 if ACCEPTS_COLON.include?(token) @colonblock_seen = true else @colonblock_seen = false end elsif DEINDENT_CLAUSE.include?(token) @indent -= 1 @colonblock_seen = false end @lex_state = trans[0] else @lex_state = EXPR_END end end return Token(token_c, token).set_text(token) end end if @lex_state == EXPR_FNAME @lex_state = EXPR_END if peek(0) == '=' token.concat getc end elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT @lex_state = EXPR_ARG else @lex_state = EXPR_END end if token[0, 1] =~ /[A-Z]/ return Token(TkCONSTANT, token).set_text(token) elsif token[token.size - 1, 1] =~ /[!?]/ return Token(TkFID, token).set_text(token) else return Token(TkIDENTIFIER, token).set_text(token) end end def identify_here_document ch = getc if ch == "-" ch = getc elsif ch == "~" ch = getc indent = true end if /['"`]/ =~ ch # ' lt = ch quoted = "" while (c = getc) && c != lt quoted.concat c end else lt = '"' quoted = ch.dup while (c = getc) && c =~ /\w/ quoted.concat c end ungetc end ltback, @ltype = @ltype, lt reserve = String.new while ch = getc reserve << ch if ch == "\\" #" ch = getc reserve << ch elsif ch == "\n" break end end str = String.new while (l = gets) l.chomp! if l == quoted str = dedent(str) if indent break else str << l.chomp << "\n" end end @reader.divert_read_from(reserve) @ltype = ltback @lex_state = EXPR_END Token(Ltype2Token[lt], str).set_text(str.dump) end def dedent(str) lines = str.split("\n", -1) dedent_amt = lines.map do |line| line =~ /\S/ ? line.match(/^ */).offset(0)[1] : nil end.compact.min || 0 return str if dedent_amt.zero? lines.map { |line| line =~ /\S/ ? line.gsub(/^ {#{dedent_amt}}/, "") : line }.join("\n") end def identify_quotation(initial_char) ch = getc if lt = PERCENT_LTYPE[ch] initial_char += ch ch = getc elsif ch =~ /\W/ lt = "\"" else # RubyLex.fail SyntaxError, "unknown type of %string ('#{ch}')" end # if ch !~ /\W/ # ungetc # next # end # @ltype = lt @quoted = ch unless @quoted = PERCENT_PAREN[ch] identify_string(lt, @quoted, ch, initial_char) if lt end def identify_number(start) str = start.dup if start == "+" || start == "-" || start == "" start = getc str << start end @lex_state = EXPR_END if start == "0" if peek(0) == "x" ch = getc str << ch match = /[0-9a-f_]/ else match = /[0-7_]/ end while ch = getc if ch !~ match ungetc break else str << ch end end return Token(TkINTEGER).set_text(str) end type = TkINTEGER allow_point = true allow_e = true while ch = getc case ch when /[0-9_]/ str << ch when allow_point && "." type = TkFLOAT if peek(0) !~ /[0-9]/ ungetc break end str << ch allow_point = false when allow_e && "e", allow_e && "E" str << ch type = TkFLOAT if peek(0) =~ /[+-]/ str << getc end allow_e = false allow_point = false else ungetc break end end Token(type).set_text(str) end def identify_string(ltype, quoted = ltype, opener = nil, initial_char = nil) @ltype = ltype @quoted = quoted subtype = nil str = String.new str << initial_char if initial_char str << (opener || quoted) nest = 0 begin while ch = getc str << ch if @quoted == ch if nest == 0 break else nest -= 1 end elsif opener == ch nest += 1 elsif @ltype != "'" && @ltype != "]" && ch == "#" ch = getc if ch == "{" subtype = true str << ch << skip_inner_expression else ungetc(ch) end elsif ch == '\\' #' str << read_escape end end if @ltype == "/" if peek(0) =~ /i|o|n|e|s/ str << getc end end if subtype Token(DLtype2Token[ltype], str) else Token(Ltype2Token[ltype], str) end.set_text(str) ensure @ltype = nil @quoted = nil @lex_state = EXPR_END end end def skip_inner_expression res = String.new nest = 0 while (ch = getc) res << ch if ch == '}' break if nest == 0 nest -= 1 elsif ch == '{' nest += 1 end end res end def identify_comment @ltype = "#" comment = String.new("#") while ch = getc if ch == "\\" ch = getc if ch == "\n" ch = " " else comment << "\\" end else if ch == "\n" @ltype = nil ungetc break end end comment << ch end Token(TkCOMMENT).set_text(comment) end def read_escape res = String.new case ch = getc when /[0-7]/ ungetc ch 3.times do case ch = getc when /[0-7]/ when nil break else ungetc break end res << ch end when "x" res << ch 2.times do case ch = getc when /[0-9a-fA-F]/ when nil break else ungetc break end res << ch end when "M" res << ch if (ch = getc) != '-' ungetc else res << ch if (ch = getc) == "\\" #" res << ch res << read_escape else res << ch end end when "C", "c" #, "^" res << ch if ch == "C" && (ch = getc) != "-" ungetc else res << ch if (ch = getc) == "\\" #" res << ch res << read_escape else res << ch end end else res << ch end res end end end end