require "e2mmap"
require "irb/slex"
module YARD
module Parser::Ruby::Legacy
# Legacy lexical tokenizer module.
module RubyToken
# Lexer state markers. Each constant is simply the Symbol of the same
# name; the lexer stores one of them in @lex_state and the rules branch
# on it (for example to tell a division "/" from the start of a regexp).
EXPR_BEG = :EXPR_BEG
EXPR_MID = :EXPR_MID
EXPR_END = :EXPR_END
EXPR_ARG = :EXPR_ARG
EXPR_FNAME = :EXPR_FNAME
EXPR_DOT = :EXPR_DOT
EXPR_CLASS = :EXPR_CLASS
# A single lexical token produced by the legacy Ruby lexer. Carries its
# position, its source text and the lexer state recorded for it.
class Token
  # Placeholder text used until {#set_text} is called.
  # @private
  NO_TEXT = "??".freeze

  # @return [Symbol] the lexical state at the token
  attr_accessor :lex_state

  # @return [Integer] the line number in the file/stream the token is
  #   located.
  attr_reader :line_no

  # @return [Integer] the character number in the file/stream the token
  #   is located.
  attr_reader :char_no

  # @return [String] the token text value
  attr_reader :text

  # Creates a new Token object whose text is the {NO_TEXT} placeholder.
  #
  # @param [Integer] line_no the line number to initialize the token to
  # @param [Integer] char_no the char number to initialize the token to
  def initialize(line_no, char_no)
    @line_no, @char_no = line_no, char_no
    @text = NO_TEXT
  end

  # Chainable way to set the text attribute.
  #
  # @param [String] text the new text
  # @return [Token] this token object
  def set_text(text)
    tap { @text = text }
  end
end
# Represents a block; its text is always the literal "..." regardless of
# what was stored with set_text.
class TkBlockContents < Token
  # @return [String] the fixed placeholder text
  def text
    '...'
  end
end
# Represents an end statement; its text is always the empty string.
class TkStatementEnd < Token
  # @return [String] always empty
  def text
    ''
  end
end
# Token that exposes a read-only +node+ attribute. Nothing in this class
# assigns @node, so the reader returns nil unless it is set elsewhere.
class TkNode < Token
  attr_reader :node
end
# Represents whitespace (base class of the space, newline and
# end-of-script tokens in the token definition table).
class TkWhitespace < Token; end
# Represents a Ruby identifier.
class TkId < Token
  # The identifier name handed to the constructor.
  attr_reader :name

  # @param [Integer] line_no the line number of the token
  # @param [Integer] char_no the char number of the token
  # @param name the identifier name to remember
  def initialize(line_no, char_no, name)
    super(line_no, char_no)
    @name = name
  end
end
# Represents a Ruby keyword (base class of every keyword token in the
# token definition table).
class TkKW < TkId; end
# Represents a Ruby value (numbers, strings, symbols, comments, ...).
class TkVal < Token
  # Builds the token and immediately stores +value+ as its text. Note
  # that when +value+ is omitted the text becomes nil, not the usual
  # placeholder.
  #
  # @param [Integer] line_no the line number of the token
  # @param [Integer] char_no the char number of the token
  # @param value the initial text of the token
  def initialize(line_no, char_no, value = nil)
    super(line_no, char_no)
    set_text value
  end
end
# Base class of operator tokens.
class TkOp < Token
  # @return the operator name, as answered by the concrete token class's
  #   +op_name+ class method
  def name
    operator_class = self.class
    operator_class.op_name
  end
end
# Operator-assignment token (such as "+=").
class TkOPASGN < TkOp
  # The underlying operator: the Symbol that was passed in, or whatever
  # TkReading2Token holds for a non-Symbol argument.
  attr_reader :op

  # @param [Integer] line_no the line number of the token
  # @param [Integer] char_no the char number of the token
  # @param [Symbol, String] op the operator being combined with "="
  def initialize(line_no, char_no, op)
    super(line_no, char_no)
    @op = op.is_a?(Symbol) ? op : TkReading2Token[op]
  end
end
# Token for a character the lexer has no specific token class for.
class TkUnknownChar < Token
  # One-character name computed in the constructor.
  attr_reader :name

  # The name is derived from +char_no+ (not from the third argument,
  # which is ignored): '?' when it exceeds 255, otherwise the character
  # with that code.
  #
  # @param [Integer] line_no the line number of the token
  # @param [Integer] char_no the char number of the token
  # @param _id unused
  def initialize(line_no, char_no, _id)
    super(line_no, char_no)
    @name =
      if char_no > 255
        '?'
      else
        char_no.chr
      end
  end
end
# Token returned by the lexer in place of a token that raised a syntax
# error (when the lexer is configured not to abort).
class TkError < Token; end
# Remembers the position that the next token built by Token() will be
# created at.
#
# @param line the line number to record
# @param char the character number to record
# @return the recorded character number
# @private
def set_token_position(line, char)
  @prev_line_no, @prev_char_no = line, char
  char
end
# Builds a token positioned at the location last recorded by
# set_token_position.
#
# +token+ may be:
# * a String  - looked up in TkReading2Token (entries are
#   [token_class, *opts]);
# * a Symbol  - looked up in TkSymbol2Token (entries are the bare token
#   class);
# * a token class - instantiated directly;
# * nil/false - yields nil.
#
# Classes descending from TkId, TkVal, TkOPASGN or TkUnknownChar take a
# third constructor argument and receive +value+; all others are built
# from the position alone.
#
# @param [String, Symbol, Class, nil] token what to build
# @param value extra constructor argument for value-carrying tokens
# @return [Token, nil] the new token
# @private
def Token(token, value = nil) # rubocop:disable Style/MethodName
  case token
  when String, Symbol
    source = token.is_a?(String) ? TkReading2Token : TkSymbol2Token
    entry = source[token]
    IRB.fail TkReading2TokenNoKey, token if entry.nil?
    # The two tables store differently shaped entries (array vs. bare
    # class); Array() normalises both so Symbol lookups work as well.
    Token(Array(entry).first, value)
  else
    return nil unless token
    if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty?
      token.new(@prev_line_no, @prev_char_no)
    else
      token.new(@prev_line_no, @prev_char_no, value)
    end
  end
end
# Table of every token class generated at load time. Each row is the
# argument list handed to def_token:
#
#   [constant_name, superclass = Token, reading = nil, *opts]
#
# * constant_name - Symbol naming the class created under RubyToken.
# * superclass    - token base class; omitted rows fall back to
#   def_token's default.
# * reading       - the literal source text that maps to this token.
# * opts          - extra values stored after the class in the reading
#   table. For keywords these are the lexer state to enter after the
#   keyword and, for some keywords, the Symbol of the statement-modifier
#   variant of the token.
# @private
TokenDefinitions = [
[:TkCLASS, TkKW, "class", EXPR_CLASS],
[:TkMODULE, TkKW, "module", EXPR_BEG],
[:TkDEF, TkKW, "def", EXPR_FNAME],
[:TkUNDEF, TkKW, "undef", EXPR_FNAME],
[:TkBEGIN, TkKW, "begin", EXPR_BEG],
[:TkRESCUE, TkKW, "rescue", EXPR_MID],
[:TkENSURE, TkKW, "ensure", EXPR_BEG],
[:TkEND, TkKW, "end", EXPR_END],
[:TkIF, TkKW, "if", EXPR_BEG, :TkIF_MOD],
[:TkUNLESS, TkKW, "unless", EXPR_BEG, :TkUNLESS_MOD],
[:TkTHEN, TkKW, "then", EXPR_BEG],
[:TkELSIF, TkKW, "elsif", EXPR_BEG],
[:TkELSE, TkKW, "else", EXPR_BEG],
[:TkCASE, TkKW, "case", EXPR_BEG],
[:TkWHEN, TkKW, "when", EXPR_BEG],
[:TkWHILE, TkKW, "while", EXPR_BEG, :TkWHILE_MOD],
[:TkUNTIL, TkKW, "until", EXPR_BEG, :TkUNTIL_MOD],
[:TkFOR, TkKW, "for", EXPR_BEG],
[:TkBREAK, TkKW, "break", EXPR_END],
[:TkNEXT, TkKW, "next", EXPR_END],
[:TkREDO, TkKW, "redo", EXPR_END],
[:TkRETRY, TkKW, "retry", EXPR_END],
[:TkIN, TkKW, "in", EXPR_BEG],
[:TkDO, TkKW, "do", EXPR_BEG],
[:TkRETURN, TkKW, "return", EXPR_MID],
[:TkYIELD, TkKW, "yield", EXPR_END],
[:TkSUPER, TkKW, "super", EXPR_END],
[:TkSELF, TkKW, "self", EXPR_END],
[:TkNIL, TkKW, "nil", EXPR_END],
[:TkTRUE, TkKW, "true", EXPR_END],
[:TkFALSE, TkKW, "false", EXPR_END],
[:TkAND, TkKW, "and", EXPR_BEG],
[:TkOR, TkKW, "or", EXPR_BEG],
[:TkNOT, TkKW, "not", EXPR_BEG],
[:TkIF_MOD, TkKW],
[:TkUNLESS_MOD, TkKW],
[:TkWHILE_MOD, TkKW],
[:TkUNTIL_MOD, TkKW],
[:TkALIAS, TkKW, "alias", EXPR_FNAME],
[:TkDEFINED, TkKW, "defined?", EXPR_END],
[:TklBEGIN, TkKW, "BEGIN", EXPR_END],
[:TklEND, TkKW, "END", EXPR_END],
[:Tk__LINE__, TkKW, "__LINE__", EXPR_END],
[:Tk__FILE__, TkKW, "__FILE__", EXPR_END],
[:TkIDENTIFIER, TkId],
[:TkFID, TkId],
[:TkGVAR, TkId],
[:TkIVAR, TkId],
[:TkCONSTANT, TkId],
[:TkINTEGER, TkVal],
[:TkFLOAT, TkVal],
[:TkSYMBOL, TkVal],
[:TkLABEL, TkVal],
[:TkSTRING, TkVal],
[:TkXSTRING, TkVal],
[:TkREGEXP, TkVal],
[:TkCOMMENT, TkVal],
[:TkDSTRING, TkNode],
[:TkDXSTRING, TkNode],
[:TkDREGEXP, TkNode],
[:TkNTH_REF, TkId],
[:TkBACK_REF, TkId],
[:TkUPLUS, TkOp, "+@"],
[:TkUMINUS, TkOp, "-@"],
[:TkPOW, TkOp, "**"],
[:TkCMP, TkOp, "<=>"],
[:TkEQ, TkOp, "=="],
[:TkEQQ, TkOp, "==="],
[:TkNEQ, TkOp, "!="],
[:TkGEQ, TkOp, ">="],
[:TkLEQ, TkOp, "<="],
[:TkANDOP, TkOp, "&&"],
[:TkOROP, TkOp, "||"],
[:TkMATCH, TkOp, "=~"],
[:TkNMATCH, TkOp, "!~"],
[:TkDOT2, TkOp, ".."],
[:TkDOT3, TkOp, "..."],
[:TkAREF, TkOp, "[]"],
[:TkASET, TkOp, "[]="],
[:TkLSHFT, TkOp, "<<"],
[:TkRSHFT, TkOp, ">>"],
[:TkCOLON2, TkOp],
[:TkCOLON3, TkOp],
# NOTE(review): this row creates a constant named OPASGN; the TkOPASGN
# class used by the lexer is the hand-written one defined earlier.
[:OPASGN, TkOp], # +=, -= etc. #
[:TkASSOC, TkOp, "=>"],
[:TkQUESTION, TkOp, "?"], #?
[:TkCOLON, TkOp, ":"], #:
[:TkSTAR], # *arg
[:TkAMPER], # &arg #
[:TkSYMBEG, TkId],
[:TkGT, TkOp, ">"],
[:TkLT, TkOp, "<"],
[:TkPLUS, TkOp, "+"],
[:TkMINUS, TkOp, "-"],
[:TkMULT, TkOp, "*"],
[:TkDIV, TkOp, "/"],
[:TkMOD, TkOp, "%"],
[:TkBITOR, TkOp, "|"],
[:TkBITXOR, TkOp, "^"],
[:TkBITAND, TkOp, "&"],
[:TkBITNOT, TkOp, "~"],
[:TkNOTOP, TkOp, "!"],
[:TkBACKQUOTE, TkOp, "`"],
[:TkASSIGN, Token, "="],
[:TkDOT, Token, "."],
[:TkLPAREN, Token, "("], # (exp)
[:TkLBRACK, Token, "["], # [arry]
[:TkLBRACE, Token, "{"], # {hash}
[:TkRPAREN, Token, ")"],
[:TkRBRACK, Token, "]"],
[:TkRBRACE, Token, "}"],
[:TkCOMMA, Token, ","],
[:TkSEMICOLON, Token, ";"],
[:TkSPACE, TkWhitespace],
[:TkNL, TkWhitespace],
[:TkEND_OF_SCRIPT, TkWhitespace],
[:TkBACKSLASH, TkUnknownChar, "\\"],
[:TkAT, TkUnknownChar, "@"],
[:TkDOLLAR, TkUnknownChar, "\$"]
]
# { reading => [token_class, *opt] }
# Filled by def_token: maps a token's literal source text to an Array
# holding its class followed by any extra options from the definition.
TkReading2Token = {}
# { constant_name_symbol => token_class }
# Filled by def_token: maps the token's constant name to the bare class.
TkSymbol2Token = {}
# Creates a token class and registers it in the lookup tables.
#
# A new anonymous subclass of +super_token+ is assigned to the constant
# +token_n+ under RubyToken. When +reading+ is given the class (followed
# by +opts+) is stored in TkReading2Token under that text. The class is
# always stored in TkSymbol2Token under its name. Operator classes
# (descendants of TkOp) additionally get an +op_name+ class method that
# returns the reading as a String ("" when there is none).
#
# @param [Symbol, String] token_n name of the constant to define
# @param [Class] super_token base class of the new token class
# @param [String, nil] reading literal source text of the token
# @param opts extra values kept alongside the class in TkReading2Token
# @raise TkReading2TokenDuplicateError (through IRB.fail) when +reading+
#   is already registered
# @private
def self.def_token(token_n, super_token = Token, reading = nil, *opts)
  token_n = token_n.id2name unless token_n.is_a?(String)
  token_c = Class.new(super_token)
  RubyToken.const_set(token_n, token_c)
  if reading
    if TkReading2Token[reading]
      IRB.fail TkReading2TokenDuplicateError, token_n, reading
    end
    TkReading2Token[reading] = [token_c, *opts]
  end
  TkSymbol2Token[token_n.intern] = token_c
  return unless token_c <= TkOp
  # Define op_name with a closure instead of evaluating generated source,
  # so readings containing quotes or backslashes cannot corrupt it.
  op_text = reading.to_s
  token_c.define_singleton_method(:op_name) { op_text.dup }
end
# Generate a token class for every row of the definition table.
TokenDefinitions.each { |definition| def_token(*definition) }
# Shared newline token positioned at 0:0 whose text is "\n".
NEWLINE_TOKEN = TkNL.new(0, 0).set_text("\n")
end
# Lexical analyzer for Ruby source
# @private
class RubyLex
# Read an input stream character by character. We allow for unlimited
# ungetting of characters just read.
#
# We simplify the implementation greatly by reading the entire input
# into a buffer initially, and then simply traversing it using
# pointers.
#
# We also have to allow for the here document diversion. This
# little gem comes about when the lexer encounters a here
# document. At this point we effectively need to split the input
# stream into two parts: one to read the body of the here document,
# the other to read the rest of the input line where the here
# document was initially encountered. For example, we might have
#
# do_something(<<-A, <<-B)
# stuff
# for
# A
# stuff
# for
# B
#
# When the lexer encounters the <= @size
ch = @content[@offset, 1]
@offset += 1
@hwm = @offset if @hwm < @offset
if @newline_pending
@line_num += 1
@last_newline = @offset - 1
@newline_pending = false
end
if ch == "\n"
@newline_pending = true
end
ch
end
# Alias-style wrapper: simply reads the next character with getc.
#
# @return whatever getc returns
def getc_already_read
  getc()
end
# Steps the read position back by one character. If the character that
# becomes current again is a newline, the pending-newline flag is
# cleared.
#
# @param _ch ignored
# @raise [RuntimeError] when already at the start of the content
def ungetc(_ch)
  raise "unget past beginning of file" unless @offset > 0
  @offset -= 1
  @newline_pending = false if @content[@offset] == ?\n
end
# Returns the text consumed since the previous call and moves the
# read-back marker up to the current offset.
#
# @return [String] content between the read-back marker and the offset
def get_read
  @content[@read_back_offset...@offset].tap do
    @read_back_offset = @offset
  end
end
# Looks at a character relative to the current offset without consuming
# anything.
#
# @param [Integer] at distance from the current offset
# @return [String, nil] the character, or nil at or beyond @size
def peek(at)
  target = @offset + at
  return nil if target >= @size
  @content[target, 1]
end
# Tells whether the unread content starts with +str+.
#
# @param [String] str the text to compare against
# @return [Boolean] true when the next str.length characters equal +str+
def peek_equal(str)
  upcoming = @content.slice(@offset, str.length)
  upcoming == str
end
# Splices +reserve+ into the content at the current offset, so it is the
# next thing to be read, and refreshes the cached size.
#
# @param [String] reserve text to re-insert
# @return [Integer] the new content size
def divert_read_from(reserve)
  @content.insert(@offset, reserve)
  @size = @content.size
end
end
# end of nested class BufferedReader
# Exception classes are declared through e2mmap's def_exception; each
# call names the exception and gives its message format string.
extend Exception2MessageMapper
def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
def_exception(:TkReading2TokenDuplicateError,
"key duplicate(token_n='%s', key='%s')")
def_exception(:SyntaxError, "%s")
# Mix in the token classes/constants and the Token() factory.
include RubyToken
include IRB
# Set to true by the newline and backslash-newline rules when the
# logical line carries on, and to false when a newline ends it.
attr_reader :continue
# The current lexer state (one of the EXPR_* symbols).
attr_reader :lex_state
# Switch for the lexer's diagnostic printing; hard-wired off.
#
# @return [Boolean] always false
def self.debug?; false; end
# Sets up the rule table and a reader over +content+, then resets all
# lexer state to its starting values.
#
# @param content the source text handed to BufferedReader
def initialize(content)
  lex_init
  @reader = BufferedReader.new(content)

  # position bookkeeping
  @line_no = 1
  @exp_line_no = 1
  @base_char_no = 0
  @indent = 0
  @line = ""

  # literal / state tracking
  @ltype = nil
  @quoted = nil
  @lex_state = EXPR_BEG
  @space_seen = false
  @continue = false
  @colonblock_seen = false

  # caller-tunable options
  @skip_space = false
  @read_auto_clean_up = false
  @exception_on_syntax_error = true
end
# When true, #token silently discards TkSPACE tokens.
attr_accessor :skip_space
# When true, #token calls get_read after every token, discarding the
# consumed text.
attr_accessor :read_auto_clean_up
# When true (the initial value), a SyntaxError raised while matching a
# token makes #token call abort; otherwise a TkError token is returned.
attr_accessor :exception_on_syntax_error
# Nesting counter raised by opening brackets and block keywords and
# lowered by closing brackets and "end".
attr :indent
# io functions
# @return the reader's current line number
def line_no
  @reader.line_num()
end
# @return the reader's current column
def char_no
  @reader.column()
end
# @return the text the reader hands back from its own get_read
def get_read
  @reader.get_read()
end
# @return the next character from the reader
def getc
  @reader.getc()
end
# @return the next character, via the reader's getc_already_read
def getc_of_rests
  @reader.getc_already_read()
end
# Reads one line, character by character, through getc. Carriage returns
# are dropped; the terminating newline (if any) is kept.
#
# @return [String, nil] the line, or nil when nothing is left to read
def gets
  ch = getc
  return unless ch
  buffer = ""
  while ch
    buffer.concat ch unless ch == "\r"
    break if ch == "\n"
    ch = getc
  end
  buffer
end
# Pushes the last character back onto the reader.
#
# @param c passed straight through to the reader
def ungetc(c = nil)
  @reader.ungetc c
end
# @param str text to compare with the upcoming input
# @return the reader's answer from peek_equal
def peek_equal?(str)
  @reader.peek_equal str
end
# @param i distance from the current position (defaults to 0)
# @return the reader's peek result for that distance
def peek(i = 0)
  @reader.peek i
end
# Consumes tokens up to the end of the current logical line and returns
# the raw text that was read.
#
# Tokens are pulled until a newline or end-of-script token arrives while
# no continuation is pending, or until #token yields nil.
#
# @return [String, nil] the text read; nil when the last token was nil,
#   when nothing was read before the end of the script, or when :eof is
#   thrown without a value
def lex
  catch(:eof) do
    tk = nil
    loop do
      tk = token
      break if tk.nil?
      line_ended = tk.is_a?(TkNL) || tk.is_a?(TkEND_OF_SCRIPT)
      break if line_ended && !@continue
    end
    line = get_read
    next nil if tk.nil?
    next nil if line == "" && tk.is_a?(TkEND_OF_SCRIPT)
    line
  end
end
# Reads and returns the next token.
#
# The current reader position is recorded first so that Token() creates
# the token at the place where it started. The token is then obtained by
# matching the rule table in @OP against this lexer.
#
# @return [Token, nil] the next token with its lex_state filled in, or
#   nil when no rule produced one (or :eof was thrown)
def token
set_token_position(line_no, char_no)
catch(:eof) do
# Outer loop: repeat while space tokens are being skipped.
begin
begin
tk = @OP.match(self)
@space_seen = tk.is_a?(TkSPACE)
rescue SyntaxError
# NOTE(review): a bare `abort` resolves to Kernel#abort, which ends the
# process -- confirm this is intended rather than re-raising.
abort if @exception_on_syntax_error
tk = TkError.new(line_no, char_no)
end
end while @skip_space && tk.is_a?(TkSPACE)
# Optionally drop the text consumed so far from the read-back buffer.
if @read_auto_clean_up
get_read
end
# throw :eof unless tk
p tk if $DEBUG
# Stamp the token with the state the lexer is in after reading it.
tk.lex_state = lex_state if tk
tk
end
end
# Keywords that raise @indent when read outside a method-name position.
ENINDENT_CLAUSE = [
"case", "class", "def", "do", "for", "if",
"module", "unless", "until", "while", "begin"
] #, "when"
# Subset of the keywords above that set @colonblock_seen, which changes
# how a following ":" is classified.
ACCEPTS_COLON = ["if", "for", "unless", "until", "while"]
# Keywords that lower @indent.
DEINDENT_CLAUSE = ["end"] #, "when"
# %-literal type letter => literal type used by identify_string.
PERCENT_LTYPE = {
"q" => "\'",
"Q" => "\"",
"x" => "\`",
"r" => "/",
"w" => "]",
"W" => "]"
}
# Opening bracket of a %-literal => its closing bracket.
PERCENT_PAREN = {
"{" => "}",
"[" => "]",
"<" => ">",
"(" => ")"
}
# Literal type => token class for literals without interpolation.
# Unknown types fall back to TkSTRING (see the default set below).
Ltype2Token = {
"\'" => TkSTRING,
"\"" => TkSTRING,
"\`" => TkXSTRING,
"/" => TkREGEXP,
"]" => TkDSTRING
}
Ltype2Token.default = TkSTRING
# Literal type => token class used when the literal contains "#{...}".
DLtype2Token = {
"\"" => TkDSTRING,
"\`" => TkDXSTRING,
"/" => TkDREGEXP
}
# Creates the rule table (@OP) and installs the first half of the lexer
# rules; lex_int2 installs the rest. Each rule maps a literal prefix
# (optionally guarded by a precondition proc) to a block that updates
# the lexer state and returns the token for that input.
def lex_init()
  @OP = SLex.new
  # NUL, ^D and ^Z terminate the script.
  @OP.def_rules("\0", "\004", "\032") do |chars, _io|
    Token(TkEND_OF_SCRIPT).set_text(chars)
  end
  # A run of horizontal whitespace becomes a single space token.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, _io|
    @space_seen = true
    while (ch = getc) =~ /[ \t\f\r\13]/
      chars << ch
    end
    ungetc
    Token(TkSPACE).set_text(chars)
  end
  @OP.def_rule("#") do |_op, _io|
    identify_comment
  end
  # Embedded document: "=begin" at the start of a line, up to and
  # including the "=end" line. The whole block becomes one comment token.
  @OP.def_rule("=begin", proc { @prev_char_no == 0 && peek(0) =~ /\s/ }) do |op, _io|
    str = String.new(op)
    @ltype = "="
    eof = false
    until eof
      line = String.new
      loop do
        ch = getc
        if ch.nil?
          # Unterminated document: stop at end of input instead of
          # appending nil to the line.
          eof = true
          break
        end
        line << ch
        break if ch == "\n"
      end
      str << line
      break if line =~ /^=end/
    end
    # Give the newline that ended the "=end" line back to the reader; at
    # end of input there is nothing to give back.
    ungetc unless eof
    @ltype = nil
    if str =~ /\A=begin\s+rdoc/i
      # "=begin rdoc" blocks keep only the body, without the markers.
      str.sub!(/\A=begin.*\n/, '')
      str.sub!(/^=end.*/m, '')
      Token(TkCOMMENT).set_text(str)
    else
      Token(TkCOMMENT).set_text(str)
    end
  end
  # A newline ends the logical line unless the state says an expression
  # is still expected.
  @OP.def_rule("\n") do
    print "\\n\n" if RubyLex.debug?
    @colonblock_seen = false
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
    end
    Token(TkNL).set_text("\n")
  end
  @OP.def_rules("*", "**",
                "!", "!=", "!~",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do |op, _io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end
  # "<<" is either the start of a here document or the shift operator.
  @OP.def_rules("<<") do |op, _io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
       (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      tk = identify_here_document if /[-\w\"\'\`]/ =~ c
    end
    if !tk
      @lex_state = EXPR_BEG
      tk = Token(op).set_text(op)
    end
    tk
  end
  @OP.def_rules("'", '"') do |op, _io|
    identify_string(op)
  end
  # A backquote is a method name after "def", otherwise a command string.
  @OP.def_rules("`") do |op, _io|
    if @lex_state == EXPR_FNAME
      Token(op).set_text(op)
    else
      identify_string(op)
    end
  end
  # "?" is the ternary operator or the start of a character literal.
  @OP.def_rules('?') do |op, _io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION).set_text(op)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch !~ /\s/
        ungetc
        @lex_state = EXPR_BEG
        Token(TkQUESTION).set_text(op)
      else
        str = String.new(op)
        str << ch
        if ch == '\\' #'
          str << read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER).set_text(str)
      end
    end
  end
  @OP.def_rules("&", "&&", "|", "||") do |op, _io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end
  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, _io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1).set_text(op)
  end
  # Unary operator method names are only recognised where a method name
  # is expected.
  @OP.def_rule("+@", proc { @lex_state == EXPR_FNAME }) do |op, _io|
    Token(TkUPLUS).set_text(op)
  end
  @OP.def_rule("-@", proc { @lex_state == EXPR_FNAME }) do |op, _io|
    Token(TkUMINUS).set_text(op)
  end
  # "+"/"-" start a signed number when a digit follows in a position
  # where a value is expected; otherwise they are operators.
  @OP.def_rules("+", "-") do |op, _io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen && peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END && peek(0) =~ /[0-9]/
        throw :RET, identify_number(op)
      else
        @lex_state = EXPR_BEG
      end
      Token(op).set_text(op)
    end
  end
  # "." followed by a digit is a number; otherwise a method-call dot.
  @OP.def_rule(".") do
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      ungetc
      identify_number("")
    else
      # for obj.if
      @lex_state = EXPR_DOT
      Token(TkDOT).set_text(".")
    end
  end
  @OP.def_rules("..", "...") do |op, _io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end
  lex_int2
end
# Installs the second half of the lexer rules on @OP (brackets, colons,
# division/regexp, %-literals, sigils, "__END__" and the catch-all rule
# for numbers and identifiers). Called at the end of lex_init.
def lex_int2
  @OP.def_rules("]", "}", ")") do |op, _io|
    @lex_state = EXPR_END
    @indent -= 1
    Token(op).set_text(op)
  end
  # ":" is a plain colon after a block keyword or before whitespace,
  # otherwise the start of a symbol.
  @OP.def_rule(":") do
    if (@colonblock_seen && @lex_state != EXPR_BEG) || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON)
    else
      @lex_state = EXPR_FNAME
      tk = Token(TkSYMBEG)
    end
    tk.set_text(":")
  end
  @OP.def_rule("::") do
    # p @lex_state.id2name, @space_seen
    if @lex_state == EXPR_BEG || @lex_state == EXPR_ARG && @space_seen
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      tk = Token(TkCOLON2)
    end
    tk.set_text("::")
  end
  # "/" starts a regexp where a value is expected, otherwise it is the
  # division operator (or "/=").
  @OP.def_rule("/") do |op, _io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :/).set_text("/=") #")
    elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/").set_text(op)
    end
  end
  @OP.def_rules("^") do
    @lex_state = EXPR_BEG
    Token("^").set_text("^")
  end
  # @OP.def_rules("^=") do
  #   @lex_state = EXPR_BEG
  #   Token(TkOPASGN, :^)
  # end
  @OP.def_rules(",", ";") do |op, _io|
    @colonblock_seen = false
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end
  @OP.def_rule("~") do
    @lex_state = EXPR_BEG
    Token("~").set_text("~")
  end
  # "~@" is only a method name where one is expected. The precondition
  # must compare the state; assigning it would make the rule match
  # everywhere and overwrite the state as a side effect.
  @OP.def_rule("~@", proc { @lex_state == EXPR_FNAME }) do
    @lex_state = EXPR_BEG
    Token("~").set_text("~@")
  end
  @OP.def_rule("(") do
    @indent += 1
    # if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
    #   @lex_state = EXPR_BEG
    #   tk = Token(TkfLPAREN)
    # else
    @lex_state = EXPR_BEG
    tk = Token(TkLPAREN)
    # end
    tk.set_text("(")
  end
  @OP.def_rule("[]", proc { @lex_state == EXPR_FNAME }) do
    Token("[]").set_text("[]")
  end
  @OP.def_rule("[]=", proc { @lex_state == EXPR_FNAME }) do
    Token("[]=").set_text("[]=")
  end
  @OP.def_rule("[") do
    @indent += 1
    # if @lex_state == EXPR_FNAME
    #   t = Token(TkfLBRACK)
    # else
    #   if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
    #     t = Token(TkLBRACK)
    #   elsif @lex_state == EXPR_ARG && @space_seen
    #   else
    #     t = Token(TkfLBRACK)
    #   end
    # end
    t = Token(TkLBRACK)
    @lex_state = EXPR_BEG
    t.set_text("[")
  end
  @OP.def_rule("{") do
    @indent += 1
    # if @lex_state != EXPR_END && @lex_state != EXPR_ARG
    #   t = Token(TkLBRACE)
    # else
    #   t = Token(TkfLBRACE)
    # end
    t = Token(TkLBRACE)
    @lex_state = EXPR_BEG
    t.set_text("{")
  end
  # Backslash-newline is a line continuation, reported as a space token.
  @OP.def_rule('\\') do #'
    if getc == "\n"
      @space_seen = true
      @continue = true
      Token(TkSPACE).set_text("\\\n")
    else
      ungetc
      Token("\\").set_text("\\") #"
    end
  end
  # "%" starts a %-literal where a value is expected, otherwise it is
  # the modulo operator (or "%=").
  @OP.def_rule('%') do |_op, _io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation('%')
    elsif peek(0) == '='
      getc
      # Like every other assignment operator, "%=" is followed by an
      # expression.
      @lex_state = EXPR_BEG
      Token(TkOPASGN, "%").set_text("%=")
    elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/
      identify_quotation('%')
    else
      @lex_state = EXPR_BEG
      Token("%").set_text("%")
    end
  end
  @OP.def_rule('$') do #'
    identify_gvar
  end
  @OP.def_rule('@') do
    if peek(0) =~ /[@\w]/
      ungetc
      identify_identifier
    else
      Token("@").set_text("@")
    end
  end
  # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
  #   |op, io|
  #   @indent += 1
  #   @lex_state = EXPR_FNAME
  #   # @lex_state = EXPR_END
  #   # until @rests[0] == "\n" or @rests[0] == ";"
  #   #   rests.shift
  #   # end
  # end
  # "__END__" alone at the start of a line stops lexing.
  @OP.def_rule("__END__", proc { @prev_char_no == 0 && peek(0) =~ /[\r\n]/ }) do
    throw :eof
  end
  # Catch-all: anything not matched above is a number or an identifier
  # (or yields nil when it is neither).
  @OP.def_rule("") do |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number("")
    elsif peek(0) =~ /[\w]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
    t
  end
  p @OP if RubyLex.debug?
end
# Lexes what follows a "$" that has just been consumed.
#
# Recognises punctuation globals ($~, $_, ...), "$-x" option globals,
# back references ($&, $`, $', $+) and numbered references ($1, $23).
# A "$" followed by a word character is handed to identify_identifier
# after un-reading both characters; a lone "$" becomes the token
# registered for "$".
#
# @return [Token] the token, with its text set to the variable text
def identify_gvar
  @lex_state = EXPR_END
  str = String.new("$")
  tk = case ch = getc
       when %r{[~_*$?!@/\\;,=:<>".]}
         str << ch
         Token(TkGVAR, str)
       when "-"
         str << "-" << getc
         Token(TkGVAR, str)
       when "&", "`", "'", "+"
         str << ch
         Token(TkBACK_REF, str)
       when /[1-9]/
         str << ch
         while (ch = getc) =~ /[0-9]/
           str << ch
         end
         ungetc
         # Pass the name like the sibling branches do, so the token's
         # name is not left nil.
         Token(TkNTH_REF, str)
       when /\w/
         # Un-read the character and the "$" itself.
         ungetc
         ungetc
         return identify_identifier
       else
         ungetc
         Token("$")
       end
  tk.set_text(str)
end
# Lexes an identifier, keyword, constant, instance/class variable or
# named global starting at the current position, and updates the lexer
# state, indent counter and colon-block flag accordingly.
#
# @return [Token] a TkGVAR, TkIVAR, keyword, TkCONSTANT, TkFID or
#   TkIDENTIFIER token whose text is the identifier
def identify_identifier
  token = ""
  token.concat getc if peek(0) =~ /[$@]/
  token.concat getc if peek(0) == "@"
  while (ch = getc) =~ /\w|_/
    print ":", ch, ":" if RubyLex.debug?
    token.concat ch
  end
  ungetc
  # A trailing "!" or "?" belongs to a method name, but not when it is
  # really the start of "!=" / "?=" and not after a sigil. After the
  # ungetc above the candidate is at peek(0), so its successor is peek(1).
  if (ch == "!" || ch == "?") && token[0, 1] =~ /\w/ && peek(1) != "="
    token.concat getc
  end
  # fix token
  # $stderr.puts "identifier - #{token}, state = #@lex_state"
  case token
  when /^\$/
    return Token(TkGVAR, token).set_text(token)
  when /^\@/
    @lex_state = EXPR_END
    return Token(TkIVAR, token).set_text(token)
  end
  # After a "." everything is a method name, never a keyword.
  if @lex_state != EXPR_DOT
    print token, "\n" if RubyLex.debug?
    token_c, *trans = TkReading2Token[token]
    if token_c
      # reserved word?
      if @lex_state != EXPR_BEG &&
         @lex_state != EXPR_FNAME &&
         trans[1]
        # modifiers
        token_c = TkSymbol2Token[trans[1]]
        @lex_state = trans[0]
      else
        if @lex_state != EXPR_FNAME
          if ENINDENT_CLAUSE.include?(token)
            @indent += 1
            if ACCEPTS_COLON.include?(token)
              @colonblock_seen = true
            else
              @colonblock_seen = false
            end
          elsif DEINDENT_CLAUSE.include?(token)
            @indent -= 1
            @colonblock_seen = false
          end
          @lex_state = trans[0]
        else
          # A keyword used as a method name (e.g. "def end").
          @lex_state = EXPR_END
        end
      end
      return Token(token_c, token).set_text(token)
    end
  end
  if @lex_state == EXPR_FNAME
    @lex_state = EXPR_END
    # Setter method name: take the trailing "=".
    if peek(0) == '='
      token.concat getc
    end
  elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
    @lex_state = EXPR_ARG
  else
    @lex_state = EXPR_END
  end
  if token[0, 1] =~ /[A-Z]/
    return Token(TkCONSTANT, token).set_text(token)
  elsif token[token.size - 1, 1] =~ /[!?]/
    return Token(TkFID, token).set_text(token)
  else
    return Token(TkIDENTIFIER, token).set_text(token)
  end
end
# Lexes a here document whose "<<" has just been consumed.
#
# Reads the (optionally "-"-prefixed, optionally quoted) terminator,
# sets aside the remainder of the introducing line, reads the body up to
# the terminator line, then splices the set-aside text back into the
# reader so lexing resumes with the rest of the introducing line.
#
# @return [Token] a token whose value is the body and whose text is the
#   body in String#dump form
def identify_here_document
  ch = getc
  indent = false
  if ch == "-"
    ch = getc
    indent = true
  end
  if /['"`]/ =~ ch # '
    lt = ch
    quoted = ""
    while (c = getc) && c != lt
      quoted.concat c
    end
  else
    lt = '"'
    quoted = ch.dup
    while (c = getc) && c =~ /\w/
      quoted.concat c
    end
    ungetc
  end
  ltback, @ltype = @ltype, lt
  # Keep the rest of the introducing line (honouring backslash escapes)
  # so it can be lexed after the body.
  reserve = String.new
  while ch = getc
    reserve << ch
    if ch == "\\" #"
      ch = getc
      reserve << ch
    elsif ch == "\n"
      break
    end
  end
  str = String.new
  while (l = gets)
    l.chomp!
    # With "<<-" the terminator may be surrounded by whitespace. Only the
    # comparison ignores it; body lines keep their own indentation.
    terminator = indent ? l.strip : l
    break if terminator == quoted
    str << l << "\n"
  end
  @reader.divert_read_from(reserve)
  @ltype = ltback
  @lex_state = EXPR_END
  Token(Ltype2Token[lt], str).set_text(str.dump)
end
# Lexes a %-literal whose "%" has just been consumed.
#
# The character after "%" is either a type letter listed in
# PERCENT_LTYPE (followed by the delimiter) or, when it is a non-word
# character, the delimiter of a double-quoted style literal. The closing
# delimiter is the bracket partner from PERCENT_PAREN, or the delimiter
# itself.
#
# @param [String] initial_char text already consumed ("%")
# @return the token from identify_string, or nil when the character
#   after "%" is an unknown word character
def identify_quotation(initial_char)
  ch = getc
  lt = PERCENT_LTYPE[ch]
  if lt
    initial_char += ch
    ch = getc
  elsif ch =~ /\W/
    lt = "\""
  end
  # RubyLex.fail SyntaxError, "unknown type of %string ('#{ch}')"
  @quoted = PERCENT_PAREN[ch] || ch
  return unless lt
  identify_string(lt, @quoted, ch, initial_char)
end
# Lexes a numeric literal.
#
# +start+ is the text already consumed: a digit, or "+", "-" or "" in
# which case the next character is read and appended first. Numbers
# beginning with "0" are read as prefixed integers: hexadecimal after
# "x"/"X", binary after "b"/"B", octal otherwise. Other numbers are read
# as decimal integers or floats (one fractional part, one exponent).
#
# @param [String] start the part of the literal consumed so far
# @return [Token] a TkINTEGER or TkFLOAT token with the literal as text
def identify_number(start)
  str = start.dup
  if start == "+" || start == "-" || start == ""
    start = getc
    str << start
  end
  @lex_state = EXPR_END
  if start == "0"
    match = case peek(0)
            when "x", "X" then /[0-9a-fA-F_]/
            when "b", "B" then /[01_]/
            end
    if match
      str << getc # the radix letter
    else
      match = /[0-7_]/
    end
    while ch = getc
      if ch !~ match
        ungetc
        break
      else
        str << ch
      end
    end
    return Token(TkINTEGER).set_text(str)
  end
  type = TkINTEGER
  allow_point = true
  allow_e = true
  while ch = getc
    case ch
    when /[0-9_]/
      str << ch
    when allow_point && "."
      # A dot only makes a float when a digit follows; otherwise it is a
      # method call on an integer (3.times) and the type must not change.
      if peek(0) !~ /[0-9]/
        ungetc
        break
      end
      type = TkFLOAT
      str << ch
      allow_point = false
    when allow_e && "e", allow_e && "E"
      str << ch
      type = TkFLOAT
      if peek(0) =~ /[+-]/
        str << getc
      end
      allow_e = false
      allow_point = false
    else
      ungetc
      break
    end
  end
  Token(type).set_text(str)
end
# Lexes a string-like literal (string, command string, regexp or
# %-literal) whose opening delimiter has already been consumed.
#
# Nested occurrences of +opener+ are balanced against +quoted+. Unless
# the literal type is "'" or "]", "#{...}" is treated as interpolation:
# it is copied verbatim and switches the result to the interpolating
# token class. Backslash escapes are copied via read_escape. Trailing
# regexp option letters are included in the text.
#
# @param [String] ltype literal type (the key into the token tables)
# @param [String] quoted closing delimiter
# @param [String, nil] opener opening delimiter when it differs from the
#   closing one
# @param [String, nil] initial_char text preceding the delimiter
#   (such as "%w")
# @return [Token] the literal token; its text is the full source text
def identify_string(ltype, quoted = ltype, opener = nil, initial_char = nil)
  @ltype = ltype
  @quoted = quoted
  subtype = nil
  str = String.new
  str << initial_char if initial_char
  str << (opener || quoted)
  nest = 0
  begin
    while ch = getc
      str << ch
      if @quoted == ch
        if nest == 0
          break
        else
          nest -= 1
        end
      elsif opener == ch
        nest += 1
      elsif @ltype != "'" && @ltype != "]" && ch == "#"
        ch = getc
        if ch == "{"
          subtype = true
          str << ch << skip_inner_expression
        else
          ungetc(ch)
        end
      elsif ch == '\\' #'
        str << read_escape
      end
    end
    if @ltype == "/"
      # Take every option letter, not just the first; Ruby accepts
      # several in any order (for example /x/mi).
      str << getc while peek(0) =~ /[imxounes]/
    end
    if subtype
      Token(DLtype2Token[ltype], str)
    else
      Token(Ltype2Token[ltype], str)
    end.set_text(str)
  ensure
    # Always leave literal mode, even if reading raised.
    @ltype = nil
    @quoted = nil
    @lex_state = EXPR_END
  end
end
# Copies the inside of an interpolation ("#{" already consumed) up to
# and including the matching "}". Nested braces are counted; reading
# stops early if the input runs out.
#
# @return [String] the characters read, closing brace included
def skip_inner_expression
  collected = String.new
  depth = 0
  loop do
    ch = getc
    break unless ch
    collected << ch
    case ch
    when '}'
      break if depth == 0
      depth -= 1
    when '{'
      depth += 1
    end
  end
  collected
end
# Lexes a "#" comment whose "#" has just been consumed.
#
# Characters are collected up to (not including) the newline, which is
# handed back to the reader. A backslash directly before a newline is
# replaced by a single space and the comment carries on with the next
# line; any other backslash is kept as written.
#
# @return [Token] a TkCOMMENT token whose text starts with "#"
def identify_comment
  @ltype = "#"
  text = String.new("#")
  loop do
    ch = getc
    break unless ch
    if ch == "\\"
      ch = getc
      if ch == "\n"
        ch = " "
      else
        text << "\\"
      end
    elsif ch == "\n"
      @ltype = nil
      ungetc
      break
    end
    text << ch
  end
  Token(TkCOMMENT).set_text(text)
end
# Reads the remainder of a backslash escape (the backslash itself has
# already been consumed) and returns exactly the characters read.
#
# Handles up to three octal digits, "x" plus up to two hex digits,
# "M-x" meta escapes, "C-x"/"cx" control escapes (both of which may wrap
# a further escape) and single-character escapes.
#
# @return [String] the escape text without the leading backslash
def read_escape
  res = String.new
  case ch = getc
  when /[0-7]/
    # Re-read the first digit inside the loop so all digits are handled
    # the same way.
    ungetc ch
    3.times do
      case ch = getc
      when /[0-7]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end
  when "x"
    res << ch
    2.times do
      case ch = getc
      when /[0-9a-fA-F]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end
  when "M"
    res << ch
    if (ch = getc) != '-'
      ungetc
    else
      res << ch
      if (ch = getc) == "\\" #"
        res << ch
        res << read_escape
      else
        res << ch
      end
    end
  when "C", "c" #, "^"
    res << ch
    if ch == "C" && (ch = getc) != "-"
      ungetc
    else
      # Only "\C-x" has a dash to record. For "\cx" the "c" is already in
      # res and must not be appended a second time.
      res << ch if ch == "-"
      if (ch = getc) == "\\" #"
        res << ch
        res << read_escape
      else
        res << ch
      end
    end
  else
    res << ch
  end
  res
end
end
end
end