require 'ripper' require_relative 'composite_observable' require_relative 'lexed_line' require_relative 'lexer/lexer_constants' require_relative 'logger' require_relative 'lexer/token' class Tailor # This is what provides the main file parsing for tailor. For every event # that's encountered, it calls the appropriate notifier method. Notifier # methods are provided by {Tailor::CompositeObservable}. class Lexer < Ripper::Lexer include CompositeObservable include LexerConstants include LogSwitch::Mixin # @param [String] file The string to lex, or name of the file to read # and analyze. def initialize(file) @original_file_text = if File.exists? file @file_name = file File.open(@file_name, 'r').read else @file_name = "" file end @file_text = ensure_trailing_newline(@original_file_text) @file_text = sub_line_ending_backslashes(@file_text) super @file_text @added_newline = @file_text != @original_file_text end # This kicks off the process of parsing the file and publishing events # as the events are discovered. def lex file_beg_changed notify_file_beg_observers(@file_name) super file_end_changed notify_file_end_observers(count_trailing_newlines(@original_file_text)) end def on_backref(token) log "BACKREF: '#{token}'" super(token) end # Called when the lexer matches the first ` in a `` statement (the second # matches :on_tstring_end; this may or may not be a Ruby bug). # # @param [String] token The token that the lexer matched. def on_backtick(token) log "BACKTICK: '#{token}'" super(token) end # Called when the lexer matches a comma. # # @param [String] token The token that the lexer matched. def on_comma(token) log "COMMA: #{token}" log "Line length: #{current_line_of_text.length}" comma_changed notify_comma_observers(current_line_of_text, lineno, column) super(token) end # Called when the lexer matches a #. The token includes the # as well as # the content after it. # # @param [String] token The token that the lexer matched. 
def on_comment(token)
  log "COMMENT: '#{token}'"

  l_token = Tailor::Lexer::Token.new(token)
  # NOTE(review): the bare +super+ invokes Ripper's handler for this event
  # and its return value (presumably the tokens lexed so far) is wrapped
  # as the current line; +super(token)+ below invokes it again, mirroring
  # the pattern used throughout this file.
  lexed_line = LexedLine.new(super, lineno)
  comment_changed
  notify_comment_observers(l_token, lexed_line, @file_text, lineno, column)

  super(token)
end

# Called when the lexer matches a constant (including class names, of
# course).
#
# @param [String] token The token that the lexer matched.
def on_const(token)
  log "CONST: '#{token}'"

  l_token = Tailor::Lexer::Token.new(token)
  lexed_line = LexedLine.new(super, lineno)
  const_changed
  notify_const_observers(l_token, lexed_line, lineno, column)

  super(token)
end

# Called when the lexer matches a class variable.
#
# @param [String] token The token that the lexer matched.
def on_cvar(token)
  log "CVAR: '#{token}'"
  super(token)
end

# Called when the lexer matches the content inside a =begin/=end.
#
# @param [String] token The token that the lexer matched.
def on_embdoc(token)
  log "EMBDOC: '#{token}'"
  super(token)
end

# Called when the lexer matches =begin.
#
# @param [String] token The token that the lexer matched.
def on_embdoc_beg(token)
  log "EMBDOC_BEG: '#{token}'"
  super(token)
end

# Called when the lexer matches =end.
#
# @param [String] token The token that the lexer matched.
def on_embdoc_end(token)
  # Fixed copy/paste bug: this previously logged "EMBDOC_BEG" for the
  # =end event, which made the debug log misleading.
  log "EMBDOC_END: '#{token}'"
  super(token)
end

# Called when the lexer matches a #{.
#
# @param [String] token The token that the lexer matched.
def on_embexpr_beg(token)
  log "EMBEXPR_BEG: '#{token}'"
  embexpr_beg_changed
  notify_embexpr_beg_observers
  super(token)
end

# Called when the lexer matches the } that closes a #{. Note that as of
# MRI 1.9.3-p125, this never gets called. Logged as a bug and fixed, but
# not yet released: https://bugs.ruby-lang.org/issues/6211.
#
# @param [String] token The token that the lexer matched.
def on_embexpr_end(token)
  log "EMBEXPR_END: '#{token}'"
  embexpr_end_changed
  notify_embexpr_end_observers
  super(token)
end

# Called when the lexer matches an embedded-variable marker (Ripper's
# :on_embvar event).
#
# @param [String] token The token that the lexer matched.
def on_embvar(token)
  log "EMBVAR: '#{token}'"
  super(token)
end

# Called when the lexer matches a Float.
#
# @param [String] token The token that the lexer matched.
def on_float(token)
  log "FLOAT: '#{token}'"
  super(token)
end

# Called when the lexer matches a global variable.
#
# @param [String] token The token that the lexer matched.
def on_gvar(token)
  log "GVAR: '#{token}'"
  super(token)
end

# Called when the lexer matches the beginning of a heredoc.
#
# @param [String] token The token that the lexer matched.
def on_heredoc_beg(token)
  log "HEREDOC_BEG: '#{token}'"
  super(token)
end

# Called when the lexer matches the end of a heredoc.
#
# @param [String] token The token that the lexer matched.
def on_heredoc_end(token)
  log "HEREDOC_END: '#{token}'"
  super(token)
end

# Called when the lexer matches an identifier (method name, variable, the
# text part of a Symbol, etc.).
#
# @param [String] token The token that the lexer matched.
def on_ident(token)
  log "IDENT: '#{token}'"
  l_token = Tailor::Lexer::Token.new(token)
  # NOTE(review): the bare +super+ invokes Ripper's handler and its return
  # value is wrapped as the current line; +super(token)+ below invokes it
  # again — this double dispatch mirrors the rest of the file.
  lexed_line = LexedLine.new(super, lineno)
  ident_changed
  notify_ident_observers(l_token, lexed_line, lineno, column)
  super(token)
end

# Called when the lexer matches a Ruby ignored newline. Ignored newlines
# occur when a newline is encountered, but the statement that was expressed
# on that line was not completed on that line.
#
# @param [String] token The token that the lexer matched.
def on_ignored_nl(token)
  log "IGNORED_NL"
  current_line = LexedLine.new(super, lineno)
  ignored_nl_changed
  notify_ignored_nl_observers(current_line, lineno, column)
  super(token)
end

# Called when the lexer matches an Integer.
#
# @param [String] token The token that the lexer matched.
def on_int(token)
  log "INT: '#{token}'"
  super(token)
end

# Called when the lexer matches an instance variable.
#
# @param [String] token The token that the lexer matched.
def on_ivar(token)
  log "IVAR: '#{token}'"
  super(token)
end

# Called when the lexer matches a Ruby keyword.
#
# @param [String] token The token that the lexer matched.
def on_kw(token)
  log "KW: #{token}"
  # The bare +super+ invokes Ripper's handler and its return value is
  # wrapped as the current line (same double-dispatch pattern used
  # throughout this file).
  current_line = LexedLine.new(super, lineno)
  # The Token carries extra context so keyword observers can tell, e.g.,
  # whether a loop keyword was written with a trailing +do+.
  l_token = Tailor::Lexer::Token.new(token,
    {
      loop_with_do: current_line.loop_with_do?,
      full_line_of_text: current_line_of_text
    }
  )
  kw_changed
  notify_kw_observers(l_token, current_line, lineno, column)
  super(token)
end

# Called when the lexer matches a label (the first part in a non-rocket
# style Hash).
#
# Example:
#   one: 1     # Matches one:
#
# @param [String] token The token that the lexer matched.
def on_label(token)
  log "LABEL: '#{token}'"
  super(token)
end

# Called when the lexer matches a {. Note a #{ match calls
# +#on_embexpr_beg+.
#
# @param [String] token The token that the lexer matched.
def on_lbrace(token)
  log "LBRACE: '#{token}'"
  current_line = LexedLine.new(super, lineno)
  lbrace_changed
  notify_lbrace_observers(current_line, lineno, column)
  super(token)
end

# Called when the lexer matches a [.
#
# @param [String] token The token that the lexer matched.
def on_lbracket(token)
  log "LBRACKET: '#{token}'"
  current_line = LexedLine.new(super, lineno)
  lbracket_changed
  notify_lbracket_observers(current_line, lineno, column)
  super(token)
end

# Called when the lexer matches a (. Unlike the brace/bracket handlers,
# observers here only get the position, not the lexed line.
#
# @param [String] token The token that the lexer matched.
def on_lparen(token)
  log "LPAREN: '#{token}'"
  lparen_changed
  notify_lparen_observers(lineno, column)
  super(token)
end

# This is the first thing that exists on a new line--NOT the last!
#
# @param [String] token The token that the lexer matched.
def on_nl(token)
  log "NL"
  current_line = LexedLine.new(super, lineno)
  nl_changed
  notify_nl_observers(current_line, lineno, column)
  super(token)
end

# Called when the lexer matches an operator.
#
# @param [String] token The token that the lexer matched.
def on_op(token)
  log "OP: '#{token}'"
  super(token)
end

# Called when the lexer matches a period.
#
# @param [String] token The token that the lexer matched.
def on_period(token)
  log "PERIOD: '#{token}'"
  period_changed
  # Observers receive the current line's length (not the line itself) plus
  # the period's position.
  notify_period_observers(current_line_of_text.length, lineno, column)
  super(token)
end

# Called when the lexer matches '%w'. Statement is ended by a
# +:on_words_end+.
#
# @param [String] token The token that the lexer matched.
def on_qwords_beg(token)
  log "QWORDS_BEG: '#{token}'"
  super(token)
end

# Called when the lexer matches a }.
#
# @param [String] token The token that the lexer matched.
def on_rbrace(token)
  log "RBRACE: '#{token}'"
  # Bare +super+ returns Ripper's handler result, wrapped as the current
  # line; +super(token)+ below dispatches again (file-wide pattern).
  current_line = LexedLine.new(super, lineno)
  rbrace_changed
  notify_rbrace_observers(current_line, lineno, column)
  super(token)
end

# Called when the lexer matches a ].
#
# @param [String] token The token that the lexer matched.
def on_rbracket(token)
  log "RBRACKET: '#{token}'"
  current_line = LexedLine.new(super, lineno)
  rbracket_changed
  notify_rbracket_observers(current_line, lineno, column)
  super(token)
end

# Called when the lexer matches the beginning of a Regexp.
#
# @param [String] token The token that the lexer matched.
def on_regexp_beg(token)
  log "REGEXP_BEG: '#{token}'"
  super(token)
end

# Called when the lexer matches the end of a Regexp.
#
# @param [String] token The token that the lexer matched.
def on_regexp_end(token)
  log "REGEXP_END: '#{token}'"
  super(token)
end

# Called when the lexer matches a ).
#
# @param [String] token The token that the lexer matched.
def on_rparen(token)
  log "RPAREN: '#{token}'"
  current_line = LexedLine.new(super, lineno)
  rparen_changed
  notify_rparen_observers(current_line, lineno, column)
  super(token)
end

# Called when the lexer matches a ;.
#
# @param [String] token The token that the lexer matched.
def on_semicolon(token)
  log "SEMICOLON: '#{token}'"
  super(token)
end

# Called when the lexer matches any type of space character.
#
# @param [String] token The token that the lexer matched.
def on_sp(token)
  log "SP: '#{token}'; size: #{token.size}"
  l_token = Tailor::Lexer::Token.new(token)
  sp_changed
  notify_sp_observers(l_token, lineno, column)

  # Deal with lines that end with \
  # A backslash-newline arrives as a single space token; treat it as an
  # ignored newline so line-continuation observers still fire.
  if token == "\\\n"
    current_line = LexedLine.new(super, lineno)
    ignored_nl_changed
    notify_ignored_nl_observers(current_line, lineno, column)
  end

  super(token)
end

# Called when the lexer matches the : at the beginning of a Symbol.
#
# @param [String] token The token that the lexer matched.
def on_symbeg(token)
  log "SYMBEG: '#{token}'"
  super(token)
end

# Called when the lexer matches the -> as a lambda.
#
# @param [String] token The token that the lexer matched.
def on_tlambda(token)
  log "TLAMBDA: '#{token}'"
  super(token)
end

# Called when the lexer matches the { that represents the beginning of a
# -> lambda.
#
# @param [String] token The token that the lexer matched.
def on_tlambeg(token)
  log "TLAMBEG: '#{token}'"
  super(token)
end

# Called when the lexer matches the beginning of a String.
#
# @param [String] token The token that the lexer matched.
def on_tstring_beg(token)
  log "TSTRING_BEG: '#{token}'"
  current_line = LexedLine.new(super, lineno)
  tstring_beg_changed
  # Observers get the lexed line and line number only (no column).
  notify_tstring_beg_observers(current_line, lineno)
  super(token)
end

# Called when the lexer matches the content of any String.
#
# @param [String] token The token that the lexer matched.
def on_tstring_content(token)
  log "TSTRING_CONTENT: '#{token}'"
  super(token)
end

# Called when the lexer matches the end of a String.
#
# @param [String] token The token that the lexer matched.
def on_tstring_end(token)
  log "TSTRING_END: '#{token}'"
  tstring_end_changed
  notify_tstring_end_observers(lineno)
  super(token)
end

# Called when the lexer matches '%W'.
#
# @param [String] token The token that the lexer matched.
def on_words_beg(token)
  log "WORDS_BEG: '#{token}'"
  super(token)
end

# Called when the lexer matches the separators in a %w or %W (by default,
# this is a single space).
#
# @param [String] token The token that the lexer matched.
def on_words_sep(token)
  log "WORDS_SEP: '#{token}'"
  super(token)
end

# Called when the lexer matches __END__.
#
# @param [String] token The token that the lexer matched.
def on___end__(token)
  log "__END__: '#{token}'"
  super(token)
end

# Called when the lexer matches CHAR.
#
# @param [String] token The token that the lexer matched.
def on_CHAR(token)
  log "CHAR: '#{token}'"
  super(token)
end

# The current line of text being examined.
#
# @return [String] The current line of text ('' when lineno is out of
#   range).
def current_line_of_text
  @file_text.split("\n").at(lineno - 1) || ''
end

# Counts the number of newlines at the end of the file.
#
# @param [String] text The file's text.
# @return [Integer] The number of \n at the end of the file.
def count_trailing_newlines(text)
  if text.end_with? "\n"
    count = 0

    # The original passed a block to String#chars; that block form is only
    # an #each_char alias on old Rubies and is deprecated/removed on modern
    # ones (which would leave +count+ at 0). Use #each_char directly.
    text.reverse.each_char do |c|
      if c == "\n"
        count += 1
      else
        break
      end
    end

    count
  else
    0
  end
end

# Adds a newline to the end of the text if one doesn't exist. Without doing
# this, Ripper won't trigger a newline event for the last line of the file,
# which is required for some rulers to do their thing.
#
# @param [String] file_text The text to check.
# @return [String] The file text with a newline at the end.
def ensure_trailing_newline(file_text)
  count_trailing_newlines(file_text) > 0 ? file_text : (file_text + "\n")
end

#---------------------------------------------------------------------------
# Privates!
#---------------------------------------------------------------------------
private

# Used internally as part of the hack to deal with Ripper's lack of dealing
# with line-ending backslashes that break up statements.
#
# @param [String] file_text The file text to check.
# @return [String] The altered file text.
def sub_line_ending_backslashes(file_text) backslash_replacement = "# TAILOR REMOVED BACKSLASH" file_text.gsub!(/\\\s*\n?$/, backslash_replacement) file_text end def log(*args) l = begin; lineno; rescue; ""; end c = begin; column; rescue; ""; end subclass_name = self.class.to_s.sub(/^Tailor::/, '') args.first.insert(0, "<#{subclass_name}> #{l}[#{c}]: ") Tailor::Logger.log(*args) end end end