# frozen_string_literal: true require 'fileutils' require 'astrolabe/builder' require_relative 'fast/rewriter' # suppress output to avoid parser gem warnings' def suppress_output original_stdout = $stdout.clone original_stderr = $stderr.clone $stderr.reopen File.new('/dev/null', 'w') $stdout.reopen File.new('/dev/null', 'w') yield ensure $stdout.reopen original_stdout $stderr.reopen original_stderr end suppress_output do require 'parser' require 'parser/current' end # Fast is a tool to help you search in the code through the Abstract Syntax Tree module Fast # Literals are shortcuts allowed inside {ExpressionParser} LITERAL = { '...' => ->(node) { node&.children&.any? }, '_' => ->(node) { !node.nil? }, 'nil' => nil }.freeze # Allowed tokens in the node pattern domain TOKENIZER = %r/ [\+\-\/\*\\!] # operators or negation | ===? # == or === | \d+\.\d* # decimals and floats | "[^"]+" # strings | _ # something not nil: match | \.{3} # a node with children: ... | \[|\] # square brackets `[` and `]` for all | \^ # node has children with | \? # maybe expression | [\d\w_]+[=\\!\?]? # method names or numbers | \(|\) # parens `(` and `)` for tuples | \{|\} # curly brackets `{` and `}` for any | \$ # capture | \#\w[\d\w_]+[\\!\?]? # custom method call | \.\w[\d\w_]+\? # instance method call | \\\d # find using captured expression | %\d # bind extra arguments to the expression /x.freeze # Set some convention methods from file. class Node < Astrolabe::Node # @return [String] with path of the file or simply buffer name. def buffer_name expression.source_buffer.name end # @return [Parser::Source::Range] from the expression def expression location.expression end # @return [String] with the content of the #expression def source expression.source end # @return [Boolean] true if a file exists with the #buffer_name def from_file? File.exist?(buffer_name) end # @return [Array] with authors from the current expression range def blame_authors `git blame -L #{expression.first_line},#{expression.last_line} #{buffer_name}`.lines.map do |line| line.split('(')[1].split(/\d+/).first.strip end end # @return [String] with the first element from #blame_authors def author blame_authors.first end # Search recursively into a node and its children using a pattern. # @param [String] pattern # @param [Array] *args extra arguments to interpolate in the pattern. # @return [Array>] with files and results def search(pattern, *args) Fast.search(pattern, self, *args) end # Captures elements from search recursively # @param [String] pattern # @param [Array] *args extra arguments to interpolate in the pattern. # @return [Array>] with files and results def capture(pattern, *args) Fast.capture(pattern, self, *args) end end # Custom builder allow us to set a buffer name for each Node class Builder < Astrolabe::Builder attr_writer :buffer_name # Generates {Node} from the given information. # # @return [Node] the generated node def n(type, children, source_map) Node.new(type, children, location: source_map, buffer_name: @buffer_name) end end class << self # @return [Fast::Node] from the parsed content # @example # Fast.ast("1") # => s(:int, 1) # Fast.ast("a.b") # => s(:send, s(:send, nil, :a), :b) def ast(content, buffer_name: '(string)') buffer = Parser::Source::Buffer.new(buffer_name) buffer.source = content Parser::CurrentRuby.new(builder_for(buffer_name)).parse(buffer) end def builder_for(buffer_name) builder = Builder.new builder.buffer_name = buffer_name builder end # @return [Fast::Node] parsed from file content # caches the content based on the filename. # Also, it can parse SQL files. # @example # Fast.ast_from_file("example.rb") # => s(...) def ast_from_file(file) @cache ||= {} @cache[file] ||= begin method = if file.end_with?('.sql') require_relative 'fast/sql' unless respond_to?(:parse_sql) :parse_sql else :ast end Fast.public_send(method, IO.read(file), buffer_name: file) end end # Verify if a given AST matches with a specific pattern # @return [Boolean] case matches ast with the current expression # @example # Fast.match?("int", Fast.ast("1")) # => true def match?(pattern, ast, *args) Matcher.new(pattern, ast, *args).match? end # Search with pattern directly on file # @return [Array] that matches the pattern def search_file(pattern, file) node = ast_from_file(file) return [] unless node case node when Array node.map { |n| search(pattern, n) }.flatten.compact else search pattern, node end end # Search with pattern on a directory or multiple files # @param [String] pattern # @param [Array] *locations where to search. Default is '.' # @return [Hash>] with files and results def search_all(pattern, locations = ['.'], parallel: true, on_result: nil) group_results(build_grouped_search(:search_file, pattern, on_result), locations, parallel: parallel) end # Capture with pattern on a directory or multiple files # @param [String] pattern # @param [Array] locations where to search. Default is '.' # @return [Hash] with files and captures def capture_all(pattern, locations = ['.'], parallel: true, on_result: nil) group_results(build_grouped_search(:capture_file, pattern, on_result), locations, parallel: parallel) end # @return [Proc] binding `pattern` argument from a given `method_name`. # @param [Symbol] method_name with `:capture_file` or `:search_file` # @param [String] pattern to match in a search to any file # @param [Proc] on_result is a callback that can be notified soon it matches def build_grouped_search(method_name, pattern, on_result) search_pattern = method(method_name).curry.call(pattern) proc do |file| results = search_pattern.call(file) next if results.nil? || results.empty? on_result&.(file, results) { file => results } end end # Compact grouped results by file allowing parallel processing. # @param [Proc] group_files allows to define a search that can be executed # parallel or not. # @param [Proc] on_result allows to define a callback for fast feedback # while it process several locations in parallel. # @param [Boolean] parallel runs the `group_files` in parallel # @return [Hash[String, Array]] with files and results def group_results(group_files, locations, parallel: true) files = ruby_files_from(*locations) if parallel require 'parallel' unless defined?(Parallel) Parallel.map(files, &group_files) else files.map(&group_files) end.compact.inject(&:merge!) end # Capture elements from searches in files. Keep in mind you need to use `$` # in the pattern to make it work. # @return [Array] captured from the pattern matched in the file def capture_file(pattern, file) node = ast_from_file(file) return [] unless node case node when Array node.map { |n| capture(pattern, n) }.flatten.compact else capture pattern, node end end # Search recursively into a node and its children. # If the node matches with the pattern it returns the node, # otherwise it recursively collect possible children nodes # @yield node and capture if block given def search(pattern, node, *args) if (match = match?(pattern, node, *args)) yield node, match if block_given? match != true ? [node, match] : [node] else case node when Array node.flat_map { |child| search(pattern, child, *args) } else node.each_child_node .flat_map { |child| search(pattern, child, *args) } .compact.flatten end end end # Only captures from a search # @return [Array] with all captured elements. def capture(pattern, node) if (match = match?(pattern, node)) match == true ? node : match else node.each_child_node .flat_map { |child| capture(pattern, child) } .compact.flatten end end def expression(string) ExpressionParser.new(string).parse end attr_accessor :debugging # Utility function to inspect search details using debug block. # # It prints output of all matching cases. # # @example # Fast.debug do # Fast.match?([:int, 1], s(:int, 1)) # end # int == (int 1) # => true # 1 == 1 # => true def debug return yield if debugging self.debugging = true result = nil Find.class_eval do alias_method :original_match_recursive, :match_recursive alias_method :match_recursive, :debug_match_recursive result = yield alias_method :match_recursive, :original_match_recursive # rubocop:disable Lint/DuplicateMethods end self.debugging = false result end # @return [Array] with all ruby files from arguments. # @param files can be file paths or directories. # When the argument is a folder, it recursively fetches all `.rb` files from it. def ruby_files_from(*files) dir_filter = File.method(:directory?) directories = files.select(&dir_filter) if directories.any? files -= directories files |= directories.flat_map { |dir| Dir["#{dir}/**/*.rb"] } files.uniq! end files.reject(&dir_filter) end # Extracts a node pattern expression from a given node supressing identifiers and primitive types. # Useful to index abstract patterns or similar code structure. # @see https://jonatas.github.io/fast/similarity_tutorial/ # @return [String] with an pattern to search from it. # @param node [Fast::Node] # @example # Fast.expression_from(Fast.ast('1')) # => '(int _)' # Fast.expression_from(Fast.ast('a = 1')) # => '(lvasgn _ (int _))' # Fast.expression_from(Fast.ast('def name; person.name end')) # => '(def _ (args) (send (send nil _) _))' def expression_from(node) case node when Parser::AST::Node children_expression = node.children.map(&method(:expression_from)).join(' ') "(#{node.type}#{" #{children_expression}" if node.children.any?})" when nil, 'nil' 'nil' when Symbol, String, Numeric '_' end end end # ExpressionParser empowers the AST search in Ruby. # All classes inheriting Fast::Find have a grammar shortcut that is processed here. # # @example find a simple int node # Fast.expression("int") # # => # # @example parens make the expression an array of Fast::Find and children classes # Fast.expression("(int _)") # # => [#, #] # @example not int token # Fast.expression("!int") # # => #> # @example int or float token # Fast.expression("{int float}") # # => #, # # # # # #]> # @example capture something not nil # Fast.expression("$_") # # => #> # @example capture a hash with keys that all are not string and not symbols # Fast.expression("(hash (pair ([!sym !str] _))") # # => [#, # # [#, # # [#>, # # #>]>, # # #]]]")") # @example of match using string expression # Fast.match?(Fast.ast("{1 => 1}"),"(hash (pair ([!sym !str] _))") => true")") class ExpressionParser # @param expression [String] def initialize(expression) @tokens = expression.scan TOKENIZER end # rubocop:disable Metrics/CyclomaticComplexity # rubocop:disable Metrics/AbcSize # rubocop:disable Metrics/MethodLength def parse case (token = next_token) when '(' then parse_until_peek(')') when '{' then Any.new(parse_until_peek('}')) when '[' then All.new(parse_until_peek(']')) when /^"/ then FindString.new(token[1..-2]) when /^#\w/ then MethodCall.new(token[1..]) when /^\.\w[\w\d_]+\?/ then InstanceMethodCall.new(token[1..]) when '$' then Capture.new(parse) when '!' then (@tokens.any? ? Not.new(parse) : Find.new(token)) when '?' then Maybe.new(parse) when '^' then Parent.new(parse) when '\\' then FindWithCapture.new(parse) when /^%\d/ then FindFromArgument.new(token[1..]) else Find.new(token) end end # rubocop:enable Metrics/CyclomaticComplexity # rubocop:enable Metrics/AbcSize # rubocop:enable Metrics/MethodLength private def next_token @tokens.shift end def parse_until_peek(token) list = [] list << parse until @tokens.empty? || @tokens.first == token next_token list end end # Find is the top level class that respond to #match?(node) interface. # It matches recurively and check deeply depends of the token type. class Find attr_accessor :token def initialize(token) self.token = token end def match?(node) match_recursive(valuate(token), node) end def match_recursive(expression, node) case expression when Proc then expression.call(node) when Find then expression.match?(node) when Symbol then compare_symbol_or_head(expression, node) when Enumerable expression.each_with_index.all? do |exp, i| match_recursive(exp, i.zero? ? node : node.children[i - 1]) end else node == expression end end def compare_symbol_or_head(expression, node) case node when Parser::AST::Node node.type == expression.to_sym when String node == expression.to_s when TrueClass expression == :true when FalseClass expression == :false else node == expression end end def debug_match_recursive(expression, node) match = original_match_recursive(expression, node) debug(expression, node, match) match end def debug(expression, node, match) puts "#{expression} == #{node} # => #{match}" end def to_s "f[#{[*token].map(&:to_s).join(', ')}]" end def ==(other) return false if other.nil? || !other.respond_to?(:token) token == other.token end private def valuate(token) if token.is_a?(String) return valuate(LITERAL[token]) if LITERAL.key?(token) typecast_value(token) else token end end def typecast_value(token) case token when /^\d+\.\d*/ then token.to_f when /^\d+/ then token.to_i else token.to_sym end end end # Find literal strings using double quotes class FindString < Find def initialize(token) @token = token end def match?(node) node == token end end # Find using custom methods class MethodCall < Find def initialize(method_name) @method_name = method_name end def match?(node) Kernel.send(@method_name, node) end end # Search using custom instance methods class InstanceMethodCall < Find def initialize(method_name) @method_name = method_name end def match?(node) node.send(@method_name) end end # Allow use previous captures while searching in the AST. # Use `\\1` to point the match to the first captured element # or sequential numbers considering the order of the captures. # # @example check comparision of integers that will always return true # ast = Fast.ast("1 == 1") => s(:send, s(:int, 1), :==, s(:int, 1)) # Fast.match?("(send $(int _) == \1)", ast) # => [s(:int, 1)] class FindWithCapture < Find attr_writer :previous_captures def initialize(token) token = token.token if token.respond_to?(:token) raise 'You must use captures!' unless token @capture_index = token.to_i end def match?(node) node == @previous_captures[@capture_index - 1] end def to_s "fc[\\#{@capture_index}]" end end # Allow the user to interpolate expressions from external stuff. # Use `%1` in the expression and the Matcher#prepare_arguments will # interpolate the argument in the expression. # @example interpolate the node value 1 # Fast.match?("(int %1)", Fast.ast("1"), 1) # => true # Fast.match?("(int %1)", Fast.ast("1"), 2) # => false # @example interpolate multiple arguments # Fast.match?("(%1 %2)", Fast.ast("1"), :int, 1) # => true class FindFromArgument < Find attr_writer :arguments def initialize(token) token = token.token if token.respond_to?(:token) raise 'You must define index' unless token @capture_argument = token.to_i - 1 raise 'Arguments start in one' if @capture_argument.negative? end def match?(node) raise 'You must define arguments to match' unless @arguments compare_symbol_or_head @arguments[@capture_argument], node end def to_s "find_with_arg[\\#{@capture_argument}]" end end # Capture some expression while searching for it. # # The captures behaves exactly like Fast::Find and the only difference is that # when it {#match?} stores #captures for future usage. # # @example capture int node # capture = Fast::Capture.new("int") => # # capture.match?(Fast.ast("1")) # => [s(:int, 1)] # # @example binding directly in the Fast.expression # Fast.match?(Fast.ast("1"), "(int $_)") # => [1] # # @example capture the value of a local variable assignment # (${int float} _) # @example expression to capture only the node type # (${int float} _) # @example expression to capture entire node # $({int float} _) # @example expression to capture only the node value of int or float nodes # ({int float} $_) # @example expression to capture both node type and value # ($_ $_) # # You can capture stuff in multiple levels and # build expressions that reference captures with Fast::FindWithCapture. class Capture < Find # Stores nodes that matches with the current expression. attr_reader :captures def initialize(token) super @captures = [] end # Append the matching node to {#captures} if it matches def match?(node) @captures << node if super end def to_s "c[#{token}]" end end # Sometimes you want to check some children but get the parent element, # for such cases, parent can be useful. # Example: You're searching for `int` usages in your code. # But you don't want to check the integer itself, but who is using it: # `^^(int _)` will give you the variable being assigned or the expression being used. class Parent < Find alias match_node match? def match?(node) node.each_child_node.any?(&method(:match_node)) end def to_s "^#{token}" end end # Matches any of the internal expressions. Works like a **OR** condition. # @example Matchig int or float # Fast.expression("{int float}") class Any < Find def match?(node) token.any? { |expression| Fast.match?(expression, node) } end def to_s "any[#{token.map(&:to_s).join(', ')}]" end end # Intersect expressions. Works like a **AND** operator. class All < Find def match?(node) token.all? { |expression| expression.match?(node) } end def to_s "all[#{token}]" end end # Negates the current expression # `!int` is equilvalent to "not int" class Not < Find def match?(node) !super end end # True if the node does not exist # When exists, it should match. class Maybe < Find def match?(node) node.nil? || super end end # Joins the AST and the search expression to create a complete matcher that # recusively check if the node pattern expression matches with the given AST. # ### Using captures # # One of the most important features of the matcher is find captures and also # bind them on demand in case the expression is using previous captures. # # @example simple match # ast = Fast.ast("a = 1") # expression = Fast.expression("(lvasgn _ (int _))") # Matcher.new(expression, ast).match? # true # # @example simple capture # ast = Fast.ast("a = 1") # expression = Fast.expression("(lvasgn _ (int $_))") # Matcher.new(expression, ast).match? # => [1] # class Matcher def initialize(pattern, ast, *args) @ast = ast @expression = if pattern.is_a?(String) Fast.expression(pattern) else [*pattern].map(&Find.method(:new)) end @captures = [] prepare_arguments(@expression, args) if args.any? end # @return [true] if the @param ast recursively matches with expression. # @return #find_captures case matches def match?(expression = @expression, ast = @ast) head, *tail_expression = expression return false unless head.match?(ast) return find_captures if tail_expression.empty? match_tail?(tail_expression, ast.children) end # @return [true] if all children matches with tail def match_tail?(tail, child) tail.each_with_index.all? do |token, i| prepare_token(token) token.is_a?(Array) ? match?(token, child[i]) : token.match?(child[i]) end && find_captures end # Look recursively into @param expression to check if the expression is have # captures. # @return [true] if any sub expression have captures. def captures?(expression = @expression) case expression when Capture then true when Array then expression.any?(&method(:captures?)) when Find then captures?(expression.token) end end # Find search captures recursively. # # @return [Array] of captures from the expression # @return [true] in case of no captures in the expression # @see Fast::Capture # @see Fast::FindFromArgument def find_captures(expression = @expression) return true if expression == @expression && !captures?(expression) case expression when Capture then expression.captures when Array then expression.flat_map(&method(:find_captures)).compact when Find then find_captures(expression.token) end end private # Prepare arguments case the expression needs to bind extra arguments. # @return [void] def prepare_arguments(expression, arguments) case expression when Array expression.each do |item| prepare_arguments(item, arguments) end when Fast::FindFromArgument expression.arguments = arguments when Fast::Find prepare_arguments expression.token, arguments end end # Prepare token with previous captures # @param [FindWithCapture] token set the current captures # @return [void] # @see [FindWithCapture#previous_captures] def prepare_token(token) case token when Fast::FindWithCapture token.previous_captures = find_captures end end end end