module Linguist # A collection of simple heuristics that can be used to better analyze languages. class Heuristics # Public: Use heuristics to detect language of the blob. # # blob - An object that quacks like a blob. # possible_languages - Array of Language objects # # Examples # # Heuristics.call(FileBlob.new("path/to/file"), [ # Language["Ruby"], Language["Python"] # ]) # # Returns an Array of languages, or empty if none matched or were inconclusive. def self.call(blob, languages) data = blob.data @heuristics.each do |heuristic| return Array(heuristic.call(data)) if heuristic.matches?(languages) end [] # No heuristics matched end # Internal: Define a new heuristic. # # languages - String names of languages to disambiguate. # heuristic - Block which takes data as an argument and returns a Language or nil. # # Examples # # disambiguate "Perl", "Prolog" do |data| # if data.include?("use strict") # Language["Perl"] # elsif data.include?(":-") # Language["Prolog"] # end # end # def self.disambiguate(*languages, &heuristic) @heuristics << new(languages, &heuristic) end # Internal: Array of defined heuristics @heuristics = [] # Internal def initialize(languages, &heuristic) @languages = languages @heuristic = heuristic end # Internal: Check if this heuristic matches the candidate languages. def matches?(candidates) candidates.any? && candidates.all? { |l| @languages.include?(l.name) } end # Internal: Perform the heuristic def call(data) @heuristic.call(data) end disambiguate "Objective-C", "C++", "C" do |data| if (/@(interface|class|protocol|property|end|synchronised|selector|implementation)\b/.match(data)) Language["Objective-C"] elsif (/^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/.match(data) || /^\s*template\s*