lib/linguist/heuristics.rb in github-linguist-5.3.1 vs lib/linguist/heuristics.rb in github-linguist-5.3.2

- old
+ new

@@ -1,8 +1,10 @@
 module Linguist
   # A collection of simple heuristics that can be used to better analyze languages.
   class Heuristics
+    HEURISTICS_CONSIDER_BYTES = 50 * 1024
+
     # Public: Use heuristics to detect language of the blob.
     #
     # blob               - An object that quacks like a blob.
     # possible_languages - Array of Language objects
     #
@@ -12,11 +14,11 @@
     #       Language["Ruby"], Language["Python"]
     #     ])
     #
     # Returns an Array of languages, or empty if none matched or were inconclusive.
     def self.call(blob, candidates)
-      data = blob.data
+      data = blob.data[0...HEURISTICS_CONSIDER_BYTES]

       @heuristics.each do |heuristic|
         if heuristic.matches?(blob.name, candidates)
           return Array(heuristic.call(data))
         end
@@ -70,10 +72,18 @@
       @heuristic.call(data)
     end

     # Common heuristics
     ObjectiveCRegex = /^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])/
+    CPlusPlusRegex = Regexp.union(
+        /^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/,
+        /^\s*template\s*</,
+        /^[ \t]*try/,
+        /^[ \t]*catch\s*\(/,
+        /^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+/,
+        /^[ \t]*(private|public|protected):$/,
+        /std::\w+/)

     disambiguate ".as" do |data|
       if /^\s*(package\s+[a-z0-9_\.]+|import\s+[a-zA-Z0-9_\.]+;|class\s+[A-Za-z0-9_]+\s+extends\s+[A-Za-z0-9_]+)/.match(data)
         Language["ActionScript"]
       else
@@ -217,12 +227,11 @@
     end

     disambiguate ".h" do |data|
       if ObjectiveCRegex.match(data)
         Language["Objective-C"]
-      elsif (/^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/.match(data) ||
-        /^\s*template\s*</.match(data) || /^[ \t]*try/.match(data) || /^[ \t]*catch\s*\(/.match(data) || /^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+/.match(data) || /^[ \t]*(private|public|protected):$/.match(data) || /std::\w+/.match(data))
+      elsif CPlusPlusRegex.match(data)
         Language["C++"]
       end
     end

     disambiguate ".inc" do |data|
@@ -356,13 +365,13 @@
         Language["Perl 6"]
       end
     end

     disambiguate ".pm" do |data|
-      if /^\s*(?:use\s+v6\s*;|(?:\bmy\s+)?class|module)\b/.match(data)
-        Language["Perl 6"]
-      elsif /\buse\s+(?:strict\b|v?5\.)/.match(data)
+      if /\buse\s+(?:strict\b|v?5\.)/.match(data)
         Language["Perl"]
+      elsif /^\s*(?:use\s+v6\s*;|(?:\bmy\s+)?class|module)\b/.match(data)
+        Language["Perl 6"]
       elsif /^\s*\/\* XPM \*\//.match(data)
         Language["XPM"]
       end
     end