lib/rouge/lexer.rb in rouge-1.11.0 vs lib/rouge/lexer.rb in rouge-1.11.1
- old
+ new
@@ -107,30 +107,21 @@
# This accepts the same arguments as Lexer.guess, but will never throw
# an error. It will return a (possibly empty) list of potential lexers
# to use.
def guesses(info={})
mimetype, filename, source = info.values_at(:mimetype, :filename, :source)
- lexers = registry.values.uniq
- total_size = lexers.size
+ custom_globs = info[:custom_globs]
- lexers = filter_by_mimetype(lexers, mimetype) if mimetype
- return lexers if lexers.size == 1
+ guessers = (info[:guessers] || []).dup
- lexers = filter_by_filename(lexers, filename) if filename
- return lexers if lexers.size == 1
+ guessers << Guessers::Mimetype.new(mimetype) if mimetype
+ guessers << Guessers::GlobMapping.by_pairs(custom_globs, filename) if custom_globs && filename
+ guessers << Guessers::Filename.new(filename) if filename
+ guessers << Guessers::Modeline.new(source) if source
+ guessers << Guessers::Source.new(source) if source
- if source
- # If we're filtering against *all* lexers, we only use confident return
- # values from analyze_text. But if we've filtered down already, we can trust
- # the analysis more.
- source_threshold = lexers.size < total_size ? 0 : 0.5
- return [best_by_source(lexers, source, source_threshold)].compact
- elsif lexers.size < total_size
- return lexers
- else
- return []
- end
+ Guesser.guess(guessers, Lexer.all)
end
class AmbiguousGuess < StandardError
attr_reader :alternatives
def initialize(alternatives); @alternatives = alternatives; end
@@ -173,74 +164,9 @@
def guess_by_source(source)
guess :source => source
end
private
- def filter_by_mimetype(lexers, mt)
- filtered = lexers.select { |lexer| lexer.mimetypes.include? mt }
- filtered.any? ? filtered : lexers
- end
-
- # returns a list of lexers that match the given filename with
- # equal specificity (i.e. number of wildcards in the pattern).
- # This helps disambiguate between, e.g. the Nginx lexer, which
- # matches `nginx.conf`, and the Conf lexer, which matches `*.conf`.
- # In this case, nginx will win because the pattern has no wildcards,
- # while `*.conf` has one.
- def filter_by_filename(lexers, fname)
- fname = File.basename(fname)
-
- out = []
- best_seen = nil
- lexers.each do |lexer|
- score = lexer.filenames.map do |pattern|
- if File.fnmatch?(pattern, fname, File::FNM_DOTMATCH)
- # specificity is better the fewer wildcards there are
- pattern.scan(/[*?\[]/).size
- end
- end.compact.min
-
- next unless score
-
- if best_seen.nil? || score < best_seen
- best_seen = score
- out = [lexer]
- elsif score == best_seen
- out << lexer
- end
- end
-
- out.any? ? out : lexers
- end
-
- def best_by_source(lexers, source, threshold=0)
- source = case source
- when String
- source
- when ->(s){ s.respond_to? :read }
- source.read
- else
- raise 'invalid source'
- end
-
- assert_utf8!(source)
-
- source = TextAnalyzer.new(source)
-
- best_result = threshold
- best_match = nil
- lexers.each do |lexer|
- result = lexer.analyze_text(source) || 0
- return lexer if result == 1
-
- if result > best_result
- best_match = lexer
- best_result = result
- end
- end
-
- best_match
- end
protected
# @private
def register(name, lexer)
registry[name.to_s] = lexer