lib/rouge/lexer.rb in rouge-0.2.1 vs lib/rouge/lexer.rb in rouge-0.2.2

- old
+ new

@@ -23,10 +23,44 @@
       # Given a string, return the correct lexer class.
       def find(name)
         registry[name.to_s]
       end

+      # Find a lexer, with fancy shiny features.
+      #
+      # * The string you pass can include CGI-style options
+      #
+      # Lexer.find_fancy('erb?parent=tex')
+      #
+      # * You can pass the special name 'guess' so we guess for you,
+      # and you can pass a second argument of the code to guess by
+      #
+      # Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
+      #
+      # This is used in the Redcarpet plugin as well as Rouge's own
+      # markdown lexer for highlighting internal code blocks.
+      #
+      def find_fancy(str, code=nil)
+        name, opts = str ? str.split('?', 2) : [nil, '']
+
+        # parse the options hash from a cgi-style string
+        opts = CGI.parse(opts || '').map do |k, vals|
+          [ k.to_sym, vals.empty? ? true : vals[0] ]
+        end
+
+        opts = Hash[opts]
+
+        lexer_class = case name
+        when 'guess', nil
+          self.guess(:source => code, :mimetype => opts[:mimetype])
+        when String
+          self.find(name)
+        end
+
+        lexer_class && lexer_class.new(opts)
+      end
+
       # Specify or get this lexer's description.
       def desc(arg=:absent)
         if arg == :absent
           @desc
         else
@@ -73,16 +107,18 @@

       def guess_by_filename(fname)
         fname = File.basename(fname)
         registry.values.detect do |lexer|
           lexer.filenames.any? do |pattern|
-            File.fnmatch?(pattern, fname)
+            File.fnmatch?(pattern, fname, File::FNM_DOTMATCH)
           end
         end
       end

       def guess_by_source(source)
+        assert_utf8!(source)
+
         source = TextAnalyzer.new(source)

         best_result = 0
         best_match = nil
         registry.values.each do |lexer|
@@ -153,10 +189,19 @@
       # end
       def mimetypes(*mts)
         (@mimetypes ||= []).concat(mts)
       end

+      # @private
+      def assert_utf8!(str)
+        return if %w(US-ASCII UTF-8).include? str.encoding.name
+        raise EncodingError.new(
+          "Bad encoding: #{str.encoding.names.join(',')}. " +
+          "Please convert your string to UTF-8."
+        )
+      end
+
     private
       def registry
         @registry ||= {}
       end
     end
@@ -215,12 +260,15 @@
     # @option opts :continue
     # Continue the lex from the previous state (i.e. don't call #reset!)
     def lex(string, opts={}, &b)
       return enum_for(:lex, string) unless block_given?

+      Lexer.assert_utf8!(string)
+
       reset! unless opts[:continue]

+      # consolidate consecutive tokens of the same type
       last_token = nil
       last_val = nil
       stream_tokens(StringScanner.new(string)) do |tok, val|
         next if val.empty?

@@ -233,9 +281,14 @@
         last_token = tok
         last_val = val
       end

       b.call(last_token, last_val) if last_token
+    end
+
+    # delegated to {Lexer.tag}
+    def tag
+      self.class.tag
     end

     # @abstract
     #
     # Yield `[token, chunk]` pairs, given a prepared input stream. This