lib/rouge/lexer.rb in rouge-0.2.1 vs lib/rouge/lexer.rb in rouge-0.2.2
- old
+ new
@@ -23,10 +23,44 @@
# Given a string, return the correct lexer class.
def find(name)
  registry[name.to_s]
end
+ # Find a lexer, with fancy shiny features.
+ #
+ # * The string you pass can include CGI-style options
+ #
+ #     Lexer.find_fancy('erb?parent=tex')
+ #
+ # * You can pass the special name 'guess' so we guess for you,
+ #   and you can pass a second argument of the code to guess by
+ #
+ #     Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
+ #
+ # This is used in the Redcarpet plugin as well as Rouge's own
+ # markdown lexer for highlighting internal code blocks.
+ #
+ def find_fancy(str, code=nil)
+   name, opts = str ? str.split('?', 2) : [nil, '']
+
+   # parse the options hash from a cgi-style string
+   opts = CGI.parse(opts || '').map do |k, vals|
+     [ k.to_sym, vals.empty? ? true : vals[0] ]
+   end
+
+   opts = Hash[opts]
+
+   lexer_class = case name
+   when 'guess', nil
+     self.guess(:source => code, :mimetype => opts[:mimetype])
+   when String
+     self.find(name)
+   end
+
+   lexer_class && lexer_class.new(opts)
+ end
+
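
A quick sketch of how the new entry point behaves, assuming the usual Rouge registry names ('erb', 'ruby', ...) are loaded; the values below just illustrate the code paths above:

    # options after the '?' are parsed CGI-style and handed to the lexer's
    # constructor; a bare key with no value becomes `true`
    lexer = Rouge::Lexer.find_fancy('erb?parent=javascript')

    # 'guess' (or a nil name) defers to Lexer.guess, using the sample code
    # and/or a :mimetype option to pick the class
    lexer = Rouge::Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")

    # an unknown name makes find return nil, so find_fancy returns nil too
    Rouge::Lexer.find_fancy('no-such-lexer') # => nil
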
# Specify or get this lexer's description.
def desc(arg=:absent)
  if arg == :absent
    @desc
  else
@@ -73,16 +107,18 @@
def guess_by_filename(fname)
  fname = File.basename(fname)
  registry.values.detect do |lexer|
    lexer.filenames.any? do |pattern|
-       File.fnmatch?(pattern, fname)
+       File.fnmatch?(pattern, fname, File::FNM_DOTMATCH)
    end
  end
end
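
The new File::FNM_DOTMATCH flag is what lets filename patterns match dotfiles: without it, a glob wildcard never matches a leading '.'. A minimal illustration (the pattern here is hypothetical, not tied to any particular lexer):

    File.fnmatch?('*.gemspec', '.hidden.gemspec')                      # => false
    File.fnmatch?('*.gemspec', '.hidden.gemspec', File::FNM_DOTMATCH)  # => true
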
def guess_by_source(source)
+   assert_utf8!(source)
+
  source = TextAnalyzer.new(source)
  best_result = 0
  best_match = nil
  registry.values.each do |lexer|
@@ -153,10 +189,19 @@
# end
def mimetypes(*mts)
  (@mimetypes ||= []).concat(mts)
end
+ # @private
+ def assert_utf8!(str)
+   return if %w(US-ASCII UTF-8).include? str.encoding.name
+   raise EncodingError.new(
+     "Bad encoding: #{str.encoding.names.join(',')}. " +
+     "Please convert your string to UTF-8."
+   )
+ end
+
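
Roughly how the new guard behaves; the error message is built from the encoding's names as shown above:

    Rouge::Lexer.assert_utf8!("plain UTF-8 text")  # returns quietly
    Rouge::Lexer.assert_utf8!("caf\xE9".force_encoding('ISO-8859-1'))
    # raises EncodingError asking you to convert the string to UTF-8
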
private
def registry
  @registry ||= {}
end
end
@@ -215,12 +260,15 @@
# @option opts :continue
#   Continue the lex from the previous state (i.e. don't call #reset!)
def lex(string, opts={}, &b)
  return enum_for(:lex, string) unless block_given?
+   Lexer.assert_utf8!(string)
+
  reset! unless opts[:continue]
+   # consolidate consecutive tokens of the same type
  last_token = nil
  last_val = nil
  stream_tokens(StringScanner.new(string)) do |tok, val|
    next if val.empty?
@@ -233,9 +281,14 @@
    last_token = tok
    last_val = val
  end
  b.call(last_token, last_val) if last_token
+ end
+
+ # delegated to {Lexer.tag}
+ def tag
+   self.class.tag
end
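
Taken together, #lex now validates the input encoding up front and still merges consecutive chunks of the same token type before yielding them. A hedged usage sketch, assuming a lexer registered under 'ruby' as in Rouge itself:

    lexer = Rouge::Lexer.find('ruby').new

    # adjacent chunks with the same token type arrive as one merged value
    lexer.lex("def x\nend\n") do |token, value|
      puts "#{token.inspect} => #{value.inspect}"
    end

    # non-UTF-8 input is rejected before any tokens are produced
    lexer.lex("def x".encode('UTF-16LE')) { |t, v| } # raises EncodingError
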
# @abstract
#
# Yield `[token, chunk]` pairs, given a prepared input stream. This