# ~*~ encoding: utf-8 ~*~

# Code
#
# Handle code blocks:
# - in the extract step, pull them out of the document and leave a placeholder
#   behind, so they don't get rendered by the Render filter
# - in the process step, apply highlighting and wrapping and reinsert them
#   into the document in place of their placeholders
class Gollum::Filter::Code < Gollum::Filter
  # The @language_handlers Hash can be filled with Regexp keys and
  # corresponding Proc values. A Proc is executed to handle a code block whose
  # language definition matches its key; it is called with the HTML-escaped
  # language and the HTML-escaped code and its return value is used as the
  # block's output.
  # See the Code Filter tests for an example.
  # Use the Gollum::Filter::Code.language_handlers method to access and modify
  # this class instance variable.
  @language_handlers = {}

  class << self
    attr_accessor :language_handlers
  end

  # Replace every code block in the document with a placeholder id and
  # remember the block (language, code, indentation) in @map.
  #
  # data - The String data. It is modified in place.
  #
  # Returns the String data with code blocks replaced by placeholders.
  def extract(data)
    case @markup.format
    when :asciidoc
      data.gsub!(/^(\[source,([^\r\n]*)\]\n)?----\n(.+?)\n----$/m) do
        cache_codeblock(Regexp.last_match(2), Regexp.last_match(3))
      end
    when :org
      org_headers = %r{([ \t]*#\+HEADER[S]?:[^\r\n]*\n)*}
      org_name    = %r{([ \t]*#\+NAME:[^\r\n]*\n)?}
      org_lang    = %r{[ ]*([^\n \r]*)[ ]*[^\r\n]*}
      org_begin   = %r{([ \t]*)#\+BEGIN_SRC#{org_lang}\r?\n}
      org_end     = %r{\r?\n[ \t]*#\+END_SRC[ \t\r]*}
      data.gsub!(/^#{org_headers}#{org_name}#{org_begin}(.+?)#{org_end}$/mi) do
        # Groups: 3 = indentation before #+BEGIN_SRC, 4 = language, 5 = code
        "#{Regexp.last_match(3)}#{cache_codeblock(Regexp.last_match(4), Regexp.last_match(5))}"
      end
    when :markdown
      data.gsub!(/^([ ]{0,3})(~~~+) ?([^\r\n]+)?\r?\n(.+?)\r?\n[ ]{0,3}(~~~+)[ \t\r]*$/m) do
        m_indent = Regexp.last_match(1)
        m_start  = Regexp.last_match(2) # ~~~
        m_lang   = Regexp.last_match(3)
        m_code   = Regexp.last_match(4)
        m_end    = Regexp.last_match(5) # ~~~

        # The closing code fence must be at least as long as the opening fence.
        # NOTE(review): a shorter closing fence replaces the whole match with
        # an empty string, i.e. the matched text is dropped from the document;
        # confirm that this is intended.
        next '' if m_end.length < m_start.length

        # Only the first word of the info string is used as the language
        lang = m_lang ? m_lang.strip.split.first : nil
        "#{m_indent}#{cache_codeblock(lang, m_code, m_indent)}"
      end
    end

    # Backtick fences are handled for every markup format
    data.gsub!(/^([ ]{0,3})``` ?([^\r\n]+)?\r?\n(.+?)\r?\n[ ]{0,3}```[ \t]*\r?$/m) do
      indent = Regexp.last_match(1)
      lang   = Regexp.last_match(2).to_s.strip
      # print the SHA1 ID with the proper indentation
      "#{indent}#{cache_codeblock(lang, Regexp.last_match(3), indent)}"
    end

    data
  end

  # Process all code from the codemap and replace the placeholders with the
  # final HTML.
  #
  # data - The String data (with placeholders). It is modified in place.
  #
  # Returns the marked up String data.
  def process(data)
    return data if data.nil? || data.empty? || @map.empty?

    # First pass: highlight every block that was not served from the cache.
    # Results are keyed by placeholder id so each one is reinserted into the
    # right place regardless of how many blocks were cached.
    rendered = {}
    @map.each do |id, spec|
      next if spec[:output] # cached

      code = spec[:code]
      remove_leading_space(code, /^#{spec[:indent]}/m)
      remove_leading_space(code, /^( |\t)/m)
      rendered[id] = render_block(spec[:lang], code)
    end

    # Second pass: substitute every placeholder with its final HTML.
    @map.each do |id, spec|
      body = spec[:output] || finalize_block(id, spec, rendered[id])
      # Removes paragraph tags surrounding <pre> blocks, see issue https://github.com/gollum/gollum-lib/issues/97
      data.gsub!(/(<p>#{Regexp.escape(id)}<\/p>|#{Regexp.escape(id)})/) { body }
    end

    data
  end

  private

  # Produce the output for a single code block, either through a registered
  # language handler or through the available syntax highlighter.
  #
  # lang - The String language of the block, or nil.
  # code - The String code.
  #
  # Returns the handler's or highlighter's result; nil when Rouge
  # highlighting failed.
  def render_block(lang, code)
    _pattern, handler = self.class.language_handlers.find { |pattern, _handler| lang =~ pattern }
    return handler.call(CGI.escapeHTML(lang), CGI.escapeHTML(code)) if handler

    if defined? Pygments
      highlight_with_pygments(lang, code)
    else
      highlight_with_rouge(lang, code)
    end
  end

  # Highlight a block with Pygments.
  #
  # lang - The String language of the block, or nil.
  # code - The String code.
  #
  # Returns the value of the Pygments lexer's `highlight` call.
  def highlight_with_pygments(lang, code)
    encoding = @markup.encoding || 'utf-8'
    # Set the default lexer to 'text' to prevent #153 and #154
    lexer = Pygments::Lexer[lang || 'text'] || Pygments::Lexer['text']
    # must set startinline to true for php to be highlighted without an
    # opening <?php tag
    lexer.highlight(code, :options => { :encoding => encoding.to_s, :startinline => true })
  end

  # Highlight a block with Rouge. Rouge's HTML formatter output is wrapped in
  # a <pre> element here.
  #
  # lang - The String language of the block, or nil.
  # code - The String code.
  #
  # Returns the highlighted HTML String, or nil if highlighting raised. A nil
  # result makes the caller fall back to the HTML-escaped code, so raw,
  # unescaped code is never inserted into the page.
  def highlight_with_rouge(lang, code)
    # if `lang` was not defined then assume plaintext
    lexer         = Rouge::Lexer.find_fancy(lang || 'plaintext')
    wrap_template = '<pre class="highlight"><code>%s</code></pre>'

    # if `lang` is defined but cannot be found then wrap it with an error
    if lexer.nil?
      lexer         = Rouge::Lexers::PlainText
      wrap_template = '<pre class="highlight"><span class="err">%s</span></pre>'
    end

    formatted = Rouge::Formatters::HTML.new.format(lexer.lex(code))
    Kernel.sprintf(wrap_template, formatted)
  rescue StandardError
    nil
  end

  # Pick the final HTML for a block that was not served from the cache.
  #
  # id          - The String placeholder id of the block.
  # spec        - The Hash stored in @map for the block.
  # highlighted - The result of highlighting the block (may be nil).
  #
  # Returns the highlighted String (after storing it in the markup cache), or
  # the HTML-escaped code wrapped in <pre><code> when there is no highlighted
  # output.
  def finalize_block(id, spec, highlighted)
    body = highlighted.to_s
    return "<pre><code>#{CGI.escapeHTML(spec[:code])}</code></pre>" if body.empty?

    @markup.update_cache(:code, id, body)
    body
  end

  # Remove the leading space from a code block. Leading space
  # is only removed if every single line in the block has leading
  # whitespace; lines consisting only of a line break are ignored for
  # that check.
  #
  # code  - The code block to remove spaces from. It is modified in place.
  # regex - A regex to match whitespace
  def remove_leading_space(code, regex)
    return unless code.lines.all? { |line| line =~ /\A\r?\n\Z/ || line =~ regex }

    code.gsub!(regex) { '' }
  end

  # Register a code block in @map and build its placeholder.
  #
  # language - The String language of the block; nil or empty means none.
  # code     - The String code.
  # indent   - The String indentation that preceded the opening fence.
  #
  # Returns the String placeholder id, built from the open/close patterns and
  # the SHA1 of the language and code.
  def cache_codeblock(language, code, indent = "")
    language = language.to_s.empty? ? nil : language
    id       = "#{open_pattern}#{Digest::SHA1.hexdigest("#{language}.#{code}")}#{close_pattern}"
    cached   = @markup.check_cache(:code, id)
    @map[id] = cached ? { :output => cached } : { :lang => language, :code => code, :indent => indent }
    id
  end
end