# encoding: utf-8

module Nanoc::Filters
  # Filter that syntax-highlights `code` elements found in HTML or XML
  # content, delegating the actual highlighting to one of several
  # colorizers (see {KNOWN_COLORIZERS}).
  class ColorizeSyntax < Nanoc::Filter
    requires 'nokogiri', 'stringio', 'open3'

    # The default colorizer to use for a language if the colorizer for that
    # language is not overridden.
    DEFAULT_COLORIZER = :coderay

    # Syntax-highlights code blocks in the given content. Code blocks should
    # be enclosed in `pre` elements that contain a `code` element. The code
    # element should have an indication of the language the code is in. There
    # are two possible ways of adding such an indication:
    #
    # 1. A HTML class starting with `language-` and followed by the
    #    code language, as specified by HTML5. For example,
    #    `<code class="language-ruby">`.
    #
    # 2. A comment on the very first line of the code block in the format
    #    `#!language` where `language` is the language the code is in. For
    #    example, `#!ruby`.
    #
    # Options for individual colorizers will be taken from the {#run}
    # options’ value for the given colorizer. For example, if the filter is
    # invoked with a `:coderay => coderay_options_hash` option, the
    # `coderay_options_hash` hash will be passed to the CodeRay colorizer.
    #
    # Currently, the following colorizers are supported:
    #
    # * `:coderay` for [Coderay](http://coderay.rubychan.de/)
    # * `:pygmentize` for [pygmentize](http://pygments.org/docs/cmdline/), the
    #   commandline frontend for [Pygments](http://pygments.org/)
    # * `:pygmentsrb` for [pygments.rb](https://github.com/tmm1/pygments.rb),
    #   a Ruby interface for [Pygments](http://pygments.org/)
    # * `:simon_highlight` for [Highlight](http://www.andre-simon.de/doku/highlight/en/highlight.html)
    # * `:rouge` for [Rouge](https://github.com/jayferd/rouge/)
    #
    # Additional colorizer implementations are welcome!
    #
    # @example Using a class to indicate type of code to be highlighted
    #
    #     <pre><code class="language-ruby">
    #     def foo
    #       "asdf"
    #     end
    #     </code></pre>
    #
    # @example Using a comment to indicate type of code to be highlighted
    #
    #     <pre><code>
    #     #!ruby
    #     def foo
    #       "asdf"
    #     end
    #     </code></pre>
    #
    # @example Invoking the filter with custom parameters
    #
    #     filter :colorize_syntax,
    #            :colorizers => { :ruby => :coderay },
    #            :coderay    => { :line_numbers => :list }
    #
    # @param [String] content The content to filter
    #
    # @option params [Symbol] :default_colorizer (DEFAULT_COLORIZER) The
    #   default colorizer, i.e. the colorizer that will be used when the
    #   colorizer is not overridden for a specific language.
    #
    # @option params [Symbol] :syntax (:html) The syntax to use, which can be
    #   `:html`, `:xml` or `:xhtml`, the latter two being the same.
    #
    # @option params [Hash] :colorizers ({}) A hash containing
    #   a mapping of programming languages (symbols, not strings) onto
    #   colorizers (symbols).
    #
    # @option params [Boolean] :outside_pre (false) `true` if the colorizer
    #   should be applied on `code` elements outside `pre` elements, false
    #   if only `code` elements inside `pre` elements should be colorized.
    #
    # @option params [Boolean] :is_fullpage (false) Whether to treat the input
    #   as a full HTML page or a page fragment. When true, HTML boilerplate
    #   such as the doctype, `html`, `head` and `body` elements will be added.
    #
    # @raise [RuntimeError] if `:syntax` is not one of the supported values,
    #   or if the colorizer selected for a language is not a known one
    #
    # @return [String] The filtered content
    def run(content, params = {})
      Nanoc::Extra::JRubyNokogiriWarner.check_and_warn

      # Take colorizers from parameters
      @colorizers = Hash.new(params[:default_colorizer] || DEFAULT_COLORIZER)
      (params[:colorizers] || {}).each_pair do |language, colorizer|
        @colorizers[language] = colorizer
      end

      # Determine syntax (HTML or XML)
      syntax = params[:syntax] || :html
      parser_class =
        case syntax
        when :html
          Nokogiri::HTML
        when :xml, :xhtml
          Nokogiri::XML
        else
          raise "unknown syntax: #{syntax.inspect} (expected :html or :xml)"
        end

      # Colorize
      doc = parse(content, parser_class, params.fetch(:is_fullpage, false))
      selector = params[:outside_pre] ? 'code' : 'pre > code'
      doc.css(selector).each do |element|
        language, from_class = detect_language(element)

        # Give up if there is no hope left
        next if language.nil?

        # Highlight
        raw = strip(element.inner_text)
        highlighted_code = highlight(raw, language, params)
        element.children = Nokogiri::HTML.fragment(strip(highlighted_code), 'utf-8')

        # Add language-something class when the language came from a
        # `#!language` comment rather than from an existing class
        unless from_class
          existing = element['class'].to_s
          separator = existing.empty? || existing.end_with?(' ') ? '' : ' '
          element['class'] = "#{existing}#{separator}language-#{language}"
        end

        highlight_postprocess(language, element.parent)
      end

      serializer = "to_#{syntax}".to_sym
      doc.send(serializer, encoding: 'UTF-8')
    end

    # Parses the given content using the given class. This method also handles
    # an issue with Nokogiri on JRuby causing “cannot modify frozen string”
    # errors.
    #
    # @param [String] content The content to parse
    #
    # @param [Class] klass The Nokogiri parser class (either Nokogiri::HTML
    #   or Nokogiri::XML)
    #
    # @param [Boolean] is_fullpage true if the given content is a full page,
    #   false if it is a fragment
    #
    # @return The parsed document or fragment, as returned by `klass.parse`
    #   or `klass.fragment`
    #
    # @api private
    def parse(content, klass, is_fullpage)
      if is_fullpage
        klass.parse(content, nil, 'UTF-8')
      else
        klass.fragment(content)
      end
    rescue => e
      # Only the frozen-string failure is retried (with a mutable copy of the
      # content); every other error propagates unchanged.
      raise unless e.message =~ /can't modify frozen string/
      parse(content.dup, klass, is_fullpage)
    end

    # Runs the code through [CodeRay](http://coderay.rubychan.de/).
    #
    # @api private
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @param [Hash] params Parameters to pass on to CodeRay
    #
    # @return [String] The colorized output
    def coderay(code, language, params = {})
      # Loaded lazily so that the gem is only needed when this colorizer runs
      require 'coderay'

      ::CodeRay.scan(code, language).html(params)
    end

    # Returns the input itself, not performing any code highlighting.
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in (unused)
    #
    # @param [Hash] params Unused
    #
    # @return [String] The colorized output, which is identical to the input
    #   in this case
    def dummy(code, language, params = {}) # rubocop:disable Lint/UnusedMethodArgument
      code
    end

    # Runs the content through [pygmentize](http://pygments.org/docs/cmdline/),
    # the commandline frontend for [Pygments](http://pygments.org/).
    #
    # Every entry in `params` is passed to pygmentize as a `key=value` pair
    # through its `-O` switch. The given hash itself is left untouched.
    #
    # @api private
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @option params [String, Symbol] :encoding ('utf-8') The encoding of the
    #   code block
    #
    # @option params [String] :nowrap ('True') Value passed on as the
    #   `nowrap` option
    #
    # @return [String] The colorized output
    def pygmentize(code, language, params = {})
      check_availability('pygmentize', '-V')

      # Apply defaults on a copy so the caller's option hash is not modified
      options = params.dup
      options[:encoding] ||= 'utf-8'
      options[:nowrap] ||= 'True'

      cmd = ['pygmentize', '-l', language, '-f', 'html']
      cmd << '-O' << options.map { |k, v| "#{k}=#{v}" }.join(',')

      stdout = StringIO.new
      stderr = $stderr
      piper = Nanoc::Extra::Piper.new(stdout: stdout, stderr: stderr)
      piper.run(cmd, code)

      stdout.string
    end

    # Runs the content through [Pygments](http://pygments.org/) via
    # [pygments.rb](https://github.com/tmm1/pygments.rb).
    #
    # `params` is passed on to `Pygments.highlight` after defaults for
    # `:lexer` and for the `:encoding` and `:nowrap` entries of `:options`
    # have been filled in. The given hashes are left untouched.
    #
    # @api private
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @param [Hash] params Parameters to pass on to pygments.rb
    #
    # @return [String] The colorized output
    def pygmentsrb(code, language, params = {})
      # Loaded lazily so that the gem is only needed when this colorizer runs
      require 'pygments'

      args = params.dup
      args[:lexer] ||= language
      # `params.dup` is shallow: copy the nested hash too before adding
      # defaults, so the caller's :options hash is not modified
      args[:options] = (args[:options] || {}).dup
      args[:options][:encoding] ||= 'utf-8'
      args[:options][:nowrap] ||= 'True'

      Pygments.highlight(code, args)
    end

    # Maps {#simon_highlight} option names onto `highlight` command-line
    # switches.
    SIMON_HIGHLIGHT_OPT_MAP = {
      wrap: '-W',
      include_style: '-I',
      line_numbers: '-l',
    }.freeze

    # Runs the content through [Highlight](http://www.andre-simon.de/doku/highlight/en/highlight.html).
    #
    # @api private
    #
    # @since 3.2.0
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @option params [String] :style The style to use
    #
    # @option params :wrap Adds the `-W` switch when the key is present
    #
    # @option params :include_style Adds the `-I` switch when the key is
    #   present
    #
    # @option params :line_numbers Adds the `-l` switch when the key is
    #   present
    #
    # @return [String] The colorized output
    def simon_highlight(code, language, params = {})
      check_availability('highlight', '--version')

      cmd = ['highlight', '--syntax', language, '--fragment']
      params.each_key do |key|
        flag = SIMON_HIGHLIGHT_OPT_MAP[key]
        if flag
          # NOTE(review): the switch is added whenever the key is present,
          # even if its value is false or nil. Confirm whether that is intended.
          cmd << flag
        elsif key == :style
          # TODO: allow passing other options
          cmd << '--style' << params[:style]
        end
      end

      stdout = StringIO.new
      stderr = $stderr
      piper = Nanoc::Extra::Piper.new(stdout: stdout, stderr: stderr)
      piper.run(cmd, code)

      stdout.string
    end

    # Wraps the element in `<div class="CodeRay"><div class="code">`.
    #
    # @api private
    #
    # @param [String] _language The language the code is written in (unused)
    #
    # @param element The node to wrap; {#run} passes the parent of the
    #   highlighted `code` element
    #
    # @return [void]
    def coderay_postprocess(_language, element)
      # Skip if the node is not attached to a parent
      return if element.parent.nil?

      # <div class="code">
      div_inner = Nokogiri::XML::Node.new('div', element.document)
      div_inner['class'] = 'code'
      div_inner.children = element.dup

      # <div class="CodeRay">
      div_outer = Nokogiri::XML::Node.new('div', element.document)
      div_outer['class'] = 'CodeRay'
      div_outer.children = div_inner

      # Put the wrapper where the original element was
      element.swap div_outer
    end

    # Runs the content through [Rouge](https://github.com/jayferd/rouge/).
    #
    # @api private
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @option params [String] :css_class ('highlight') The CSS class handed
    #   to the Rouge HTML formatter
    #
    # @return [String] The colorized output
    def rouge(code, language, params = {})
      # Loaded lazily so that the gem is only needed when this colorizer runs
      require 'rouge'

      formatter_options = {
        css_class: params.fetch(:css_class, 'highlight'),
      }
      formatter = Rouge::Formatters::HTML.new(formatter_options)
      # Fall back to plain text when no lexer is found for the language
      lexer = Rouge::Lexer.find_fancy(language, code) || Rouge::Lexers::PlainText
      formatter.format(lexer.lex(code))
    end

    # Removes the double wrapping.
    #
    # Before:
    #
    #   <pre><code class="language-ruby"><pre class="highlight"><code>
    #
    # After:
    #
    #   <pre><code class="language-ruby highlight">
    #
    # Nothing is changed unless the element is a `pre` whose `code` child
    # contains a `pre` that itself contains a `code`.
    #
    # @api private
    #
    # @param [String] _language The language the code is written in (unused)
    #
    # @param element The node to flatten; {#run} passes the parent of the
    #   highlighted `code` element
    #
    # @return [void]
    def rouge_postprocess(_language, element)
      return if element.name != 'pre'

      code1 = element.xpath('code').first
      return if code1.nil?

      pre = code1.xpath('pre').first
      return if pre.nil?

      code2 = pre.xpath('code').first
      return if code2.nil?

      # Hoist the inner markup and merge the inner `pre` class into the
      # outer `code` class
      code1.inner_html = code2.inner_html
      code1['class'] = [code1['class'], pre['class']].compact.join(' ')
    end

    protected

    # Names of the colorizer methods that {#highlight} may dispatch to.
    KNOWN_COLORIZERS = [:coderay, :dummy, :pygmentize, :pygmentsrb, :simon_highlight, :rouge].freeze

    # Removes the first blank lines and any whitespace at the end.
    #
    # @param [String] s The text to clean up
    #
    # @return [String] The text without leading blank lines and without
    #   trailing whitespace
    def strip(s)
      s.lines.drop_while { |line| line.strip.empty? }.join.rstrip
    end

    # Determines the language of a `code` element, either from a
    # `language-xxx` class or from a `#!xxx` comment at the start of the code.
    # In the latter case the comment line is removed from the element.
    #
    # @param element The `code` element to inspect
    #
    # @return [Array(String, Boolean), Array(nil, Boolean)] The language (or
    #   nil when none could be determined) and whether it came from the
    #   element's class
    def detect_language(element)
      css_class = element['class']
      if css_class
        # Get language from class
        match = css_class.match(/(^| )language-([^ ]+)/)
        return match ? [match[2], true] : [nil, false]
      end

      # Get language from comment line. The first token is nil when the
      # element is empty or only contains whitespace.
      first_token = element.inner_text.strip.split[0]
      match = first_token && first_token.match(/^#!([^\/][^\n]*)$/)
      return [nil, false] unless match

      element.content = element.content.sub(/^#!([^\/][^\n]*)$\n/, '')
      [match[1], false]
    end

    # Looks up the colorizer configured for the given language.
    #
    # @param [String, Symbol] language The language the code is written in
    #
    # @raise [RuntimeError] if the configured colorizer is not one of
    #   {KNOWN_COLORIZERS}
    #
    # @return [Symbol] The name of the colorizer
    def colorizer_for(language)
      colorizer = @colorizers[language.to_sym]
      unless KNOWN_COLORIZERS.include?(colorizer)
        raise "I don’t know how to highlight code using the “#{colorizer}” colorizer"
      end
      colorizer
    end

    # Highlights the code using the colorizer configured for the language.
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @param [Hash] params The filter parameters; the entry stored under the
    #   colorizer's name (if any) is passed to the colorizer
    #
    # @raise [RuntimeError] if the configured colorizer is not a known one
    #
    # @return [String] The colorized output
    def highlight(code, language, params = {})
      colorizer = colorizer_for(language)
      send(colorizer, code, language, params[colorizer] || {})
    end

    # Calls the `<colorizer>_postprocess` method for the colorizer configured
    # for the language, if this filter responds to such a method.
    #
    # @param [String] language The language the code is written in
    #
    # @param element The node handed to the postprocess method
    #
    # @raise [RuntimeError] if the configured colorizer is not a known one
    #
    # @return The postprocess method's result, or nil if there is none
    def highlight_postprocess(language, element)
      hook = "#{colorizer_for(language)}_postprocess".to_sym
      send(hook, language, element) if respond_to?(hook)
    end

    # Runs the given command with no input, capturing its output in
    # throwaway buffers.
    #
    # @param [Array<String>] cmd The command and its arguments
    #
    # @return The result of `Nanoc::Extra::Piper#run`
    def check_availability(*cmd)
      piper = Nanoc::Extra::Piper.new(stdout: StringIO.new, stderr: StringIO.new)
      piper.run(cmd, nil)
    end
  end
end