# This file defines the String#to_xhtml and String#to_inline_xhtml # methods, which are invoked to transform plain text into XHTML. # # This particular implementation features the Markdown # formatting system via Maruku, syntax coloring via CodeRay, # and smart source code sizing (block versus inline display). require 'cgi' begin require 'rubygems' gem 'maruku', '~> 0.5' gem 'coderay', '>= 0.7' rescue LoadError end require 'coderay' require 'maruku' class String # The content of these XHTML tags will be preserved while # they are being processed by Textile. By doing this, we # avoid unwanted Textile transformations, such as quotation # marks becoming curly ( ), in source code. PROTECTED_TAGS = %w[tt code pre] # The content of these XHTML tags will be preserved # *verbatim* throughout the text-to-XHTML conversion process. VERBATIM_TAGS = %w[noformat] # Transforms this string into an *inline* XHTML string (one that # does not contain any block-level XHTML elements at the root). def to_inline_xhtml to_xhtml true end # Transforms this string into XHTML while ensuring that the # result contains one or more block-level elements at the root. # # inline:: If true, the resulting XHTML will *not* # contain a block-level element at the root. # def to_xhtml inline = false with_protected_tags(self, VERBATIM_TAGS, true) do |text| html = with_protected_tags(text, PROTECTED_TAGS, false) do |s| s.thru_maruku inline end # Markdown's "code spans" should really be "pre spans" while html.gsub! %r{(

)(.*?)(

)}m, '\1\2\3' end # allow user to type

 blocks on single lines
      # without affecting the display of their content
      html.gsub! %r{()[ \t]*\r?\n|\r?\n[ \t]*()}, '\1\2'

      # ensure tables have a border: this *greatly* improves
      # readability in text-based web browsers like w3m and lynx
      html.gsub! %r/(.*)\Z}, '\1' if inline
    html
  end

  # Adds syntax coloring to <code> elements in this string.  If the
  # <code> tag has an attribute lang="...", then that is considered the
  # programming language for which appropriate syntax coloring should be
  # applied.  Otherwise, the programming language is assumed to be ruby.
  def thru_coderay #:nodoc:
    # Returns a copy of this string in which every <code ...>...</code>
    # element has been replaced by a syntax-colored equivalent.
    #
    # The language is read from the element's lang="..." attribute and
    # falls back to :ruby.  Multi-line snippets are emitted as <pre>,
    # single-line snippets as <code>; the original attributes are kept.
    #
    # The pattern must end with the closing tag (</\1>): without it the
    # lazy body group matches the empty string and nothing is colored.
    gsub %r{<(code)(.*?)>(.*?)</\1>}m do
      # grab the captures before any other call can disturb the match data
      atts = Regexp.last_match(2)
      raw = Regexp.last_match(3)

      # the body arrives HTML-escaped; a newline directly after the
      # opening tag is not part of the snippet
      code = CGI.unescapeHTML(raw).sub(/\A\r?\n/, '')
      lang = atts[/\blang=('|")(.*?)\1/i, 2] || :ruby

      html = CodeRay.scan(code, lang).html(:css => :style)
      tag = code.include?("\n") ? :pre : :code

      %{<#{tag} class="code"#{atts}>#{html}</#{tag}>}
    end
  end

  private

  # Protects the given tags in the given input, passes
  # that protected input to the given block, restores the
  # given tags in the result of the block and returns it.
  #
  # verbatim:: If true, only the content of each matching element is
  #            kept, exactly as written and without its enclosing
  #            tags; otherwise the element keeps its tags and its
  #            content is HTML-escaped.
  #
  def with_protected_tags(input, tags, verbatim) #:nodoc: :yields: input
    # Swaps every element whose tag name is listed in +tags+ for an opaque
    # placeholder, yields the protected text to the block, and then puts
    # the saved content back into whatever the block returned.
    #
    # input::    The text to protect; it is duplicated, never modified.
    # tags::     Tag names (without angle brackets) to protect.
    # verbatim:: If true, only the element's content is restored (the
    #            enclosing tags are dropped); otherwise the element is
    #            restored with its tags and its content HTML-escaped.
    #
    # Returns the block's result with all placeholders expanded.
    # Raises ArgumentError when no block is given.
    raise ArgumentError, 'a block is required' unless block_given?

    input = input.dup
    escapes = {}

    # protect the given tags by replacing each element with a placeholder;
    # the pattern has to include the closing tag, otherwise the lazy body
    # group matches nothing and the content is left unprotected
    tags.each do |tag|
      input.gsub!(%r{(<#{tag}.*?>)(.*?)(</#{tag}>)}m) do
        head, body, tail = Regexp.last_match.captures

        original =
          if verbatim
            body
          else
            # normalize to exactly one level of HTML escaping
            "#{head}#{CGI.escapeHTML(CGI.unescapeHTML(body))}#{tail}"
          end

        escaped = ERBook::Document.digest(original)
        escapes[escaped] = original

        escaped
      end
    end

    # invoke the given block with the protected input
    output = yield input

    # Restore the protected content.  Placeholders can be nested inside
    # other saved content, so keep making passes until one changes
    # nothing; this also terminates when the block discarded a
    # placeholder.  The block form of gsub! inserts the saved text
    # literally, so backslashes in it need no special treatment.
    loop do
      restored = false

      escapes.each_pair do |esc, orig|
        restored = true if output.gsub!(esc) { orig }
      end

      break unless restored
    end

    output
  end
end