markup.rb in gitlab-gollum-lib-4.2.7

- old
+ new

@@ -1,23 +1,38 @@
 # ~*~ encoding: utf-8 ~*~
 require 'digest/sha1'
 require 'cgi'
+require 'rouge'
 require 'base64'
 
 require File.expand_path '../helpers', __FILE__
-require File.expand_path '../remote_code', __FILE__
 
+# Use pygments if it's installed; carry on without it otherwise
+begin
+  require 'pygments'
+  Pygments.start
+rescue LoadError, StandardError # not Exception: let signals/exit propagate
+end
+
 module Gollum
 
   class Markup
     include Helpers
 
     @formats = {}
 
     class << self
-      attr_reader :formats
 
+      # Only use the formats that are specified in config.rb: those whose name
+      # is listed in Gollum::Page::FORMAT_NAMES. When that constant is not
+      # defined, every registered format is returned.
+      def formats
+        return @formats unless defined? Gollum::Page::FORMAT_NAMES
+        enabled = Gollum::Page::FORMAT_NAMES.values # loop-invariant, built once
+        @formats.select { |_, value| enabled.include? value[:name] }
+      end
+
       # Register a file extension and associated markup type
       #
       # ext     - The file extension
       # name    - The name of the markup type
       # options - Hash of options:
@@ -25,391 +40,130 @@
       #                    Defaults to exact match of ext.
       #
       # If given a block, that block will be registered with GitHub::Markup to
       # render any matching pages
       def register(ext, name, options = {}, &block)
-        regexp = options[:regexp] || Regexp.new(ext.to_s)
-        @formats[ext] = { :name => name, :regexp => regexp }
-        GitHub::Markup.add_markup(regexp, &block) if block_given?
+        @formats[ext] = { :name => name,
+          :regexp => options.fetch(:regexp) { Regexp.new(ext.to_s) }, # default built lazily
+          :reverse_links => options.fetch(:reverse_links, false) }
       end
     end
 
     attr_accessor :toc
-    attr_reader   :metadata
+    attr_accessor :metadata
+    attr_reader :encoding
+    attr_reader :sanitize
+    attr_reader :format
+    attr_reader :wiki
+    attr_reader :page
+    attr_reader :parent_page
+    attr_reader :sub_page
+    attr_reader :name
+    attr_reader :include_levels
+    attr_reader :to_xml_opts
+    attr_reader :dir
 
     # Initialize a new Markup object.
     #
     # page - The Gollum::Page.
     #
     # Returns a new Gollum::Markup object, ready for rendering.
     def initialize(page)
-      @wiki    = page.wiki
-      @name    = page.filename
-      @data    = page.text_data
-      @version = page.version.id if page.version
-      @format  = page.format
-      @sub_page = page.sub_page
-      @parent_page = page.parent_page
-      @dir     = ::File.dirname(page.path)
-      @tagmap  = {}
-      @codemap = {}
-      @wsdmap  = {}
-      @premap  = {}
-      @toc = nil
-      @metadata = nil
-      @to_xml = { :save_with => Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^ 1, :indent => 0, :encoding => 'UTF-8' }
-    end
-
-    # Render the content with Gollum wiki syntax on top of the file's own
-    # markup language.
-    #
-    # no_follow - Boolean that determines if rel="nofollow" is added to all
-    #             <a> tags.
-    # encoding  - Encoding Constant or String.
-    #
-    # Returns the formatted String content.
-    def render(no_follow = false, encoding = nil)
-      sanitize = no_follow ?
-        @wiki.history_sanitizer :
-        @wiki.sanitizer
-
-      data = @data.dup
-      data = extract_metadata(data)
-      data = extract_remote_code(data)
-      data = extract_code(data)
-      data = extract_wsd(data)
-      data = extract_tags(data)
-      begin
-        data = GitHub::Markup.render(@name, data)
-        if data.nil?
-          raise "There was an error converting #{@name} to HTML."
-        end
-      rescue Object => e
-        data = %{<p class="gollum-error">#{e.message}</p>}
+      if page
+        @wiki        = page.wiki
+        @name        = page.filename
+        @data        = page.text_data
+        @version     = page.version.id if page.version
+        @format      = page.format
+        @sub_page    = page.sub_page
+        @parent_page = page.parent_page
+        @page        = page
+        @dir         = ::File.dirname(page.path)
       end
-      data = process_tags(data)
-      data = process_code(data, encoding)
-
-      doc = Nokogiri::HTML::DocumentFragment.parse(data)
-      doc = sanitize.clean_node!(doc) if sanitize
-      doc,toc = process_headers(doc)
-      @toc = @sub_page ? ( @parent_page ? @parent_page.toc_data : "[[_TOC_]]" ) : toc
-      yield doc if block_given?
-      # nokogiri's save options are ored together. FORMAT has a value of 1 so ^ 1 removes it.
-      # formatting will create extra spaces in pre tags.
-      # https://github.com/sparklemotion/nokogiri/issues/782
-      # DEFAULT_HTML encodes unicode so XHTML is used for proper unicode support in href.
-      data = doc.to_xml( @to_xml )
-
-      data = process_toc_tags(data)
-      data = process_wsd(data)
-      data.gsub!(/<p><\/p>/) do
-        ''
-      end
-
-      data
+      @metadata    = nil
+      @to_xml_opts = { :save_with => Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^ 1, :indent => 0, :encoding => 'UTF-8' }
     end
 
-    # Inserts header anchors and creates TOC
-    #
-    # doc - Nokogiri parsed document
-    #
-    # Returns doc Document and toc String
-    def process_headers(doc)
-      toc = nil
-      doc.css('h1,h2,h3,h4,h5,h6').each do |h|
-        # must escape "
-        h_name = h.content.gsub(' ','-').gsub('"','%22')
-
-        level = h.name.gsub(/[hH]/,'').to_i
-
-        # Add anchors
-        h.add_child(%Q{<a class="anchor" id="#{h_name}" href="##{h_name}"></a>})
-
-        # Build TOC
-        toc ||= Nokogiri::XML::DocumentFragment.parse('<div class="toc"><div class="toc-title">Table of Contents</div></div>')
-        tail ||= toc.child
-        tail_level ||= 0
-
-        while tail_level < level
-          node = Nokogiri::XML::Node.new('ul', doc)
-          tail = tail.add_child(node)
-          tail_level += 1
-        end
-        while tail_level > level
-          tail = tail.parent
-          tail_level -= 1
-        end
-        node = Nokogiri::XML::Node.new('li', doc)
-        # % -> %25 so anchors work on Firefox. See issue #475
-        node.add_child(%Q{<a href="##{h_name}">#{h.content}</a>})
-        tail.add_child(node)
-      end
-      toc = toc.to_xml(@to_xml) if toc != nil
-      [doc, toc]
+    def reverse_links? # false when @format is missing from the enabled formats
+      (self.class.formats[@format] || {}).fetch(:reverse_links, false)
+    end
 
-    #########################################################################
+    # Render data using default chain in the target format.
     #
-    # Tags
+    # data - the data to render
+    # format - format to use as a symbol
+    # name - name using the extension of the format
     #
-    #########################################################################
+    # Returns the processed data
+    def render_default(data, format=:markdown, name='render_default.md')
+      # set instance vars so we're able to render data without a wiki or page.
+      @format = format
+      @name   = name
 
-    # Extract all tags into the tagmap and replace with placeholders.
-    #
-    # data - The raw String data.
-    #
-    # Returns the placeholder'd String data.
-    def extract_tags(data)
-      if @format == :asciidoc
-        return data
-      end
-      data.gsub!(/(.?)\[\[(.+?)\]\]([^\[]?)/m) do
-        if $1 == "'" && $3 != "'"
-          "[[#{$2}]]#{$3}"
-        elsif $2.include?('][')
-          if $2[0..4] == 'file:'
-            pre = $1
-            post = $3
-            parts = $2.split('][')
-            parts[0][0..4] = ""
-            link = "#{parts[1]}|#{parts[0].sub(/\.org/,'')}"
-            id = Digest::SHA1.hexdigest(link)
-            @tagmap[id] = link
-            "#{pre}#{id}#{post}"
-          else
-            $&
-          end
-        else
-          id = Digest::SHA1.hexdigest($2)
-          @tagmap[id] = $2
-          "#{$1}#{id}#{$3}"
-        end
-      end
-      data
-    end
+      chain = [:Metadata, :PlainText, :Emoji, :TOC, :RemoteCode, :Code, :Sanitize, :WSD, :Tags, :Render]
 
-    # Process all tags from the tagmap and replace the placeholders with the
-    # final markup.
-    #
-    # data      - The String data (with placeholders).
-    #
-    # Returns the marked up String data.
-    def process_tags(data)
-      @tagmap.each do |id, tag|
-        # If it's preformatted, just put the tag back
-        if is_preformatted?(data, id)
-          data.gsub!(id) do
-            "[[#{tag}]]"
-          end
-        else
-          data.gsub!(id) do
-            process_tag(tag).gsub('%2F', '/')
-          end
-        end
+      filter_chain = chain.map do |r|
+        Gollum::Filter.const_get(r).new(self)
       end
-      data
-    end
 
-    # Find `id` within `data` and determine if it's within
-    # preformatted tags.
-    #
-    # data      - The String data (with placeholders).
-    # id        - The String SHA1 hash.
-    PREFORMATTED_TAGS = %w(code tt)
-    def is_preformatted?(data, id)
-      doc = Nokogiri::HTML::DocumentFragment.parse(data)
-      node = doc.search("[text()*='#{id}']").first
-      node && (PREFORMATTED_TAGS.include?(node.name) ||
-        node.ancestors.any? { |a| PREFORMATTED_TAGS.include?(a.name) })
+      process_chain data, filter_chain
     end
 
-    # Process a single tag into its final HTML form.
+    # Process the filter chain
     #
-    # tag       - The String tag contents (the stuff inside the double
-    #             brackets).
+    # data - the data to send through the chain
+    # filter_chain - the chain to process
     #
-    # Returns the String HTML version of the tag.
-    def process_tag(tag)
-      if tag =~ /^_TOC_$/
-        %{[[#{tag}]]}
-      elsif tag =~ /^_$/
-        %{<div class="clearfloats"></div>}
-      elsif html = process_image_tag(tag)
-        html
-      elsif html = process_file_link_tag(tag)
-        html
-      else
-        process_page_link_tag(tag)
+    # Returns the formatted data
+    def process_chain(data, filter_chain)
+      # First we extract the data through the chain...
+      filter_chain.each do |filter|
+        data = filter.extract(data)
       end
-    end
 
-    # Attempt to process the tag as an image tag.
-    #
-    # tag - The String tag contents (the stuff inside the double brackets).
-    #
-    # Returns the String HTML if the tag is a valid image tag or nil
-    #   if it is not.
-    def process_image_tag(tag)
-      parts = tag.split('|')
-      return if parts.size.zero?
-
-      name  = parts[0].strip
-      path  = if file = find_file(name)
-        ::File.join @wiki.base_path, file.path
-      elsif name =~ /^https?:\/\/.+(jpg|png|gif|svg|bmp)$/i
-        name
+      # Then we process the data through the chain *backwards*
+      filter_chain.reverse.each do |filter|
+        data = filter.process(data)
       end
 
-      if path
-        opts = parse_image_tag_options(tag)
-
-        containered = false
-
-        classes = [] # applied to whatever the outermost container is
-        attrs   = [] # applied to the image
-
-        align = opts['align']
-        if opts['float']
-          containered = true
-          align ||= 'left'
-          if %w{left right}.include?(align)
-            classes << "float-#{align}"
-          end
-        elsif %w{top texttop middle absmiddle bottom absbottom baseline}.include?(align)
-          attrs << %{align="#{align}"}
-        elsif align
-          if %w{left center right}.include?(align)
-            containered = true
-            classes << "align-#{align}"
-          end
-        end
-
-        if width = opts['width']
-          if width =~ /^\d+(\.\d+)?(em|px)$/
-            attrs << %{width="#{width}"}
-          end
-        end
-
-        if height = opts['height']
-          if height =~ /^\d+(\.\d+)?(em|px)$/
-            attrs << %{height="#{height}"}
-          end
-        end
-
-        if alt = opts['alt']
-          attrs << %{alt="#{alt}"}
-        end
-
-        attr_string = attrs.size > 0 ? attrs.join(' ') + ' ' : ''
-
-        if opts['frame'] || containered
-          classes << 'frame' if opts['frame']
-          %{<span class="#{classes.join(' ')}">} +
-          %{<span>} +
-          %{<img src="#{path}" #{attr_string}/>} +
-          (alt ? %{<span>#{alt}</span>} : '') +
-          %{</span>} +
-          %{</span>}
-        else
-          %{<img src="#{path}" #{attr_string}/>}
-        end
+      # Finally, a little bit of cleanup, just because
+      data.gsub!(/<p><\/p>/) do
+        ''
       end
-    end
 
-    # Parse any options present on the image tag and extract them into a
-    # Hash of option names and values.
-    #
-    # tag - The String tag contents (the stuff inside the double brackets).
-    #
-    # Returns the options Hash:
-    #   key - The String option name.
-    #   val - The String option value or true if it is a binary option.
-    def parse_image_tag_options(tag)
-      tag.split('|')[1..-1].inject({}) do |memo, attr|
-        parts = attr.split('=').map { |x| x.strip }
-        memo[parts[0]] = (parts.size == 1 ? true : parts[1])
-        memo
-      end
+      data
     end
 
-    # Attempt to process the tag as a file link tag.
+    # Render the content with Gollum wiki syntax on top of the file's own
+    # markup language.
     #
-    # tag       - The String tag contents (the stuff inside the double
-    #             brackets).
+    # no_follow - Boolean that determines if rel="nofollow" is added to all
+    #             <a> tags.
+    # encoding  - Encoding Constant or String.
     #
-    # Returns the String HTML if the tag is a valid file link tag or nil
-    #   if it is not.
-    def process_file_link_tag(tag)
-      parts = tag.split('|')
-      return if parts.size.zero?
+    # Returns the formatted String content.
+    def render(no_follow = false, encoding = nil, include_levels = 10)
+      @sanitize = no_follow ?
+          @wiki.history_sanitizer :
+          @wiki.sanitizer
 
-      name  = parts[0].strip
-      path  = parts[1] && parts[1].strip
-      path  = if path && file = find_file(path)
-        ::File.join @wiki.base_path, file.path
-      elsif path =~ %r{^https?://}
-        path
-      else
-        nil
-      end
+      @encoding       = encoding
+      @include_levels = include_levels
 
-      if name && path && file
-        %{<a href="#{::File.join @wiki.base_path, file.path}">#{name}</a>}
-      elsif name && path
-        %{<a href="#{path}">#{name}</a>}
-      else
-        nil
+      data         = @data.dup
+      filter_chain = @wiki.filter_chain.map do |r|
+        Gollum::Filter.const_get(r).new(self)
       end
-    end
 
-    # Attempt to process the tag as a page link tag.
-    #
-    # tag       - The String tag contents (the stuff inside the double
-    #             brackets).
-    #
-    # Returns the String HTML if the tag is a valid page link tag or nil
-    #   if it is not.
-    def process_page_link_tag(tag)
-      parts = tag.split('|')
-      parts.reverse! if @format == :mediawiki
-
-      name, page_name = *parts.compact.map(&:strip)
-      cname = @wiki.page_class.cname(page_name || name)
-
-      if name =~ %r{^https?://} && page_name.nil?
-        %{<a href="#{name}">#{name}</a>}
-      else
-        presence    = "absent"
-        link_name   = cname
-        page, extra = find_page_from_name(cname)
-        if page
-          link_name = @wiki.page_class.cname(page.name)
-          presence  = "present"
-        end
-        link = ::File.join(@wiki.base_path, page ? page.escaped_url_path : CGI.escape(link_name))
-
-        # //page is invalid
-        # strip all duplicate forward slashes using helpers.rb trim_leading_slash
-        # //page => /page
-        link = trim_leading_slash link
-
-        %{<a class="internal #{presence}" href="#{link}#{extra}">#{name}</a>}
+      # Intended to hand the caller the rendered HTML as a parsed fragment.
+      # NOTE(review): no filter has run yet at this point, so `data` is still
+      # the raw page text and the block receives a parse of unrendered markup.
+      if block_given?
+        yield Nokogiri::HTML::DocumentFragment.parse(data)
       end
-    end
 
-
-    # Process the special table of contents tag [[_TOC_]]
-    #
-    # data      - The String data (with placeholders).
-    #
-    # Returns the marked up String data.
-    def process_toc_tags(data)
-      data.gsub!("[[_TOC_]]") do
-        @toc.nil? ? '' : @toc
-      end
-      data
+      process_chain data, filter_chain
     end
 
     # Find the given file in the repo.
     #
     # name - The String absolute or relative path of the file.
@@ -420,240 +174,9 @@
         @wiki.file(name[1..-1], version)
       else
         path = @dir == '.' ? name : ::File.join(@dir, name)
         @wiki.file(path, version)
       end
-    end
-
-    # Find a page from a given cname.  If the page has an anchor (#) and has
-    # no match, strip the anchor and try again.
-    #
-    # cname - The String canonical page name including path.
-    #
-    # Returns a Gollum::Page instance if a page is found, or an Array of
-    # [Gollum::Page, String extra] if a page without the extra anchor data
-    # is found.
-    def find_page_from_name(cname)
-      slash = cname.rindex('/')
-
-      unless slash.nil?
-        name = cname[slash+1..-1]
-        path = cname[0..slash]
-        page = @wiki.paged(name, path)
-      else
-        page = @wiki.paged(cname, '/') || @wiki.page(cname)
-      end
-
-      if page
-        return page
-      end
-      if pos = cname.index('#')
-        [@wiki.page(cname[0...pos]), cname[pos..-1]]
-      end
-    end
-
-    #########################################################################
-    #
-    # Remote code - fetch code from url and replace the contents to a
-    #               code-block that gets run the next parse.
-    #           Acceptable formats:
-    #              ```language:local-file.ext```
-    #              ```language:/abs/other-file.ext```
-    #              ```language:https://example.com/somefile.txt```
-    #
-    #########################################################################
-
-    def extract_remote_code data
-      data.gsub /^[ \t]*``` ?([^:\n\r]+):((http)?[^`\n\r]+)```/ do
-        language = $1
-        uri = $2
-        protocol = $3
-
-        # Detect local file
-        if protocol.nil?
-          if file = self.find_file(uri, @wiki.ref)
-            contents = file.raw_data
-          else
-            # How do we communicate a render error?
-            next "File not found: #{CGI::escapeHTML(uri)}"
-          end
-        else
-          contents = Gollum::RemoteCode.new(uri).contents
-        end
-
-        "```#{language}\n#{contents}\n```\n"
-      end
-    end
-
-    #########################################################################
-    #
-    # Code
-    #
-    #########################################################################
-
-    # Extract all code blocks into the codemap and replace with placeholders.
-    #
-    # data - The raw String data.
-    #
-    # Returns the placeholder'd String data.
-    def extract_code(data)
-      data.gsub!(/^([ \t]*)(~~~+) ?([^\r\n]+)?\r?\n(.+?)\r?\n\1(~~~+)[ \t\r]*$/m) do
-        m_indent = $1
-        m_start  = $2 # ~~~
-        m_lang   = $3
-        m_code   = $4
-        m_end    = $5 # ~~~
-
-        # start and finish tilde fence must be the same length
-        return '' if m_start.length != m_end.length
-
-        lang   = m_lang ? m_lang.strip : nil
-        id     = Digest::SHA1.hexdigest("#{lang}.#{m_code}")
-        cached = check_cache(:code, id)
-
-        # extract lang from { .ruby } or { #stuff .ruby .indent }
-        # see http://johnmacfarlane.net/pandoc/README.html#delimited-code-blocks
-
-        if lang
-            lang = lang.match(/\.([^}\s]+)/)
-            lang = lang[1] unless lang.nil?
-        end
-
-        @codemap[id] = cached   ?
-          { :output => cached } :
-          { :lang => lang, :code => m_code, :indent => m_indent }
-
-        "#{m_indent}#{id}" # print the SHA1 ID with the proper indentation
-      end
-
-      data.gsub!(/^([ \t]*)``` ?([^\r\n]+)?\r?\n(.+?)\r?\n\1```[ \t]*\r?$/m) do
-        lang   = $2 ? $2.strip : nil
-        id     = Digest::SHA1.hexdigest("#{lang}.#{$3}")
-        cached = check_cache(:code, id)
-        @codemap[id] = cached   ?
-          { :output => cached } :
-          { :lang => lang, :code => $3, :indent => $1 }
-        "#{$1}#{id}" # print the SHA1 ID with the proper indentation
-      end
-      data
-    end
-
-    # Remove the leading space from a code block. Leading space
-    # is only removed if every single line in the block has leading
-    # whitespace.
-    #
-    # code      - The code block to remove spaces from
-    # regex     - A regex to match whitespace
-    def remove_leading_space(code, regex)
-      if code.lines.all? { |line| line =~ /\A\r?\n\Z/ || line =~ regex }
-        code.gsub!(regex) do
-          ''
-        end
-      end
-    end
-
-    # Process all code from the codemap and replace the placeholders with the
-    # final HTML.
-    #
-    # data     - The String data (with placeholders).
-    # encoding - Encoding Constant or String.
-    #
-    # Returns the marked up String data.
-    def process_code(data, encoding = nil)
-      return data if data.nil? || data.size.zero? || @codemap.size.zero?
-
-      blocks    = []
-      @codemap.each do |id, spec|
-        next if spec[:output] # cached
-
-        code = spec[:code]
-
-        remove_leading_space(code, /^#{spec[:indent]}/m)
-        remove_leading_space(code, /^(  |\t)/m)
-
-        blocks << [spec[:lang], code]
-      end
-
-      highlighted = []
-      blocks.each do |lang, code|
-        encoding ||= 'utf-8'
-        hl_code = code
-        highlighted << hl_code
-      end
-
-      @codemap.each do |id, spec|
-        body = spec[:output] || begin
-          if (body = highlighted.shift.to_s).size > 0
-            update_cache(:code, id, body)
-            body
-          else
-            "<pre><code>#{CGI.escapeHTML(spec[:code])}</code></pre>"
-          end
-        end
-        data.gsub!(id) do
-          body
-        end
-      end
-
-      data
-    end
-
-    #########################################################################
-    #
-    # Sequence Diagrams
-    #
-    #########################################################################
-
-    # Extract all sequence diagram blocks into the wsdmap and replace with
-    # placeholders.
-    #
-    # data - The raw String data.
-    #
-    # Returns the placeholder'd String data.
-    def extract_wsd(data)
-      data.gsub(/^\{\{\{\{\{\{ ?(.+?)\r?\n(.+?)\r?\n\}\}\}\}\}\}\r?$/m) do
-        id = Digest::SHA1.hexdigest($2)
-        @wsdmap[id] = { :style => $1, :code => $2 }
-        id
-      end
-    end
-
-    # Process all diagrams from the wsdmap and replace the placeholders with
-    # the final HTML.
-    #
-    # data - The String data (with placeholders).
-    #
-    # Returns the marked up String data.
-    def process_wsd(data)
-      @wsdmap.each do |id, spec|
-        style = spec[:style]
-        code = spec[:code]
-        data.gsub!(id) do
-          Gollum::WebSequenceDiagram.new(code, style).to_tag
-        end
-      end
-      data
-    end
-
-    #########################################################################
-    #
-    # Metadata
-    #
-    #########################################################################
-
-    # Extract metadata for data and build metadata table. Metadata
-    # is content found between markers, and must
-    # be a valid YAML mapping.
-    #
-    # Because ri and ruby 1.8.7 are awesome, the markers can't
-    # be included in this documentation without triggering
-    # `Unhandled special: Special: type=17`
-    # Please read the source code for the exact markers
-    #
-    # Returns the String of formatted data with metadata removed.
-    def extract_metadata(data)
-      @metadata = {}
-      data
     end
 
     # Hook for getting the formatted value of extracted tag data.
     #
     # type - Symbol value identifying what type of data is being extracted.