# ~*~ encoding: utf-8 ~*~
require 'digest/sha1'
require 'cgi'
require 'base64'
require 'yaml'
require File.expand_path '../helpers', __FILE__
require File.expand_path '../remote_code', __FILE__
module Gollum
class Markup
include Helpers
@formats = {}
class << self
attr_reader :formats
# Register a file extension and associated markup type
#
# ext - The file extension
# name - The name of the markup type
# options - Hash of options:
# regexp - Regexp to match against.
# Defaults to exact match of ext.
#
# If given a block, that block will be registered with GitHub::Markup to
# render any matching pages
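#
# Example (illustrative only; the extension and regexp here are
# assumptions, not registrations performed by this file):
#
#   Gollum::Markup.register(:markdown, "Markdown",
#     :regexp => /md|mkdn?|mdown|markdown/)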
def register(ext, name, options = {}, &block)
regexp = options[:regexp] || Regexp.new(ext.to_s)
@formats[ext] = { :name => name, :regexp => regexp }
GitHub::Markup.add_markup(regexp, &block) if block_given?
end
end
attr_accessor :toc
attr_reader :metadata
# Initialize a new Markup object.
#
# page - The Gollum::Page.
#
# Returns a new Gollum::Markup object, ready for rendering.
def initialize(page)
@wiki = page.wiki
@name = page.filename
@data = page.text_data
@version = page.version.id if page.version
@format = page.format
@sub_page = page.sub_page
@parent_page = page.parent_page
@dir = ::File.dirname(page.path)
@tagmap = {}
@codemap = {}
@wsdmap = {}
@premap = {}
@toc = nil
@metadata = nil
@to_xml = { :save_with => Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^ 1, :indent => 0, :encoding => 'UTF-8' }
end
# Render the content with Gollum wiki syntax on top of the file's own
# markup language.
#
# no_follow - Boolean that determines if rel="nofollow" is added to all
# tags.
# encoding - Encoding Constant or String.
#
# Returns the formatted String content.
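#
# Example (a sketch; assumes `page` is an existing Gollum::Page):
#
#   markup = Gollum::Markup.new(page)
#   html   = markup.render          # render with the wiki's default sanitizer
#   html   = markup.render(true)    # use the history sanitizer (rel="nofollow")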
def render(no_follow = false, encoding = nil)
sanitize = no_follow ?
@wiki.history_sanitizer :
@wiki.sanitizer
data = @data.dup
data = extract_metadata(data)
data = extract_remote_code(data)
data = extract_code(data)
data = extract_wsd(data)
data = extract_tags(data)
begin
data = GitHub::Markup.render(@name, data)
if data.nil?
raise "There was an error converting #{@name} to HTML."
end
rescue Object => e
data = %{<p class="gollum-error">#{e.message}</p>}
end
data = process_tags(data)
data = process_code(data, encoding)
doc = Nokogiri::HTML::DocumentFragment.parse(data)
doc = sanitize.clean_node!(doc) if sanitize
doc,toc = process_headers(doc)
@toc = @sub_page ? ( @parent_page ? @parent_page.toc_data : "[[_TOC_]]" ) : toc
yield doc if block_given?
# nokogiri's save options are ored together. FORMAT has a value of 1 so ^ 1 removes it.
# formatting will create extra spaces in pre tags.
# https://github.com/sparklemotion/nokogiri/issues/782
# DEFAULT_HTML encodes unicode so XHTML is used for proper unicode support in href.
data = doc.to_xml( @to_xml )
data = process_toc_tags(data)
data = process_wsd(data)
data.gsub!(/<p><\/p>/) do
''
end
data
end
# Inserts header anchors and creates TOC
#
# doc - Nokogiri parsed document
#
# Returns doc Document and toc String
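#
# Example (illustrative input):
#
#   doc = Nokogiri::HTML::DocumentFragment.parse('<h2>Getting Started</h2>')
#   doc, toc = process_headers(doc)
#   # the header now carries an anchor named "Getting-Started", and toc is
#   # an HTML list with a link to "#Getting-Started"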
def process_headers(doc)
toc = nil
doc.css('h1,h2,h3,h4,h5,h6').each do |h|
# must escape "
h_name = h.content.gsub(' ','-').gsub('"','%22')
level = h.name.gsub(/[hH]/,'').to_i
# Add anchors
h.add_child(%Q{<a class="anchor" id="#{h_name}" href="##{h_name}"></a>})
# Build TOC
toc ||= Nokogiri::XML::DocumentFragment.parse('<div class="toc"><div class="toc-title">Table of Contents</div></div>')
tail ||= toc.child
tail_level ||= 0
while tail_level < level
node = Nokogiri::XML::Node.new('ul', doc)
tail = tail.add_child(node)
tail_level += 1
end
while tail_level > level
tail = tail.parent
tail_level -= 1
end
node = Nokogiri::XML::Node.new('li', doc)
# % -> %25 so anchors work on Firefox. See issue #475
node.add_child(%Q{<a href="##{h_name.gsub('%', '%25')}">#{h.content}</a>})
tail.add_child(node)
end
toc = toc.to_xml(@to_xml) if toc != nil
[doc, toc]
end
#########################################################################
#
# Tags
#
#########################################################################
# Extract all tags into the tagmap and replace with placeholders.
#
# data - The raw String data.
#
# Returns the placeholder'd String data.
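#
# Example (illustrative; the digest is abbreviated):
#
#   extract_tags("See [[Home]] for details")
#   # => "See <sha1-of-tag> for details", and @tagmap maps that digest
#   #    back to "Home" for process_tags to expand later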
def extract_tags(data)
if @format == :asciidoc
return data
end
data.gsub!(/(.?)\[\[(.+?)\]\]([^\[]?)/m) do
if $1 == "'" && $3 != "'"
"[[#{$2}]]#{$3}"
elsif $2.include?('][')
if $2[0..4] == 'file:'
pre = $1
post = $3
parts = $2.split('][')
parts[0][0..4] = ""
link = "#{parts[1]}|#{parts[0].sub(/\.org/,'')}"
id = Digest::SHA1.hexdigest(link)
@tagmap[id] = link
"#{pre}#{id}#{post}"
else
$&
end
else
id = Digest::SHA1.hexdigest($2)
@tagmap[id] = $2
"#{$1}#{id}#{$3}"
end
end
data
end
# Process all tags from the tagmap and replace the placeholders with the
# final markup.
#
# data - The String data (with placeholders).
#
# Returns the marked up String data.
def process_tags(data)
@tagmap.each do |id, tag|
# If it's preformatted, just put the tag back
if is_preformatted?(data, id)
data.gsub!(id) do
"[[#{tag}]]"
end
else
data.gsub!(id) do
process_tag(tag).gsub('%2F', '/')
end
end
end
data
end
# Find `id` within `data` and determine if it's within
# preformatted tags.
#
# data - The String data (with placeholders).
# id - The String SHA1 hash.
PREFORMATTED_TAGS = %w(code tt)
def is_preformatted?(data, id)
doc = Nokogiri::HTML::DocumentFragment.parse(data)
node = doc.search("[text()*='#{id}']").first
node && (PREFORMATTED_TAGS.include?(node.name) ||
node.ancestors.any? { |a| PREFORMATTED_TAGS.include?(a.name) })
end
# Process a single tag into its final HTML form.
#
# tag - The String tag contents (the stuff inside the double
# brackets).
#
# Returns the String HTML version of the tag.
def process_tag(tag)
if tag =~ /^_TOC_$/
%{[[#{tag}]]}
elsif tag =~ /^_$/
%{<div class="clearfloats"></div>}
elsif html = process_image_tag(tag)
html
elsif html = process_file_link_tag(tag)
html
else
process_page_link_tag(tag)
end
end
# Attempt to process the tag as an image tag.
#
# tag - The String tag contents (the stuff inside the double brackets).
#
# Returns the String HTML if the tag is a valid image tag or nil
# if it is not.
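#
# Example (illustrative tag; the file name and options are assumptions):
#
#   process_image_tag('logo.png|alt=Logo|width=200px')
#   # => an <img> tag for the file's wiki path with alt="Logo" and
#   #    width="200px", or nil if logo.png cannot be resolved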
def process_image_tag(tag)
parts = tag.split('|')
return if parts.size.zero?
name = parts[0].strip
path = if file = find_file(name)
::File.join @wiki.base_path, file.path
elsif name =~ /^https?:\/\/.+(jpg|png|gif|svg|bmp)$/i
name
end
if path
opts = parse_image_tag_options(tag)
containered = false
classes = [] # applied to whatever the outermost container is
attrs = [] # applied to the image
align = opts['align']
if opts['float']
containered = true
align ||= 'left'
if %w{left right}.include?(align)
classes << "float-#{align}"
end
elsif %w{top texttop middle absmiddle bottom absbottom baseline}.include?(align)
attrs << %{align="#{align}"}
elsif align
if %w{left center right}.include?(align)
containered = true
classes << "align-#{align}"
end
end
if width = opts['width']
if width =~ /^\d+(\.\d+)?(em|px)$/
attrs << %{width="#{width}"}
end
end
if height = opts['height']
if height =~ /^\d+(\.\d+)?(em|px)$/
attrs << %{height="#{height}"}
end
end
if alt = opts['alt']
attrs << %{alt="#{alt}"}
end
attr_string = attrs.size > 0 ? attrs.join(' ') + ' ' : ''
if opts['frame'] || containered
classes << 'frame' if opts['frame']
%{<span class="#{classes.join(' ')}">} +
%{<span>} +
%{<img src="#{path}" #{attr_string}/>} +
(alt ? %{<span>#{alt}</span>} : '') +
%{</span>} +
%{</span>}
else
%{<img src="#{path}" #{attr_string}/>}
end
end
end
# Parse any options present on the image tag and extract them into a
# Hash of option names and values.
#
# tag - The String tag contents (the stuff inside the double brackets).
#
# Returns the options Hash:
# key - The String option name.
# val - The String option value or true if it is a binary option.
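#
# Example (illustrative):
#
#   parse_image_tag_options('logo.png|alt=Logo|frame')
#   # => { "alt" => "Logo", "frame" => true }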
def parse_image_tag_options(tag)
tag.split('|')[1..-1].inject({}) do |memo, attr|
parts = attr.split('=').map { |x| x.strip }
memo[parts[0]] = (parts.size == 1 ? true : parts[1])
memo
end
end
# Attempt to process the tag as a file link tag.
#
# tag - The String tag contents (the stuff inside the double
# brackets).
#
# Returns the String HTML if the tag is a valid file link tag or nil
# if it is not.
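#
# Example (illustrative; the label and path are assumptions):
#
#   process_file_link_tag('Manual|docs/manual.pdf')
#   # => a link labelled "Manual" to the file's path under the wiki's
#   #    base path, or nil if the path is neither a wiki file nor http(s)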
def process_file_link_tag(tag)
parts = tag.split('|')
return if parts.size.zero?
name = parts[0].strip
path = parts[1] && parts[1].strip
path = if path && file = find_file(path)
::File.join @wiki.base_path, file.path
elsif path =~ %r{^https?://}
path
else
nil
end
if name && path && file
%{<a href="#{::File.join @wiki.base_path, file.path}">#{name}</a>}
elsif name && path
%{<a href="#{path}">#{name}</a>}
else
nil
end
end
# Attempt to process the tag as a page link tag.
#
# tag - The String tag contents (the stuff inside the double
# brackets).
#
# Returns the String HTML if the tag is a valid page link tag or nil
# if it is not.
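#
# Examples (illustrative page names):
#
#   process_page_link_tag('Home')       # link to the Home page, classed
#                                       # "present" or "absent"
#   process_page_link_tag('Docs|Home')  # same link, labelled "Docs"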
def process_page_link_tag(tag)
parts = tag.split('|')
parts.reverse! if @format == :mediawiki
name, page_name = *parts.compact.map(&:strip)
cname = @wiki.page_class.cname(page_name || name)
if name =~ %r{^https?://} && page_name.nil?
%{<a href="#{name}">#{name}</a>}
else
presence = "absent"
link_name = cname
page, extra = find_page_from_name(cname)
if page
link_name = @wiki.page_class.cname(page.name)
presence = "present"
end
link = ::File.join(@wiki.base_path, page ? page.escaped_url_path : CGI.escape(link_name))
# //page is invalid
# strip all duplicate forward slashes using helpers.rb trim_leading_slash
# //page => /page
link = trim_leading_slash link
%{<a class="internal #{presence}" href="#{link}#{extra}">#{name}</a>}
end
end
# Process the special table of contents tag [[_TOC_]]
#
# data - The String data (with placeholders).
#
# Returns the marked up String data.
def process_toc_tags(data)
data.gsub!("[[_TOC_]]") do
@toc.nil? ? '' : @toc
end
data
end
# Find the given file in the repo.
#
# name - The String absolute or relative path of the file.
#
# Returns the Gollum::File or nil if none was found.
def find_file(name, version=@version)
if name =~ /^\//
@wiki.file(name[1..-1], version)
else
path = @dir == '.' ? name : ::File.join(@dir, name)
@wiki.file(path, version)
end
end
# Find a page from a given cname. If the page has an anchor (#) and has
# no match, strip the anchor and try again.
#
# cname - The String canonical page name including path.
#
# Returns a Gollum::Page instance if a page is found, or an Array of
# [Gollum::Page, String extra] if a page without the extra anchor data
# is found.
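#
# Example (illustrative; assumes a page named "Install" exists but
# "Install#requirements" does not):
#
#   find_page_from_name('Install#requirements')
#   # => [<Gollum::Page Install>, "#requirements"]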
def find_page_from_name(cname)
slash = cname.rindex('/')
unless slash.nil?
name = cname[slash+1..-1]
path = cname[0..slash]
page = @wiki.paged(name, path)
else
page = @wiki.paged(cname, '/') || @wiki.page(cname)
end
if page
return page
end
if pos = cname.index('#')
[@wiki.page(cname[0...pos]), cname[pos..-1]]
end
end
#########################################################################
#
# Remote code - fetch code from url and replace the contents to a
# code-block that gets run the next parse.
# Acceptable formats:
# ```language:local-file.ext```
# ```language:/abs/other-file.ext```
# ```language:https://example.com/somefile.txt```
#
#########################################################################
def extract_remote_code data
data.gsub /^[ \t]*``` ?([^:\n\r]+):((http)?[^`\n\r]+)```/ do
language = $1
uri = $2
protocol = $3
# Detect local file
if protocol.nil?
if file = self.find_file(uri, @wiki.ref)
contents = file.raw_data
else
# How do we communicate a render error?
next "File not found: #{CGI::escapeHTML(uri)}"
end
else
contents = Gollum::RemoteCode.new(uri).contents
end
"```#{language}\n#{contents}\n```\n"
end
end
#########################################################################
#
# Code
#
#########################################################################
# Extract all code blocks into the codemap and replace with placeholders.
#
# data - The raw String data.
#
# Returns the placeholder'd String data.
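#
# Example (illustrative): a fenced block such as
#
#   ```ruby
#   puts 1
#   ```
#
# is swapped for the SHA1 of its language and body, and @codemap maps
# that id to { :lang => 'ruby', :code => 'puts 1', :indent => '' }.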
def extract_code(data)
data.gsub!(/^([ \t]*)(~~~+) ?([^\r\n]+)?\r?\n(.+?)\r?\n\1(~~~+)[ \t\r]*$/m) do
m_indent = $1
m_start = $2 # ~~~
m_lang = $3
m_code = $4
m_end = $5 # ~~~
# start and finish tilde fence must be the same length
return '' if m_start.length != m_end.length
lang = m_lang ? m_lang.strip : nil
id = Digest::SHA1.hexdigest("#{lang}.#{m_code}")
cached = check_cache(:code, id)
# extract lang from { .ruby } or { #stuff .ruby .indent }
# see http://johnmacfarlane.net/pandoc/README.html#delimited-code-blocks
if lang
lang = lang.match(/\.([^}\s]+)/)
lang = lang[1] unless lang.nil?
end
@codemap[id] = cached ?
{ :output => cached } :
{ :lang => lang, :code => m_code, :indent => m_indent }
"#{m_indent}#{id}" # print the SHA1 ID with the proper indentation
end
data.gsub!(/^([ \t]*)``` ?([^\r\n]+)?\r?\n(.+?)\r?\n\1```[ \t]*\r?$/m) do
lang = $2 ? $2.strip : nil
id = Digest::SHA1.hexdigest("#{lang}.#{$3}")
cached = check_cache(:code, id)
@codemap[id] = cached ?
{ :output => cached } :
{ :lang => lang, :code => $3, :indent => $1 }
"#{$1}#{id}" # print the SHA1 ID with the proper indentation
end
data
end
# Remove the leading space from a code block. Leading space
# is only removed if every single line in the block has leading
# whitespace.
#
# code - The code block to remove spaces from
# regex - A regex to match whitespace
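#
# Example (illustrative):
#
#   code = "  a\n  b\n"
#   remove_leading_space(code, /^  /m)
#   # code is mutated in place to "a\nb\n"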
def remove_leading_space(code, regex)
if code.lines.all? { |line| line =~ /\A\r?\n\Z/ || line =~ regex }
code.gsub!(regex) do
''
end
end
end
# Process all code from the codemap and replace the placeholders with the
# final HTML.
#
# data - The String data (with placeholders).
# encoding - Encoding Constant or String.
#
# Returns the marked up String data.
def process_code(data, encoding = nil)
return data if data.nil? || data.size.zero? || @codemap.size.zero?
blocks = []
@codemap.each do |id, spec|
next if spec[:output] # cached
code = spec[:code]
remove_leading_space(code, /^#{spec[:indent]}/m)
remove_leading_space(code, /^(  |\t)/m)
blocks << [spec[:lang], code]
end
highlighted = []
blocks.each do |lang, code|
encoding ||= 'utf-8'
# Escape the code and expose the language as a class on the <code> element
css_class = lang ? %{ class="#{lang}"} : ''
hl_code = %{<pre><code#{css_class}>#{CGI.escapeHTML(code)}</code></pre>}
highlighted << hl_code
end
@codemap.each do |id, spec|
body = spec[:output] || begin
if (body = highlighted.shift.to_s).size > 0
update_cache(:code, id, body)
body
else
"#{CGI.escapeHTML(spec[:code])}
"
end
end
data.gsub!(id) do
body
end
end
data
end
#########################################################################
#
# Sequence Diagrams
#
#########################################################################
# Extract all sequence diagram blocks into the wsdmap and replace with
# placeholders.
#
# data - The raw String data.
#
# Returns the placeholder'd String data.
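#
# Example (illustrative style and body):
#
#   {{{{{{ default
#   alice->bob: hello
#   }}}}}}
#
# is replaced by a SHA1 placeholder, and @wsdmap maps that id to
# { :style => 'default', :code => 'alice->bob: hello' }.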
def extract_wsd(data)
data.gsub(/^\{\{\{\{\{\{ ?(.+?)\r?\n(.+?)\r?\n\}\}\}\}\}\}\r?$/m) do
id = Digest::SHA1.hexdigest($2)
@wsdmap[id] = { :style => $1, :code => $2 }
id
end
end
# Process all diagrams from the wsdmap and replace the placeholders with
# the final HTML.
#
# data - The String data (with placeholders).
#
# Returns the marked up String data.
def process_wsd(data)
@wsdmap.each do |id, spec|
style = spec[:style]
code = spec[:code]
data.gsub!(id) do
Gollum::WebSequenceDiagram.new(code, style).to_tag
end
end
data
end
#########################################################################
#
# Metadata
#
#########################################################################
# Extract metadata for data and build metadata table. Metadata
# is content found between markers, and must
# be a valid YAML mapping.
#
# Because ri and ruby 1.8.7 are awesome, the markers can't
# be included in this documentation without triggering
# `Unhandled special: Special: type=17`
# Please read the source code for the exact markers
#
# Returns the String of formatted data with metadata removed.
def extract_metadata(data)
@metadata = {}
data.gsub(/\<\!--+\s+---(.*?)--+\>/m) do
yaml = YAML.load($1) rescue nil
@metadata.merge!(yaml) if yaml.is_a?(Hash)
''
end
end
# Hook for getting the formatted value of extracted tag data.
#
# type - Symbol value identifying what type of data is being extracted.
# id - String SHA1 hash of original extracted tag data.
#
# Returns the String cached formatted data, or nil.
def check_cache(type, id)
end
# Hook for caching the formatted value of extracted tag data.
#
# type - Symbol value identifying what type of data is being extracted.
# id - String SHA1 hash of original extracted tag data.
# data - The String formatted value to be cached.
#
# Returns nothing.
def update_cache(type, id, data)
end
end
MarkupGFM = Markup
end