require 'digest/sha1'
require 'cgi'
require 'pygments'
require 'base64'
# initialize Pygments
Pygments.start
module Gollum
class Markup
attr_accessor :toc
attr_reader :metadata
# Initialize a new Markup object.
#
# page - The Gollum::Page.
#
# Returns a new Gollum::Markup object, ready for rendering.
def initialize(page)
@wiki = page.wiki
@name = page.filename
@data = page.text_data
@version = page.version.id if page.version
@format = page.format
@sub_page = page.sub_page
@parent_page = page.parent_page
@dir = ::File.dirname(page.path)
@tagmap = {}
@codemap = {}
@texmap = {}
@wsdmap = {}
@premap = {}
@toc = nil
@metadata = nil
end
# Render the content with Gollum wiki syntax on top of the file's own
# markup language.
#
# no_follow - Boolean that determines if rel="nofollow" is added to all
# tags.
# encoding - Encoding Constant or String.
#
# Returns the formatted String content.
def render(no_follow = false, encoding = nil)
sanitize = no_follow ?
@wiki.history_sanitizer :
@wiki.sanitizer
data = @data.dup
data = extract_metadata(data)
data = extract_code(data)
data = extract_tex(data)
data = extract_wsd(data)
data = extract_tags(data)
begin
data = GitHub::Markup.render(@name, data)
if data.nil?
raise "There was an error converting #{@name} to HTML."
end
rescue Object => e
data = %{#{e.message}
}
end
data = process_tags(data)
data = process_code(data, encoding)
doc = Nokogiri::HTML::DocumentFragment.parse(data)
doc = sanitize.clean_node!(doc) if sanitize
doc,toc = process_headers(doc)
@toc = @sub_page ? ( @parent_page ? @parent_page.toc_data : "[[_TOC_]]" ) : toc
yield doc if block_given?
data = doc.to_html
data = process_toc_tags(data)
data = process_tex(data)
data = process_wsd(data)
data.gsub!(/<\/p>/, '')
data
end
# Inserts header anchors and creates TOC
#
# doc - Nokogiri parsed document
#
# Returns doc Document and toc String
def process_headers(doc)
toc = nil
doc.css('h1,h2,h3,h4,h5,h6').each do |h|
id = encodeURIComponent(h.content.gsub(' ','-'))
level = h.name.gsub(/[hH]/,'').to_i
# Add anchors
anchor = Nokogiri::XML::Node.new('a', doc)
anchor['class'] = 'anchor'
anchor['id'] = id
# % -> %25 so anchors work on Firefox. See issue #475
anchor['href'] = '#' + id.gsub('%', '%25')
h.add_child(anchor)
# Build TOC
toc ||= Nokogiri::XML::DocumentFragment.parse('
')
tail ||= toc.child
tail_level ||= 0
while tail_level < level
node = Nokogiri::XML::Node.new('ul', doc)
tail = tail.add_child(node)
tail_level += 1
end
while tail_level > level
tail = tail.parent
tail_level -= 1
end
node = Nokogiri::XML::Node.new('li', doc)
# % -> %25 so anchors work on Firefox. See issue #475
node.add_child("#{h.content}")
tail.add_child(node)
end
toc = toc.to_xhtml if toc != nil
[doc, toc]
end
#########################################################################
#
# TeX
#
#########################################################################
# Extract all TeX into the texmap and replace with placeholders.
#
# data - The raw String data.
#
# Returns the placeholder'd String data.
def extract_tex(data)
data.gsub(/\\\[\s*(.*?)\s*\\\]/m) do
tag = CGI.escapeHTML($1)
id = Digest::SHA1.hexdigest(tag)
@texmap[id] = [:block, tag]
id
end.gsub(/\\\(\s*(.*?)\s*\\\)/m) do
tag = CGI.escapeHTML($1)
id = Digest::SHA1.hexdigest(tag)
@texmap[id] = [:inline, tag]
id
end
end
# Process all TeX from the texmap and replace the placeholders with the
# final markup.
#
# data - The String data (with placeholders).
#
# Returns the marked up String data.
def process_tex(data)
@texmap.each do |id, spec|
type, tex = *spec
# Obtain the formula with parameters
out = nil
begin
width, height, align, base64 = Gollum::Tex.render_formula(tex, true)
# TODO: Should we load the binary inside the html?
#out = %{}
# Use the alignment values from the formula rendering but still use the call to '_tex.png'. Although it will call render_formula()
# again, it will use the already cached formula and it might have some advantages from the point of view of browser caching (really not sure here).
out = %{}
rescue # In case of error
out = CGI.escapeHTML(tex)
end
data.gsub!(id, out)
end
data
end
#########################################################################
#
# Tags
#
#########################################################################
# Extract all tags into the tagmap and replace with placeholders.
#
# data - The raw String data.
#
# Returns the placeholder'd String data.
def extract_tags(data)
if @format == :asciidoc
return data
end
data.gsub!(/(.?)\[\[(.+?)\]\]([^\[]?)/m) do
if $1 == "'" && $3 != "'"
"[[#{$2}]]#{$3}"
elsif $2.include?('][')
if $2[0..4] == 'file:'
pre = $1
post = $3
parts = $2.split('][')
parts[0][0..4] = ""
link = "#{parts[1]}|#{parts[0].sub(/\.org/,'')}"
id = Digest::SHA1.hexdigest(link)
@tagmap[id] = link
"#{pre}#{id}#{post}"
else
$&
end
else
id = Digest::SHA1.hexdigest($2)
@tagmap[id] = $2
"#{$1}#{id}#{$3}"
end
end
data
end
# Process all tags from the tagmap and replace the placeholders with the
# final markup.
#
# data - The String data (with placeholders).
#
# Returns the marked up String data.
def process_tags(data)
@tagmap.each do |id, tag|
# If it's preformatted, just put the tag back
if is_preformatted?(data, id)
data.gsub!(id, "[[#{tag}]]")
else
data.gsub!(id, process_tag(tag).gsub('%2F', '/'))
end
end
data
end
# Find `id` within `data` and determine if it's within
# preformatted tags.
#
# data - The String data (with placeholders).
# id - The String SHA1 hash.
PREFORMATTED_TAGS = %w(code tt)
def is_preformatted?(data, id)
doc = Nokogiri::HTML::DocumentFragment.parse(data)
node = doc.search("[text()*='#{id}']").first
node && (PREFORMATTED_TAGS.include?(node.name) ||
node.ancestors.any? { |a| PREFORMATTED_TAGS.include?(a.name) })
end
# Process a single tag into its final HTML form.
#
# tag - The String tag contents (the stuff inside the double
# brackets).
#
# Returns the String HTML version of the tag.
def process_tag(tag)
if tag =~ /^_TOC_$/
%{[[#{tag}]]}
elsif html = process_image_tag(tag)
html
elsif html = process_file_link_tag(tag)
html
else
process_page_link_tag(tag)
end
end
# Attempt to process the tag as an image tag.
#
# tag - The String tag contents (the stuff inside the double brackets).
#
# Returns the String HTML if the tag is a valid image tag or nil
# if it is not.
def process_image_tag(tag)
parts = tag.split('|')
return if parts.size.zero?
name = parts[0].strip
path = if file = find_file(name)
::File.join @wiki.base_path, file.path
elsif name =~ /^https?:\/\/.+(jpg|png|gif|svg|bmp)$/i
name
end
if path
opts = parse_image_tag_options(tag)
containered = false
classes = [] # applied to whatever the outermost container is
attrs = [] # applied to the image
align = opts['align']
if opts['float']
containered = true
align ||= 'left'
if %w{left right}.include?(align)
classes << "float-#{align}"
end
elsif %w{top texttop middle absmiddle bottom absbottom baseline}.include?(align)
attrs << %{align="#{align}"}
elsif align
if %w{left center right}.include?(align)
containered = true
classes << "align-#{align}"
end
end
if width = opts['width']
if width =~ /^\d+(\.\d+)?(em|px)$/
attrs << %{width="#{width}"}
end
end
if height = opts['height']
if height =~ /^\d+(\.\d+)?(em|px)$/
attrs << %{height="#{height}"}
end
end
if alt = opts['alt']
attrs << %{alt="#{alt}"}
end
attr_string = attrs.size > 0 ? attrs.join(' ') + ' ' : ''
if opts['frame'] || containered
classes << 'frame' if opts['frame']
%{} +
%{} +
%{} +
(alt ? %{#{alt}} : '') +
%{} +
%{}
else
%{}
end
end
end
# Parse any options present on the image tag and extract them into a
# Hash of option names and values.
#
# tag - The String tag contents (the stuff inside the double brackets).
#
# Returns the options Hash:
# key - The String option name.
# val - The String option value or true if it is a binary option.
def parse_image_tag_options(tag)
tag.split('|')[1..-1].inject({}) do |memo, attr|
parts = attr.split('=').map { |x| x.strip }
memo[parts[0]] = (parts.size == 1 ? true : parts[1])
memo
end
end
# Attempt to process the tag as a file link tag.
#
# tag - The String tag contents (the stuff inside the double
# brackets).
#
# Returns the String HTML if the tag is a valid file link tag or nil
# if it is not.
def process_file_link_tag(tag)
parts = tag.split('|')
return if parts.size.zero?
name = parts[0].strip
path = parts[1] && parts[1].strip
path = if path && file = find_file(path)
::File.join @wiki.base_path, file.path
elsif path =~ %r{^https?://}
path
else
nil
end
if name && path && file
%{#{name}}
elsif name && path
%{#{name}}
else
nil
end
end
# Attempt to process the tag as a page link tag.
#
# tag - The String tag contents (the stuff inside the double
# brackets).
#
# Returns the String HTML if the tag is a valid page link tag or nil
# if it is not.
def process_page_link_tag(tag)
parts = tag.split('|')
parts.reverse! if @format == :mediawiki
name, page_name = *parts.compact.map(&:strip)
cname = @wiki.page_class.cname(page_name || name)
if name =~ %r{^https?://} && page_name.nil?
%{#{name}}
else
presence = "absent"
link_name = cname
page, extra = find_page_from_name(cname)
if page
link_name = @wiki.page_class.cname(page.name)
presence = "present"
end
link = ::File.join(@wiki.base_path, page ? page.escaped_url_path : CGI.escape(link_name))
%{#{name}}
end
end
# Process the special table of contents tag [[_TOC_]]
#
# data - The String data (with placeholders).
#
# Returns the marked up String data.
def process_toc_tags(data)
data.gsub!("[[_TOC_]]", @toc.nil? ? '' : @toc)
data
end
# Find the given file in the repo.
#
# name - The String absolute or relative path of the file.
#
# Returns the Gollum::File or nil if none was found.
def find_file(name)
if name =~ /^\//
@wiki.file(name[1..-1], @version)
else
path = @dir == '.' ? name : ::File.join(@dir, name)
@wiki.file(path, @version)
end
end
# Find a page from a given cname. If the page has an anchor (#) and has
# no match, strip the anchor and try again.
#
# cname - The String canonical page name including path.
#
# Returns a Gollum::Page instance if a page is found, or an Array of
# [Gollum::Page, String extra] if a page without the extra anchor data
# is found.
def find_page_from_name(cname)
slash = cname.rindex('/')
unless slash.nil?
name = cname[slash+1..-1]
path = cname[0..slash]
page = @wiki.paged(name, path)
else
page = @wiki.paged(cname, '/')
end
if page
return page
end
if pos = cname.index('#')
[@wiki.page(cname[0...pos]), cname[pos..-1]]
end
end
#########################################################################
#
# Code
#
#########################################################################
# Extract all code blocks into the codemap and replace with placeholders.
#
# data - The raw String data.
#
# Returns the placeholder'd String data.
def extract_code(data)
data.gsub!(/^([ \t]*)``` ?([^\r\n]+)?\r?\n(.+?)\r?\n\1```\r?$/m) do
lang = $2 ? $2.strip : nil
id = Digest::SHA1.hexdigest("#{lang}.#{$3}")
cached = check_cache(:code, id)
@codemap[id] = cached ?
{ :output => cached } :
{ :lang => lang, :code => $3, :indent => $1 }
"#{$1}#{id}" # print the SHA1 ID with the proper indentation
end
data
end
# Remove the leading space from a code block. Leading space
# is only removed if every single line in the block has leading
# whitespace.
#
# code - The code block to remove spaces from
# regex - A regex to match whitespace
def remove_leading_space(code, regex)
if code.lines.all? { |line| line =~ /\A\r?\n\Z/ || line =~ regex }
code.gsub!(regex, '')
end
end
# Process all code from the codemap and replace the placeholders with the
# final HTML.
#
# data - The String data (with placeholders).
# encoding - Encoding Constant or String.
#
# Returns the marked up String data.
def process_code(data, encoding = nil)
return data if data.nil? || data.size.zero? || @codemap.size.zero?
blocks = []
@codemap.each do |id, spec|
next if spec[:output] # cached
code = spec[:code]
remove_leading_space(code, /^#{spec[:indent]}/m)
remove_leading_space(code, /^( |\t)/m)
blocks << [spec[:lang], code]
end
highlighted = []
blocks.each do |lang, code|
encoding ||= 'utf-8'
begin
hl_code = Pygments.highlight(code, :lexer => lang, :options => {:encoding => encoding.to_s})
rescue ::RubyPython::PythonError
hl_code = code
end
highlighted << hl_code
end
@codemap.each do |id, spec|
body = spec[:output] || begin
if (body = highlighted.shift.to_s).size > 0
update_cache(:code, id, body)
body
else
"
#{CGI.escapeHTML(spec[:code])}
"
end
end
data.gsub!(id, body)
end
data
end
#########################################################################
#
# Sequence Diagrams
#
#########################################################################
# Extract all sequence diagram blocks into the wsdmap and replace with
# placeholders.
#
# data - The raw String data.
#
# Returns the placeholder'd String data.
def extract_wsd(data)
data.gsub(/^\{\{\{\{\{\{ ?(.+?)\r?\n(.+?)\r?\n\}\}\}\}\}\}\r?$/m) do
id = Digest::SHA1.hexdigest($2)
@wsdmap[id] = { :style => $1, :code => $2 }
id
end
end
# Process all diagrams from the wsdmap and replace the placeholders with
# the final HTML.
#
# data - The String data (with placeholders).
#
# Returns the marked up String data.
def process_wsd(data)
@wsdmap.each do |id, spec|
style = spec[:style]
code = spec[:code]
data.gsub!(id, Gollum::WebSequenceDiagram.new(code, style).to_tag)
end
data
end
#########################################################################
#
# Metadata
#
#########################################################################
# Extract metadata for data and build metadata table. Metadata
# is content found between ` ` markers, and must
# be a valid YAML mapping.
#
# Returns the String of formatted data with metadata removed.
def extract_metadata(data)
@metadata ||= {}
data.gsub(/\<\!--+\s+---(.*?)--+\>/m) do
yaml = @wiki.sanitizer.clean($1)
hash = YAML.load(yaml)
if Hash === hash
@metadata.update(hash)
end
''
end
end
# Hook for getting the formatted value of extracted tag data.
#
# type - Symbol value identifying what type of data is being extracted.
# id - String SHA1 hash of original extracted tag data.
#
# Returns the String cached formatted data, or nil.
def check_cache(type, id)
end
# Hook for caching the formatted value of extracted tag data.
#
# type - Symbol value identifying what type of data is being extracted.
# id - String SHA1 hash of original extracted tag data.
# data - The String formatted value to be cached.
#
# Returns nothing.
def update_cache(type, id, data)
end
end
MarkupGFM = Markup
end