lib/ronn/document.rb in ronn-0.6.6 vs lib/ronn/document.rb in ronn-0.7.0
- old
+ new
@@ -1,6 +1,6 @@
-require 'set'
+require 'time'
require 'cgi'
require 'hpricot'
require 'rdiscount'
require 'ronn/roff'
require 'ronn/template'
@@ -16,12 +16,20 @@
# generated documentation unless overridden by the information
# extracted from the document's name section.
class Document
include Ronn::Utils
- attr_reader :path, :data
+ # Path to the Ronn document. This may be '-' or nil when the Ronn::Document
+ # object is created with a stream.
+ attr_reader :path
+ # The raw input data, read from path or stream and unmodified.
+ attr_reader :data
+
+ # The index used to resolve man and file references.
+ attr_accessor :index
+
# The man page's name: usually a single word name of
# a program or filename; displayed along with the section in
# the left and right portions of the header as well as the bottom
# right section of the footer.
attr_accessor :name
@@ -54,16 +62,26 @@
# the initialize method returns. The attributes hash may contain values
# for any writeable attributes defined on this class.
def initialize(path=nil, attributes={}, &block)
@path = path
@basename = path.to_s =~ /^-?$/ ? nil : File.basename(path)
- @reader = block || Proc.new { |f| File.read(f) }
+ @reader = block ||
+ lambda do |f|
+ if ['-', nil].include?(f)
+ STDIN.read
+ else
+ File.read(f)
+ end
+ end
@data = @reader.call(path)
- @name, @section, @tagline = nil
- @manual, @organization, @date = nil
- @fragment = preprocess
+ @name, @section, @tagline = sniff
+
@styles = %w[man]
+ @manual, @organization, @date = nil
+ @markdown, @input_html, @html = nil
+ @index = Ronn::Index[path || '.']
+ @index.add_manual(self) if path && name
attributes.each { |attr_name,value| send("#{attr_name}=", value) }
end
# Generate a file basename of the form "<name>.<section>.<type>"
@@ -107,11 +125,11 @@
end
# Truthful when the name was extracted from the name section
# of the document.
def name?
- !name.nil?
+ !@name.nil?
end
# Returns the manual page section based first on the document's
# contents and then on the path name.
def section
@@ -119,13 +137,31 @@
end
# True when the section number was extracted from the name
# section of the document.
def section?
- !section.nil?
+ !@section.nil?
end
+ # The name used to reference this manual.
+ def reference_name
+ name + (section && "(#{section})").to_s
+ end
+
+ # Truthful when the document started with an h1 but did not follow
+ # the "<name>(<sect>) -- <tagline>" convention. We assume this is some kind
+ # of custom title.
+ def title?
+ !name? && tagline
+ end
+
+ # The document's title when no name section was defined. When a name section
+ # exists, this value is nil.
+ def title
+ @tagline if !name?
+ end
+
# The date the man page was published. If not set explicitly,
# this is the file's modified time or, if no file is given,
# the current time.
def date
return @date if @date
@@ -134,38 +170,65 @@
end
# Retrieve a list of top-level section headings in the document and return
# as an array of +[id, text]+ tuples, where +id+ is the element's generated
# id and +text+ is the inner text of the heading element.
- def section_heads
- parse_html(to_html_fragment).search('h2[@id]').map do |heading|
- [heading.attributes['id'], heading.inner_text]
- end
+ def toc
+ @toc ||=
+ html.search('h2[@id]').map { |h2| [h2.attributes['id'], h2.inner_text] }
end
+ alias section_heads toc
# Styles to insert in the generated HTML output. This is a simple Array of
# string module names or file paths.
def styles=(styles)
@styles = (%w[man] + styles).uniq
end
+ # Sniff the document header and extract basic document metadata. Return a
+ # tuple of the form: [name, section, description], where missing information
+ # is represented by nil and any element may be missing.
+ def sniff
+ html = Markdown.new(data[0, 512]).to_html
+ heading, html = html.split("</h1>\n", 2)
+ return [nil, nil, nil] if html.nil?
+
+ case heading
+ when /([\w_.\[\]~+=@:-]+)\s*\((\d\w*)\)\s*-+\s*(.*)/
+ # name(section) -- description
+ [$1, $2, $3]
+ when /([\w_.\[\]~+=@:-]+)\s+-+\s+(.*)/
+ # name -- description
+ [$1, nil, $2]
+ else
+ # description
+ [nil, nil, heading.sub('<h1>', '')]
+ end
+ end
+
+ # Preprocessed markdown input text.
+ def markdown
+ @markdown ||= process_markdown!
+ end
+
+ # A Hpricot::Document for the manual content fragment.
+ def html
+ @html ||= process_html!
+ end
+
# Convert the document to :roff, :html, or :html_fragment and
# return the result as a string.
def convert(format)
send "to_#{format}"
end
# Convert the document to roff and return the result as a string.
def to_roff
RoffFilter.new(
to_html_fragment(wrap_class=nil),
- name,
- section,
- tagline,
- manual,
- organization,
- date
+ name, section, tagline,
+ manual, organization, date
).to_s
end
# Convert the document to HTML and return the result as a string.
def to_html
@@ -175,82 +238,139 @@
layout_path = nil
end
end
template = Ronn::Template.new(self)
+ template.context.push :html => to_html_fragment(wrap_class=nil)
template.render(layout_path || 'default')
end
# Convert the document to HTML and return the result
# as a string. The HTML does not include <html>, <head>,
# or <style> tags.
def to_html_fragment(wrap_class='mp')
- wrap_class = nil if wrap_class.to_s.empty?
- buf = []
- buf << "<div class='#{wrap_class}'>" if wrap_class
- if name? && section?
- buf << "<h2 id='NAME'>NAME</h2>"
- buf << "<p><code>#{name}</code> - #{tagline}</p>"
- elsif tagline
- buf << "<h1>#{[name, tagline].compact.join(' - ')}</h1>"
- end
- buf << @fragment.to_s
- buf << "</div>" if wrap_class
- buf.join("\n")
+ return html.to_s if wrap_class.nil?
+ [
+ "<div class='#{wrap_class}'>",
+ html.to_s,
+ "</div>"
+ ].join("\n")
end
+ def to_markdown
+ markdown
+ end
+
+ def to_h
+ %w[name section tagline manual organization date styles toc].
+ inject({}) { |hash, name| hash[name] = send(name); hash }
+ end
+
+ def to_yaml
+ require 'yaml'
+ to_h.to_yaml
+ end
+
+ def to_json
+ require 'json'
+ to_h.merge('date' => date.iso8601).to_json
+ end
+
protected
- # The preprocessed markdown source text.
- attr_reader :markdown
+ ##
+ # Document Processing
- # Parse the document and extract the name, section, and tagline
- # from its contents. This is called while the object is being
- # initialized.
- def preprocess
- [
- :heading_anchor_pre_filter,
- :angle_quote_pre_filter,
- :markdown_filter,
- :angle_quote_post_filter,
- :definition_list_filter,
- :heading_anchor_filter,
- :annotate_bare_links_filter
- ].inject(data) { |res,filter| send(filter, res) }
+ # Parse the document and extract the name, section, and tagline from its
+ # contents. This is called while the object is being initialized.
+ def preprocess!
+ input_html
+ nil
end
- # Add a 'data-bare-link' attribute to hyperlinks
- # whose text labels are the same as their href URLs.
- def annotate_bare_links_filter(html)
- doc = parse_html(html)
- doc.search('a[@href]').each do |node|
- href = node.attributes['href']
- text = node.inner_text
+ def input_html
+ @input_html ||= strip_heading(Markdown.new(markdown).to_html)
+ end
- if href == text ||
- href[0] == ?# ||
- CGI.unescapeHTML(href) == "mailto:#{CGI.unescapeHTML(text)}"
- then
- node.set_attribute('data-bare-link', 'true')
+ def strip_heading(html)
+ heading, html = html.split("</h1>\n", 2)
+ html || heading
+ end
+
+ def process_markdown!
+ markdown = markdown_filter_heading_anchors(self.data)
+ markdown_filter_link_index(markdown)
+ markdown_filter_angle_quotes(markdown)
+ end
+
+ def process_html!
+ @html = Hpricot(input_html)
+ html_filter_angle_quotes
+ html_filter_definition_lists
+ html_filter_inject_name_section
+ html_filter_heading_anchors
+ html_filter_annotate_bare_links
+ html_filter_manual_reference_links
+ @html
+ end
+
+ ##
+ # Filters
+
+ # Appends all index links to the end of the document as Markdown reference
+ # links. This lets us use [foo(3)][] syntax to link to index entries.
+ def markdown_filter_link_index(markdown)
+ return markdown if index.nil? || index.empty?
+ markdown << "\n\n"
+ index.each { |ref| markdown << "[#{ref.name}]: #{ref.url}\n" }
+ end
+
+ # Add [id]: #ANCHOR elements to the markdown source text for all sections.
+ # This lets us use the [SECTION-REF][] syntax
+ def markdown_filter_heading_anchors(markdown)
+ first = true
+ markdown.split("\n").grep(/^[#]{2,5} +[\w '-]+[# ]*$/).each do |line|
+ markdown << "\n\n" if first
+ first = false
+ title = line.gsub(/[^\w -]/, '').strip
+ anchor = title.gsub(/\W+/, '-').gsub(/(^-+|-+$)/, '')
+ markdown << "[#{title}]: ##{anchor} \"#{title}\"\n"
+ end
+ markdown
+ end
+
+ # Convert <WORD> to <var>WORD</var> but only if WORD isn't an HTML tag.
+ def markdown_filter_angle_quotes(markdown)
+ markdown.gsub(/\<([^:.\/]+?)\>/) do |match|
+ contents = $1
+ tag, attrs = contents.split(' ', 2)
+ if attrs =~ /\/=/ || html_element?(tag.sub(/^\//, '')) ||
+ data.include?("</#{tag}>")
+ match.to_s
+ else
+ "<var>#{contents}</var>"
end
end
- doc
end
- # Add URL anchors to all HTML heading elements.
- def heading_anchor_filter(html)
- doc = parse_html(html)
- doc.search('h1|h2|h3|h4|h5|h6').not('[@id]').each do |heading|
- heading.set_attribute('id', heading.inner_text.gsub(/\W+/, '-'))
+ # Perform angle quote (<THESE>) post filtering.
+ def html_filter_angle_quotes
+ # convert all angle quote vars nested in code blocks
+ # back to the original text
+ @html.search('code').search('text()').each do |node|
+ next unless node.to_html.include?('var>')
+ new =
+ node.to_html.
+ gsub('<var>', '<').
+ gsub("</var>", '>')
+ node.swap(new)
end
- doc
end
# Convert special format unordered lists to definition lists.
- def definition_list_filter(html)
- doc = parse_html(html)
+ def html_filter_definition_lists
# process all unordered lists depth-first
- doc.search('ul').to_a.reverse.each do |ul|
+ @html.search('ul').to_a.reverse.each do |ul|
items = ul.search('li')
next if items.any? { |item| item.inner_text.split("\n", 2).first !~ /:$/ }
ul.name = 'dl'
items.each do |item|
@@ -268,88 +388,72 @@
item.name = 'dd'
container.swap(wrap.sub(/></, ">#{definition}<"))
end
end
- doc
end
- # Perform angle quote (<THESE>) post filtering.
- def angle_quote_post_filter(html)
- doc = parse_html(html)
- # convert all angle quote vars nested in code blocks
- # back to the original text
- doc.search('code').search('text()').each do |node|
- next unless node.to_html.include?('var>')
- new =
- node.to_html.
- gsub('<var>', '<').
- gsub("</var>", '>')
- node.swap(new)
+ def html_filter_inject_name_section
+ markup =
+ if title?
+ "<h1>#{title}</h1>"
+ elsif name
+ "<h2>NAME</h2>\n" +
+ "<p class='man-name'>\n <code>#{name}</code>" +
+ (tagline ? " - <span class='man-whatis'>#{tagline}</span>\n" : "\n") +
+ "</p>\n"
+ end
+ if markup
+ if @html.children
+ @html.at("*").before(markup)
+ else
+ @html = Hpricot(markup)
+ end
end
- doc
end
- # Run markdown on the data and extract name, section, and
- # tagline.
- def markdown_filter(data)
- @markdown = data
- html = Markdown.new(data).to_html
- @tagline, html = html.split("</h1>\n", 2)
- if html.nil?
- html = @tagline
- @tagline = nil
- else
- # grab name and section from title
- @tagline.sub!('<h1>', '')
- if @tagline =~ /([\w_.\[\]~+=@:-]+)\s*\((\d\w*)\)\s*-+\s*(.*)/
- @name = $1
- @section = $2
- @tagline = $3
- elsif @tagline =~ /([\w_.\[\]~+=@:-]+)\s+-+\s+(.*)/
- @name = $1
- @tagline = $2
- end
+ # Add URL anchors to all HTML heading elements.
+ def html_filter_heading_anchors
+ @html.search('h2|h3|h4|h5|h6').not('[@id]').each do |heading|
+ heading.set_attribute('id', heading.inner_text.gsub(/\W+/, '-'))
end
-
- html.to_s
end
- # Convert all <WORD> to <var>WORD</var> but only if WORD
- # isn't an HTML tag.
- def angle_quote_pre_filter(data)
- data.gsub(/\<([^:.\/]+?)\>/) do |match|
- contents = $1
- tag, attrs = contents.split(' ', 2)
- if attrs =~ /\/=/ ||
- html_element?(tag.sub(/^\//, '')) ||
- data.include?("</#{tag}>")
- match.to_s
- else
- "<var>#{contents}</var>"
+ # Add a 'data-bare-link' attribute to hyperlinks
+ # whose text labels are the same as their href URLs.
+ def html_filter_annotate_bare_links
+ @html.search('a[@href]').each do |node|
+ href = node.attributes['href']
+ text = node.inner_text
+
+ if href == text ||
+ href[0] == ?# ||
+ CGI.unescapeHTML(href) == "mailto:#{CGI.unescapeHTML(text)}"
+ then
+ node.set_attribute('data-bare-link', 'true')
end
end
end
- # Add [id]: #ANCHOR elements to the markdown source text for all sections.
- # This lets us use the [SECTION-REF][] syntax
- def heading_anchor_pre_filter(data)
- first = true
- data.split("\n").grep(/^[#]{2,5} +[\w '-]+[# ]*$/).each do |line|
- data << "\n\n" if first
- first = false
- title = line.gsub(/[^\w -]/, '').strip
- anchor = title.gsub(/\W+/, '-').gsub(/(^-+|-+$)/, '')
- data << "[#{title}]: ##{anchor} \"#{title}\"\n"
- end
- data
- end
- private
- def parse_html(html)
- if html.respond_to?(:doc?) && html.doc?
- html
- else
- Hpricot(html.to_s)
+ # Convert text of the form "name(section)" to a hyperlink. The URL is
+ # obtained from the index.
+ def html_filter_manual_reference_links
+ return if index.nil?
+ @html.search('text()').each do |node|
+ next if !node.content.include?(')')
+ next if %w[pre code h1 h2 h3].include?(node.parent.name)
+ next if child_of?(node, 'a')
+ node.swap(
+ node.content.gsub(/([0-9A-Za-z_:.+=@~-]+)(\(\d+\w*\))/) {
+ name, sect = $1, $2
+ if ref = index["#{name}#{sect}"]
+ "<a class='man-ref' href='#{ref.url}'>#{name}<span class='s'>#{sect}</span></a>"
+ else
+ # warn "warn: manual reference not defined: '#{name}#{sect}'"
+ "<span class='man-ref'>#{name}<span class='s'>#{sect}</span></span>"
+ end
+ }
+ )
end
end
end
end