lib/asciidoctor/converter/manpage.rb in asciidoctor-2.0.12 vs lib/asciidoctor/converter/manpage.rb in asciidoctor-2.0.13
- old
+ new
@@ -1,11 +1,15 @@
# frozen_string_literal: true
module Asciidoctor
# A built-in {Converter} implementation that generates the man page (troff) format.
#
-# The output follows the groff man page definition while also trying to be
-# consistent with the output produced by the a2x tool from AsciiDoc Python.
+# The output of this converter adheres to the man definition as defined by
+# groff and uses the manpage output of the DocBook toolchain as a foundation.
+# That means if you've previously been generating man pages using the a2x tool
+# from AsciiDoc Python, you should be able to achieve a very similar result
+# using this converter. Though you'll also get to enjoy some notable
+# enhancements that have been added since, such as the customizable linkstyle.
#
# See http://www.gnu.org/software/groff/manual/html_node/Man-usage.html#Man-usage
class Converter::ManPageConverter < Converter::Base
register_for 'manpage'
@@ -16,14 +20,17 @@
ESC_FS = %(#{ESC}.) # escaped full stop (indicates troff macro)
# Matches a backslash at the very start of the string, or any backslash that is
# optionally preceded by ESC (the ESC, when present, is captured in group 1).
LiteralBackslashRx = /\A\\|(#{ESC})?\\/
# Matches a period at the beginning of a line.
LeadingPeriodRx = /^\./
# Matches an escaped URL or MTO macro line. Group 1 is the macro name plus its two
# quoted arguments, group 2 is a single space or the run of non-space characters
# right after them, and group 3 is the remainder of the line. An ESC-prefixed \c
# line before the macro and an ESC-prefixed \c at the end of the line are consumed
# when present.
EscapedMacroRx = /^(?:#{ESC}\\c\n)?#{ESC}\.((?:URL|MTO) "#{CC_ANY}*?" "#{CC_ANY}*?" )( |[^\s]*)(#{CC_ANY}*?)(?: *#{ESC}\\c)?$/
- MockBoundaryRx = /<\/?BOUNDARY>/
# Matches an ESC-prefixed \c followed by a space and an escaped URL or MTO macro;
# the \c part and the macro part are captured separately (groups 1 and 2).
+ MalformedEscapedMacroRx = /(#{ESC}\\c) (#{ESC}\.(?:URL|MTO) )/
# Matches a fake XML start or end tag whose name is an ESC-prefixed troff escape,
# capturing that escape in group 1.
+ MockMacroRx = /<\/?(#{ESC}\\[^>]+)>/
# Matches the em dash character reference, optionally followed by a zero width
# space character reference. The pattern targets the numeric references, not the
# decoded characters.
EmDashCharRefRx = /&#8212;(?:&#8203;)?/
# Matches the horizontal ellipsis character reference, optionally followed by a
# zero width space character reference.
EllipsisCharRefRx = /&#8230;(?:&#8203;)?/
# Matches an LF together with any run of CG_BLANK characters on either side of it.
WrappedIndentRx = /#{CG_BLANK}*#{LF}#{CG_BLANK}*/
# Detects XML markup in a string: an entity/character reference or the start of a tag.
+ XMLMarkupRx = /&#?[a-z\d]+;|</
# Tokenizes a string into markup (group 1: a reference or a complete tag) and
# plain text runs containing neither & nor < (group 2).
+ PCDATAFilterRx = /(&#?[a-z\d]+;|<[^>]+>)|([^&<]+)/
# Creates the converter for the given backend and declares its output traits.
#
# backend - the backend name stored on this converter
# opts    - an options Hash (accepted but not read in this method)
def initialize backend, opts = {}
  @backend = backend
  init_backend_traits(
    basebackend: 'manpage',
    filetype: 'man',
    outfilesuffix: '.man',
    supports_templates: true
  )
end
@@ -89,21 +96,18 @@
unless node.noheader
if node.attr? 'manpurpose'
mannames = node.attr 'mannames', [manname]
result << %(.SH "#{(node.attr 'manname-title', 'NAME').upcase}"
-#{mannames.map {|n| manify n }.join ', '} \\- #{manify node.attr('manpurpose'), whitespace: :normalize})
+#{mannames.map {|n| (manify n).gsub '\-', '-' }.join ', '} \\- #{manify node.attr('manpurpose'), whitespace: :normalize})
end
end
result << node.content
# QUESTION should NOTES come after AUTHOR(S)?
- if node.footnotes? && !(node.attr? 'nofootnotes')
- result << '.SH "NOTES"'
- result.concat(node.footnotes.map {|fn| %(#{fn.index}. #{fn.text}) })
- end
+ append_footnotes result, node
unless (authors = node.authors).empty?
if authors.size > 1
result << '.SH "AUTHORS"'
authors.each do |author|
@@ -122,14 +126,11 @@
# NOTE embedded doesn't really make sense in the manpage backend
# Converts the node without the surrounding man page header: the converted
# content, followed by any footnotes, joined into one string using LF.
def convert_embedded node
# the body content is always the first entry of the output
result = [node.content]
- if node.footnotes? && !(node.attr? 'nofootnotes')
- result << '.SH "NOTES"'
- result.concat(node.footnotes.map {|fn| %(#{fn.index}. #{fn.text}) })
- end
+ append_footnotes result, node
# QUESTION should we add an AUTHOR(S) section?
result.join LF
end
@@ -140,11 +141,11 @@
macro = 'SS'
# QUESTION why captioned title? why not when level == 1?
stitle = node.captioned_title
else
macro = 'SH'
- stitle = node.title.upcase
+ stitle = uppercase_pcdata node.title
end
result << %(.#{macro} "#{manify stitle}"
#{node.content})
result.join LF
end
@@ -313,12 +314,13 @@
else
node.content
end
end
- # TODO use Page Control https://www.gnu.org/software/groff/manual/html_node/Page-Control.html#Page-Control
- alias convert_page_break skip
# Converts a page break to the troff page control request (.bp). The node
# argument is not consulted.
+ def convert_page_break node
+ '.bp'
+ end
def convert_paragraph node
if node.title?
%(.sp
.B #{manify node.title}
@@ -529,16 +531,17 @@
result << '.RE'
end
result.join LF
end
- # FIXME git uses [verse] for the synopsis; detect this special case
def convert_verse node
result = []
- result << (node.title? ? %(.sp
+ if node.title?
+ result << %(.sp
.B #{manify node.title}
-.br) : '.sp')
+.br)
+ end
attribution_line = (node.attr? 'citetitle') ? %(#{node.attr 'citetitle'} ) : nil
attribution_line = (node.attr? 'attribution') ? %[#{attribution_line}\\(em #{node.attr 'attribution'}] : nil
result << %(.sp
.nf
#{manify node.content, whitespace: :preserve}
@@ -607,11 +610,10 @@
# Renders a callout as its text in parentheses, wrapped in the escaped troff
# font escapes fB (before) and fP (after).
def convert_inline_callout node
  font_on = %(#{ESC_BS}fB)
  font_off = %(#{ESC_BS}fP)
  %(#{font_on}(#{node.text})#{font_off})
end
- # TODO supposedly groff has footnotes, but we're in search of an example
def convert_inline_footnote node
if (index = node.attr 'index')
%([#{index}])
elsif node.type == :xref
%([#{node.text}])
@@ -645,23 +647,23 @@
else
%(#{ESC_BS}fI#{menu}#{ESC_BS}fP)
end
end
- # NOTE use fake <BOUNDARY> element to prevent creating artificial word boundaries
+ # NOTE use fake XML elements to prevent creating artificial word boundaries
# Wraps quoted inline text in the escaped troff sequence pair for its type.
# Each sequence is written as a fake XML tag (an opening <...> and a closing
# </...>), which is the shape MockMacroRx matches. Types not listed return the
# text unchanged.
def convert_inline_quoted node
case node.type
when :emphasis
- %(#{ESC_BS}fI<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}fP)
+ %(<#{ESC_BS}fI>#{node.text}</#{ESC_BS}fP>)
when :strong
- %(#{ESC_BS}fB<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}fP)
+ %(<#{ESC_BS}fB>#{node.text}</#{ESC_BS}fP>)
when :monospaced
- %[#{ESC_BS}f(CR<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}fP]
+ %[<#{ESC_BS}f(CR>#{node.text}</#{ESC_BS}fP>]
when :single
# (oq / (cq are the opening and closing single quote glyph escapes
- %[#{ESC_BS}(oq<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}(cq]
+ %[<#{ESC_BS}(oq>#{node.text}</#{ESC_BS}(cq>]
when :double
# (lq / (rq are the left and right double quote glyph escapes
- %[#{ESC_BS}(lq<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}(rq]
+ %[<#{ESC_BS}(lq>#{node.text}</#{ESC_BS}(rq>]
else
node.text
end
end
@@ -676,10 +678,26 @@
end
end
private
+ def append_footnotes result, node
+ if node.footnotes? && !(node.attr? 'nofootnotes')
+ result << '.SH "NOTES"'
+ node.footnotes.each_with_index do |fn, idx|
+ result << %(.IP [#{fn.index}])
+ # NOTE restore newline in escaped macro that gets removed by normalize_text in substitutor
+ if (text = fn.text).include? %(#{ESC}\\c #{ESC}.)
+ text = (manify %(#{text.gsub MalformedEscapedMacroRx, %(\\1#{LF}\\2)} ), whitespace: :normalize).chomp ' '
+ else
+ text = manify text, whitespace: :normalize
+ end
+ result << text
+ end
+ end
+ end
+
# Converts HTML entity references back to their original form, escapes
# special man characters and strips trailing whitespace.
#
# It's crucial that text only ever pass through manify once.
#
@@ -702,11 +720,11 @@
str = str.
gsub(LiteralBackslashRx) { $1 ? $& : '\\(rs' }. # literal backslash (not a troff escape sequence)
gsub(EllipsisCharRefRx, '...'). # horizontal ellipsis
gsub(LeadingPeriodRx, '\\\&.'). # leading . is used in troff for macro call or other formatting; replace with \&.
# drop orphaned \c escape lines, unescape troff macro, quote adjacent character, isolate macro line
- gsub(EscapedMacroRx) { (rest = $3.lstrip).empty? ? %(.#$1"#$2") : %(.#$1"#$2"#{LF}#{rest}) }.
+ gsub(EscapedMacroRx) { (rest = $3.lstrip).empty? ? %(.#$1"#$2") : %(.#$1"#{$2.rstrip}"#{LF}#{rest}) }.
gsub('-', '\-').
gsub('<', '<').
gsub('>', '>').
gsub(' ', '\~'). # non-breaking space
gsub('©', '\(co'). # copyright sign
@@ -724,14 +742,18 @@
gsub('⇐', '\(lA'). # leftwards double arrow
gsub('⇒', '\(rA'). # rightwards double arrow
gsub('​', '\:'). # zero width space
gsub('&', '&'). # literal ampersand (NOTE must take place after any other replacement that includes &)
gsub('\'', '\(aq'). # apostrophe-quote
- gsub(MockBoundaryRx, ''). # mock boundary
+ gsub(MockMacroRx, '\1'). # mock boundary
gsub(ESC_BS, '\\'). # unescape troff backslash (NOTE update if more escapes are added)
gsub(ESC_FS, '.'). # unescape full stop in troff commands (NOTE must take place after gsub(LeadingPeriodRx))
rstrip # strip trailing space
opts[:append_newline] ? %(#{str}#{LF}) : str
+ end
+
# Uppercases the text in string while leaving XML markup untouched. When
# XMLMarkupRx finds no markup the whole string is upcased; otherwise
# PCDATAFilterRx walks the string and only the plain text runs (group 2) are
# upcased, while references and tags (group 1) are passed through as is.
+ def uppercase_pcdata string
+ (XMLMarkupRx.match? string) ? string.gsub(PCDATAFilterRx) { $2 ? $2.upcase : $1 } : string.upcase
end
# Returns the node's content ready for output. Compound content is returned
# untouched; any other content is passed through manify (whitespace normalized)
# and placed after a .sp line.
def enclose_content node
  return node.content if node.content_model == :compound

  normalized = manify node.content, whitespace: :normalize
  %(.sp#{LF}#{normalized})
end