lib/asciidoctor/converter/manpage.rb in asciidoctor-2.0.12 vs lib/asciidoctor/converter/manpage.rb in asciidoctor-2.0.13

- old
+ new

@@ -1,11 +1,15 @@ # frozen_string_literal: true module Asciidoctor # A built-in {Converter} implementation that generates the man page (troff) format. # -# The output follows the groff man page definition while also trying to be -# consistent with the output produced by the a2x tool from AsciiDoc Python. +# The output of this converter adheres to the man definition as defined by +# groff and uses the manpage output of the DocBook toolchain as a foundation. +# That means if you've previously been generating man pages using the a2x tool +# from AsciiDoc Python, you should be able to achieve a very similar result +# using this converter. Though you'll also get to enjoy some notable +# enhancements that have been added since, such as the customizable linkstyle. # # See http://www.gnu.org/software/groff/manual/html_node/Man-usage.html#Man-usage class Converter::ManPageConverter < Converter::Base register_for 'manpage' @@ -16,14 +20,17 @@ ESC_FS = %(#{ESC}.) # escaped full stop (indicates troff macro) LiteralBackslashRx = /\A\\|(#{ESC})?\\/ LeadingPeriodRx = /^\./ EscapedMacroRx = /^(?:#{ESC}\\c\n)?#{ESC}\.((?:URL|MTO) "#{CC_ANY}*?" "#{CC_ANY}*?" )( |[^\s]*)(#{CC_ANY}*?)(?: *#{ESC}\\c)?$/ - MockBoundaryRx = /<\/?BOUNDARY>/ + MalformedEscapedMacroRx = /(#{ESC}\\c) (#{ESC}\.(?:URL|MTO) )/ + MockMacroRx = /<\/?(#{ESC}\\[^>]+)>/ EmDashCharRefRx = /&#8212;(?:&#8203;)?/ EllipsisCharRefRx = /&#8230;(?:&#8203;)?/ WrappedIndentRx = /#{CG_BLANK}*#{LF}#{CG_BLANK}*/ + XMLMarkupRx = /&#?[a-z\d]+;|</ + PCDATAFilterRx = /(&#?[a-z\d]+;|<[^>]+>)|([^&<]+)/ def initialize backend, opts = {} @backend = backend init_backend_traits basebackend: 'manpage', filetype: 'man', outfilesuffix: '.man', supports_templates: true end @@ -89,21 +96,18 @@ unless node.noheader if node.attr? 'manpurpose' mannames = node.attr 'mannames', [manname] result << %(.SH "#{(node.attr 'manname-title', 'NAME').upcase}" -#{mannames.map {|n| manify n }.join ', '} \\- #{manify node.attr('manpurpose'), whitespace: :normalize}) +#{mannames.map {|n| (manify n).gsub '\-', '-' }.join ', '} \\- #{manify node.attr('manpurpose'), whitespace: :normalize}) end end result << node.content # QUESTION should NOTES come after AUTHOR(S)? - if node.footnotes? && !(node.attr? 'nofootnotes') - result << '.SH "NOTES"' - result.concat(node.footnotes.map {|fn| %(#{fn.index}. #{fn.text}) }) - end + append_footnotes result, node unless (authors = node.authors).empty? if authors.size > 1 result << '.SH "AUTHORS"' authors.each do |author| @@ -122,14 +126,11 @@ # NOTE embedded doesn't really make sense in the manpage backend def convert_embedded node result = [node.content] - if node.footnotes? && !(node.attr? 'nofootnotes') - result << '.SH "NOTES"' - result.concat(node.footnotes.map {|fn| %(#{fn.index}. #{fn.text}) }) - end + append_footnotes result, node # QUESTION should we add an AUTHOR(S) section? result.join LF end @@ -140,11 +141,11 @@ macro = 'SS' # QUESTION why captioned title? why not when level == 1? stitle = node.captioned_title else macro = 'SH' - stitle = node.title.upcase + stitle = uppercase_pcdata node.title end result << %(.#{macro} "#{manify stitle}" #{node.content}) result.join LF end @@ -313,12 +314,13 @@ else node.content end end - # TODO use Page Control https://www.gnu.org/software/groff/manual/html_node/Page-Control.html#Page-Control - alias convert_page_break skip + def convert_page_break node + '.bp' + end def convert_paragraph node if node.title? %(.sp .B #{manify node.title} @@ -529,16 +531,17 @@ result << '.RE' end result.join LF end - # FIXME git uses [verse] for the synopsis; detect this special case def convert_verse node result = [] - result << (node.title? ? %(.sp + if node.title? + result << %(.sp .B #{manify node.title} -.br) : '.sp') +.br) + end attribution_line = (node.attr? 'citetitle') ? %(#{node.attr 'citetitle'} ) : nil attribution_line = (node.attr? 'attribution') ? %[#{attribution_line}\\(em #{node.attr 'attribution'}] : nil result << %(.sp .nf #{manify node.content, whitespace: :preserve} @@ -607,11 +610,10 @@ def convert_inline_callout node %(#{ESC_BS}fB(#{node.text})#{ESC_BS}fP) end - # TODO supposedly groff has footnotes, but we're in search of an example def convert_inline_footnote node if (index = node.attr 'index') %([#{index}]) elsif node.type == :xref %([#{node.text}]) @@ -645,23 +647,23 @@ else %(#{ESC_BS}fI#{menu}#{ESC_BS}fP) end end - # NOTE use fake <BOUNDARY> element to prevent creating artificial word boundaries + # NOTE use fake XML elements to prevent creating artificial word boundaries def convert_inline_quoted node case node.type when :emphasis - %(#{ESC_BS}fI<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}fP) + %(<#{ESC_BS}fI>#{node.text}</#{ESC_BS}fP>) when :strong - %(#{ESC_BS}fB<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}fP) + %(<#{ESC_BS}fB>#{node.text}</#{ESC_BS}fP>) when :monospaced - %[#{ESC_BS}f(CR<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}fP] + %[<#{ESC_BS}f(CR>#{node.text}</#{ESC_BS}fP>] when :single - %[#{ESC_BS}(oq<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}(cq] + %[<#{ESC_BS}(oq>#{node.text}</#{ESC_BS}(cq>] when :double - %[#{ESC_BS}(lq<BOUNDARY>#{node.text}</BOUNDARY>#{ESC_BS}(rq] + %[<#{ESC_BS}(lq>#{node.text}</#{ESC_BS}(rq>] else node.text end end @@ -676,10 +678,26 @@ end end private + def append_footnotes result, node + if node.footnotes? && !(node.attr? 'nofootnotes') + result << '.SH "NOTES"' + node.footnotes.each_with_index do |fn, idx| + result << %(.IP [#{fn.index}]) + # NOTE restore newline in escaped macro that gets removed by normalize_text in substitutor + if (text = fn.text).include? %(#{ESC}\\c #{ESC}.) + text = (manify %(#{text.gsub MalformedEscapedMacroRx, %(\\1#{LF}\\2)} ), whitespace: :normalize).chomp ' ' + else + text = manify text, whitespace: :normalize + end + result << text + end + end + end + # Converts HTML entity references back to their original form, escapes # special man characters and strips trailing whitespace. # # It's crucial that text only ever pass through manify once. # @@ -702,11 +720,11 @@ str = str. gsub(LiteralBackslashRx) { $1 ? $& : '\\(rs' }. # literal backslash (not a troff escape sequence) gsub(EllipsisCharRefRx, '...'). # horizontal ellipsis gsub(LeadingPeriodRx, '\\\&.'). # leading . is used in troff for macro call or other formatting; replace with \&. # drop orphaned \c escape lines, unescape troff macro, quote adjacent character, isolate macro line - gsub(EscapedMacroRx) { (rest = $3.lstrip).empty? ? %(.#$1"#$2") : %(.#$1"#$2"#{LF}#{rest}) }. + gsub(EscapedMacroRx) { (rest = $3.lstrip).empty? ? %(.#$1"#$2") : %(.#$1"#{$2.rstrip}"#{LF}#{rest}) }. gsub('-', '\-'). gsub('&lt;', '<'). gsub('&gt;', '>'). gsub('&#160;', '\~'). # non-breaking space gsub('&#169;', '\(co'). # copyright sign @@ -724,14 +742,18 @@ gsub('&#8656;', '\(lA'). # leftwards double arrow gsub('&#8658;', '\(rA'). # rightwards double arrow gsub('&#8203;', '\:'). # zero width space gsub('&amp;', '&'). # literal ampersand (NOTE must take place after any other replacement that includes &) gsub('\'', '\(aq'). # apostrophe-quote - gsub(MockBoundaryRx, ''). # mock boundary + gsub(MockMacroRx, '\1'). # mock boundary gsub(ESC_BS, '\\'). # unescape troff backslash (NOTE update if more escapes are added) gsub(ESC_FS, '.'). # unescape full stop in troff commands (NOTE must take place after gsub(LeadingPeriodRx)) rstrip # strip trailing space opts[:append_newline] ? %(#{str}#{LF}) : str + end + + def uppercase_pcdata string + (XMLMarkupRx.match? string) ? string.gsub(PCDATAFilterRx) { $2 ? $2.upcase : $1 } : string.upcase end def enclose_content node node.content_model == :compound ? node.content : %(.sp#{LF}#{manify node.content, whitespace: :normalize}) end