converter.rb in asciidoctor-epub3-1.5.0.alpha.7

- old
+ new

@@ -2,13 +2,10 @@
 require_relative 'spine_item_processor'
 require_relative 'font_icon_map'
 
 module Asciidoctor
 module Epub3
-# tried 8288, but it didn't work in older readers
-WordJoiner = [65279].pack 'U*'
-WordJoinerRx = RUBY_ENGINE_JRUBY ? /\uFEFF/ : WordJoiner
 
 # Public: The main converter for the epub3 backend that handles packaging the
 # EPUB3 or KF8 publication file.
 class Converter
   include ::Asciidoctor::Converter
@@ -23,19 +20,27 @@
     htmlsyntax 'xml'
     @validate = false
     @extract = false
   end
 
-  def convert spine_doc, name = nil
-    @validate = true if spine_doc.attr? 'ebook-validate'
-    @extract = true if spine_doc.attr? 'ebook-extract'
-    Packager.new spine_doc, (spine_doc.references[:spine_items] || [spine_doc]), spine_doc.attributes['ebook-format'].to_sym
+  def convert node, name = nil
+    if (name ||= node.node_name) == 'document'
+      @validate = node.attr? 'ebook-validate'
+      @extract = node.attr? 'ebook-extract'
+      @compress = node.attr 'ebook-compress'
+      Packager.new node, (node.references[:spine_items] || [node]), node.attributes['ebook-format'].to_sym
+    # converting an element from the spine document, such as an inline node in the doctitle
+    elsif name.start_with? 'inline_'
+      (@content_converter ||= ::Asciidoctor::Converter::Factory.default.create('epub3-xhtml5')).convert node, name
+    else
+      raise ::ArgumentError, %(Encountered unexpected node in epub3 package converter: #{name})
+    end
   end
 
   # FIXME we have to package in write because we don't have access to target before this point
   def write packager, target
-    packager.package validate: @validate, extract: @extract, target: target
+    packager.package validate: @validate, extract: @extract, compress: @compress, target: target
     nil
   end
 end
 
 # Public: The converter for the epub3 backend that converts the individual
@@ -43,18 +48,18 @@
 class ContentConverter
   include ::Asciidoctor::Converter
 
   register_for 'epub3-xhtml5'
 
-  WordJoiner = Epub3::WordJoiner
-  EOL = "\n"
+  EOL = %(\n)
   NoBreakSpace = '&#xa0;'
   ThinNoBreakSpace = '&#x202f;'
   RightAngleQuote = '&#x203a;'
+  CalloutStartNum = %(\u2460)
 
   XmlElementRx = /<\/?.+?>/
-  CharEntityRx = /&#(\d{2,5});/
+  CharEntityRx = /&#(\d{2,6});/
   NamedEntityRx = /&([A-Z]+);/
   UppercaseTagRx = /<(\/)?([A-Z]+)>/
 
   FromHtmlSpecialCharsMap = {
     '&lt;' => '<',
@@ -78,48 +83,22 @@
   def initialize backend, opts
     super
     basebackend 'html'
     outfilesuffix '.xhtml'
     htmlsyntax 'xml'
-    @xrefs_used = ::Set.new
+    @xrefs_seen = ::Set.new
     @icon_names = []
   end
 
   def convert node, name = nil
     if respond_to?(name ||= node.node_name)
       send name, node
     else
-      warn %(conversion missing in epub3 backend for #{name})
+      warn %(asciidoctor: WARNING: conversion missing in epub3 backend for #{name})
     end
   end
 
-  # TODO aggregate authors of spine document into authors attribute(s) on main document
-  def navigation_document node, spine
-    doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, ''
-    lines = [%(<!DOCTYPE html>
-<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="#{lang = (node.attr 'lang', 'en')}" lang="#{lang}">
-<head>
-<meta charset="UTF-8"/>
-<title>#{doctitle_sanitized}</title>
-<link rel="stylesheet" type="text/css" href="styles/epub3.css"/>
-<link rel="stylesheet" type="text/css" href="styles/epub3-css3-only.css" media="(min-device-width: 0px)"/>
-</head>
-<body>
-<h1>#{doctitle_sanitized}</h1>
-<nav epub:type="toc" id="toc">
-<h2>#{node.attr 'toc-title'}</h2>
-<ol>)]
-    spine.each do |item|
-      lines << %(<li><a href="#{item.id || (item.attr 'docname')}.xhtml">#{(item.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, ''}</a></li>)
-    end
-    lines << %(</ol>
-</nav>
-</body>
-</html>)
-    lines * EOL
-  end
-
   def document node
     docid = node.id
 
     if (doctitle = node.doctitle partition: true, sanitize: true, use_fallback: true).subtitle?
       title = doctitle.main
@@ -129,23 +108,26 @@
       # HACK until we get proper handling of title-only in CSS
       title = title_upper = ''
       subtitle = doctitle.combined
     end
 
-    doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, ''
-    subtitle_formatted = subtitle.gsub(WordJoinerRx, '').split(' ').map {|w| %(<b>#{w}</b>) } * ' '
-    # FIXME make this uppercase routine more intelligent, less fragile
+    doctitle_sanitized = doctitle.combined
+    subtitle_formatted = subtitle.split.map {|w| %(<b>#{w}</b>) } * ' '
+    # FIXME use uppercase pcdata helper to make less fragile (see logic in Asciidoctor PDF)
     subtitle_formatted_upper = subtitle_formatted.upcase
         .gsub(UppercaseTagRx) { %(<#{$1}#{$2.downcase}>) }
         .gsub(NamedEntityRx) { %(&#{$1.downcase};) }
 
-    author = node.attr 'author'
-    username = node.attr 'username', 'default'
-    # FIXME needs to resolve to the imagesdir of the spine document, not this document
-    #imagesdir = (node.attr 'imagesdir', '.').chomp '/'
-    #imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/))
-    imagesdir = 'images/'
+    if (node.attr 'publication-type', 'book') == 'book'
+      byline = nil
+    else
+      author = node.attr 'author'
+      username = node.attr 'username', 'default'
+      imagesdir = (node.references[:spine].attr 'imagesdir', '.').chomp '/'
+      imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/))
+      byline = %(<p class="byline"><img src="#{imagesdir}avatars/#{username}.jpg"/> <b class="author">#{author}</b></p>#{EOL})
+    end
 
     mark_last_paragraph node
     content = node.content
 
     # NOTE must run after content is resolved
@@ -174,27 +156,24 @@
 <meta charset="UTF-8"/>
 <title>#{doctitle_sanitized}</title>
 <link rel="stylesheet" type="text/css" href="styles/epub3.css"/>
 <link rel="stylesheet" type="text/css" href="styles/epub3-css3-only.css" media="(min-device-width: 0px)"/>
 #{icon_css_head}<script type="text/javascript">
-document.addEventListener('DOMContentLoaded', function(event) {
-  var epubReader = navigator.epubReadingSystem;
-  if (!epubReader) {
-    if (window.parent == window || !(epubReader = window.parent.navigator.epubReadingSystem)) {
-      return;
-    }
+document.addEventListener('DOMContentLoaded', function(event, reader) {
+  if (!(reader = navigator.epubReadingSystem)) {
+    if (navigator.userAgent.indexOf(' calibre/') >= 0) reader = { name: 'calibre-desktop' };
+    else if (window.parent == window || !(reader = window.parent.navigator.epubReadingSystem)) return;
   }
-  document.body.setAttribute('class', epubReader.name.toLowerCase().replace(/ /g, '-'));
+  document.body.setAttribute('class', reader.name.toLowerCase().replace(/ /g, '-'));
 });
 </script>
 </head>
 <body>
 <section class="chapter" title="#{doctitle_sanitized.gsub '"', '&quot;'}" epub:type="chapter" id="#{docid}">
 #{icon_css_scoped}<header>
 <div class="chapter-header">
-<p class="byline"><img src="#{imagesdir}avatars/#{username}.jpg"/> <b class="author">#{author}</b></p>
-<h1 class="chapter-title">#{title_upper}#{subtitle ? %[ <small class="subtitle">#{subtitle_formatted_upper}</small>] : nil}</h1>
+#{byline}<h1 class="chapter-title">#{title_upper}#{subtitle ? %[ <small class="subtitle">#{subtitle_formatted_upper}</small>] : nil}</h1>
 </div>
 </header>
 #{content})]
 
     if node.footnotes?
@@ -217,10 +196,15 @@
 </html>'
 
     lines * EOL
   end
 
+  # NOTE embedded is used for AsciiDoc table cell content
+  def embedded node
+    node.content
+  end
+
   def section node
     hlevel = node.level + 1
     epub_type_attr = node.special ? %( epub:type="#{node.sectname}") : nil
     div_classes = [%(sect#{node.level}), node.role].compact
     title = node.title
@@ -246,13 +230,20 @@
     else
       node.content
     end
   end
 
-  # QUESTION use convert_content?
   def open node
-    node.content
+    id_attr = node.id ? %( id="#{node.id}") : nil
+    class_attr = node.role ? %( class="#{node.role}") : nil
+    if id_attr || class_attr
+      %(<div#{id_attr}#{class_attr}>
+#{convert_content node}
+</div>)
+    else
+      convert_content node
+    end
   end
 
   def abstract node
     %(<div class="abstract" epub:type="preamble">
 #{convert_content node}
@@ -261,15 +252,14 @@
 
   def paragraph node
     role = node.role
     # stack-head is the alternative to the default, inline-head (where inline means "run-in")
     head_stop = node.attr 'head-stop', (role && (node.has_role? 'stack-head') ? nil : '.')
+    # FIXME promote regexp to constant
     head = node.title? ? %(<strong class="head">#{title = node.title}#{head_stop && title !~ /[[:punct:]]$/ ? head_stop : nil}</strong> ) : nil
     if role
-      if node.has_role? 'signature'
-        node.set_option 'hardbreaks'
-      end
+      node.set_option 'hardbreaks' if node.has_role? 'signature'
       %(<p class="#{role}">#{head}#{node.content}</p>)
     else
       %(<p>#{head}#{node.content}</p>)
     end
   end
@@ -328,19 +318,15 @@
   end
 
   def listing node
     figure_classes = ['listing']
     figure_classes << 'coalesce' if node.option? 'unbreakable'
-    pre_classes = if node.style == 'source'
-      ['source', %(language-#{node.attr 'language'})]
-    else
-      ['screen']
-    end
+    pre_classes = node.style == 'source' ? ['source', %(language-#{node.attr 'language'})] : ['screen']
     title_div = node.title? ? %(<figcaption>#{node.captioned_title}</figcaption>
 ) : nil
     # patches conums to fix extra or missing leading space
-    # TODO apply this patch upstream to Asciidoctor
+    # TODO remove patch after upgrading to Asciidoctor 1.5.6
     %(<figure class="#{figure_classes * ' '}">
 #{title_div}<pre class="#{pre_classes * ' '}"><code>#{(node.content || '').gsub(/(?<! )<i class="conum"| +<i class="conum"/, ' <i class="conum"')}</code></pre>
 </figure>)
   end
 
@@ -357,15 +343,15 @@
     '<hr class="thematicbreak"/>'
   end
 
   def quote node
     footer_content = []
-    if attribution = (node.attr 'attribution')
-      footer_content << attribution  
+    if (attribution = node.attr 'attribution')
+      footer_content << attribution
     end
 
-    if citetitle = (node.attr 'citetitle')
+    if (citetitle = node.attr 'citetitle')
       citetitle_sanitized = xml_sanitize citetitle
       footer_content << %(<cite title="#{citetitle_sanitized}">#{citetitle}</cite>)
     end
 
     if node.title?
@@ -384,15 +370,15 @@
 </div>)
   end
 
   def verse node
     footer_content = []
-    if attribution = (node.attr 'attribution')
-      footer_content << attribution  
+    if (attribution = node.attr 'attribution')
+      footer_content << attribution
     end
 
-    if citetitle = (node.attr 'citetitle')
+    if (citetitle = node.attr 'citetitle')
       citetitle_sanitized = xml_sanitize citetitle
       footer_content << %(<cite title="#{citetitle_sanitized}">#{citetitle}</cite>)
     end
 
     footer_tag = footer_content.size > 0 ? %(
@@ -407,10 +393,11 @@
     if node.title?
       classes << 'titled'
       title = node.title
       title_sanitized = xml_sanitize title
       title_attr = %( title="#{title_sanitized}")
+      # FIXME use uppercase pcdata helper to make less fragile (see logic in Asciidoctor PDF)
       title_upper = title.upcase.gsub(NamedEntityRx) { %(&#{$1.downcase};) }
       title_el = %(<h2>#{title_upper}</h2>
 )
     else
       title_attr = nil
@@ -442,14 +429,14 @@
     if (role = node.role)
       table_classes << role
     end
     table_class_attr = %( class="#{table_classes * ' '}")
     table_styles = []
-    unless node.option? 'autowidth'
-      table_styles << %(width: #{node.attr 'tablepcwidth'}%;)
+    unless (node.option? 'autowidth') && !(node.attr? 'width', nil, false)
+      table_styles << %(width: #{node.attr 'tablepcwidth'}%)
     end
-    table_style_attr = table_styles.size > 0 ? %( style="#{table_styles * ' '}") : nil
+    table_style_attr = table_styles.size > 0 ? %( style="#{table_styles * '; '}") : nil
 
     lines << %(<table#{table_id_attr}#{table_class_attr}#{table_style_attr}>)
     lines << %(<caption>#{node.captioned_title}</caption>) if node.title?
     if (node.attr 'rowcount') > 0
       lines << '<colgroup>'
@@ -458,11 +445,11 @@
         node.columns.size.times do
           lines << tag
         end
       #else
       #  node.columns.each do |col|
-      #    lines << %(<col style="width: #{col.attr 'colpcwidth'}%;"/>)
+      #    lines << %(<col style="width: #{col.attr 'colpcwidth'}%"/>)
       #  end
       #end
       lines << '</colgroup>'
       [:head, :foot, :body].select {|tsec| !node.rows[tsec].empty? }.each do |tsec|
         lines << %(<t#{tsec}>)
@@ -472,11 +459,11 @@
             if tsec == :head
               cell_content = cell.text
             else
               case cell.style
               when :asciidoc
-                cell_content = %(<div>#{cell.content}</div>)
+                cell_content = %(<div class="embed">#{cell.content}</div>)
               when :verse
                 cell_content = %(<div class="verse">#{cell.text}</div>)
               when :literal
                 cell_content = %(<div class="literal"><pre>#{cell.text}</pre></div>)
               else
@@ -496,11 +483,11 @@
               cell_classes << 'valign-top'
             end
             cell_class_attr = cell_classes.size > 0 ? %( class="#{cell_classes * ' '}") : nil
             cell_colspan_attr = cell.colspan ? %( colspan="#{cell.colspan}") : nil
             cell_rowspan_attr = cell.rowspan ? %( rowspan="#{cell.rowspan}") : nil
-            cell_style_attr = (node.document.attr? 'cellbgcolor') ? %( style="background-color: #{node.document.attr 'cellbgcolor'};") : nil
+            cell_style_attr = (node.document.attr? 'cellbgcolor') ? %( style="background-color: #{node.document.attr 'cellbgcolor'}") : nil
             lines << %(<#{cell_tag_name}#{cell_class_attr}#{cell_colspan_attr}#{cell_rowspan_attr}#{cell_style_attr}>#{cell_content}</#{cell_tag_name}>)
           end
           lines << '</tr>'
         end
         lines << %(</t#{tsec}>)
@@ -513,11 +500,11 @@
   end
 
   def colist node
     lines = ['<div class="callout-list">
 <ol>']
-    num = "\u2460"
+    num = CalloutStartNum
     node.items.each_with_index do |item, i|
       lines << %(<li><i class="conum" data-value="#{i + 1}">#{num}</i> #{item.text}</li>)
       num = num.next
     end
     lines << '</ol>
@@ -543,11 +530,11 @@
         subject_plain = xml_sanitize subject, :plain
         subject_element = %(<strong class="subject">#{subject}#{subject_stop && subject_plain !~ /[[:punct:]]$/ ? subject_stop : nil}</strong>)
         lines << '<li>'
         if dd
           # NOTE: must wrap remaining text in a span to help webkit justify the text properly
-          lines << %(<span class="principal">#{subject_element}#{dd.text? ? %[ <span class="supporting">#{dd.text}</span>] : nil}</span>) 
+          lines << %(<span class="principal">#{subject_element}#{dd.text? ? %[ <span class="supporting">#{dd.text}</span>] : nil}</span>)
           lines << dd.content if dd.blocks?
         else
           lines << %(<span class="principal">#{subject_element}</span>)
         end
         lines << '</li>'
@@ -609,11 +596,10 @@
   end
 
   def ulist node
     complex = false
     div_classes = ['itemized-list', node.style, node.role].compact
-    # TODO could strip WordJoiner if brief since not using justify
     ul_classes = [node.style, ((node.option? 'brief') ? 'brief' : nil)].compact
     ul_class_attr = ul_classes.empty? ? nil : %( class="#{ul_classes * ' '}")
     id_attribute = node.id ? %( id="#{node.id}") : nil
     lines = [%(<div#{id_attribute} class="#{div_classes * ' '}">)]
     lines << %(<h3 class="list-heading">#{node.title}</h3>) if node.title?
@@ -640,20 +626,20 @@
     target = node.attr 'target'
     type = (::File.extname target)[1..-1]
     img_attrs = [%(alt="#{node.attr 'alt'}")]
     case type
     when 'svg'
-      img_attrs << %(style="width: #{node.attr 'scaledwidth', '100%'};")
+      img_attrs << %(style="width: #{node.attr 'scaledwidth', '100%'}")
       # TODO make this a convenience method on document
       epub_properties = (node.document.attr 'epub-properties') || []
       unless epub_properties.include? 'svg'
         epub_properties << 'svg'
         node.document.attributes['epub-properties'] = epub_properties
       end
     else
       if node.attr? 'scaledwidth'
-        img_attrs << %(style="width: #{node.attr 'scaledwidth'};")
+        img_attrs << %(style="width: #{node.attr 'scaledwidth'}")
       end
     end
 =begin
     # NOTE to set actual width and height, use CSS width and height
     if type == 'svg'
@@ -662,11 +648,11 @@
       # Kindle
       #elsif node.attr? 'scaledheight'
       #  img_attrs << %(width="#{node.attr 'scaledheight'}" height="#{node.attr 'scaledheight'}")
       # ePub3
       elsif node.attr? 'scaledheight'
-        img_attrs << %(height="#{node.attr 'scaledheight'}" style="max-height: #{node.attr 'scaledheight'} !important;")
+        img_attrs << %(height="#{node.attr 'scaledheight'}" style="max-height: #{node.attr 'scaledheight'} !important")
       else
         # Aldiko doesn't not scale width to 100% by default
         img_attrs << %(width="100%")
       end
     end
@@ -680,39 +666,75 @@
   end
 
   def inline_anchor node
     target = node.target
     case node.type
-    when :xref
-      refid = (node.attr 'refid') || target
-      id_attr = unless @xrefs_used.include? refid
-        @xrefs_used << refid
-        %( id="xref-#{refid}")
+    when :xref # TODO would be helpful to know what type the target is (e.g., bibref)
+      doc, refid, text, path = node.document, ((node.attr 'refid') || target), node.text, (node.attr 'path')
+      # NOTE if path is non-nil, we have an inter-document xref
+      # QUESTION should we drop the id attribute for an inter-document xref?
+      if path
+        # ex. chapter-id#section-id
+        if node.attr 'fragment'
+          refdoc_id, refdoc_refid = refid.split '#', 2
+          if refdoc_id == refdoc_refid
+            target = target[0...(target.index '#')]
+            id_attr = %( id="xref--#{refdoc_id}")
+          else
+            id_attr = %( id="xref--#{refdoc_id}--#{refdoc_refid}")
+          end
+        # ex. chapter-id#
+        else
+          refdoc_id = refdoc_refid = refid
+          # inflate key to spine item root (e.g., transform chapter-id to chapter-id#chapter-id)
+          refid = %(#{refid}##{refid})
+          id_attr = %( id="xref--#{refdoc_id}")
+        end
+        id_attr = nil unless @xrefs_seen.add? refid
+        refdoc = doc.references[:spine_items].find {|it| refdoc_id == (it.id || (it.attr 'docname')) }
+        if refdoc
+          if (reftext = refdoc.references[:ids][refdoc_refid])
+            text ||= reftext
+          else
+            warn %(asciidoctor: WARNING: #{::File.basename(doc.attr 'docfile')}: invalid reference to unknown anchor in #{refdoc_id} chapter: #{refdoc_refid})
+          end
+        else
+          warn %(asciidoctor: WARNING: #{::File.basename(doc.attr 'docfile')}: invalid reference to anchor in unknown chapter: #{refdoc_id})
+        end
+      else
+        id_attr = (@xrefs_seen.add? refid) ? %( id="xref-#{refid}") : nil
+        if (reftext = doc.references[:ids][refid])
+          text ||= reftext
+        else
+          # FIXME we get false negatives for reference to bibref
+          warn %(asciidoctor: WARNING: #{::File.basename(doc.attr 'docfile')}: invalid reference to unknown local anchor (or valid bibref): #{refid})
+        end
       end
-      # FIXME seems like text should be prepared already
-      # FIXME would be nice to know what type the target is (e.g., bibref)
-      text = node.text || (node.document.references[:ids][refid] || %([#{refid}]))
-      %(<a#{id_attr} href="#{target}" class="xref">#{text}</a>#{WordJoiner})
+      %(<a#{id_attr} href="#{target}" class="xref">#{text || "[#{refid}]"}</a>)
     when :ref
       %(<a id="#{target}"></a>)
     when :link
-      %(<a href="#{target}" class="link">#{node.text}</a>#{WordJoiner})
+      %(<a href="#{target}" class="link">#{node.text}</a>)
     when :bibref
-      %(<a id="#{target}" href="#xref-#{target}">[#{target}]</a>#{WordJoiner})
+      if @xrefs_seen.include? target
+        %(<a id="#{target}" href="#xref-#{target}">[#{target}]</a>)
+      else
+        %(<a id="#{target}"></a>[#{target}])
+      end
     end
   end
 
   def inline_break node
     %(#{node.text}<br/>)
   end
 
   def inline_button node
-    %(<b class="button">[<span class="label">#{node.text}</span>]</b>#{WordJoiner})
+    %(<b class="button">[<span class="label">#{node.text}</span>]</b>)
   end
 
   def inline_callout node
-    num = "\u2460"
+    num = CalloutStartNum
     int_num = node.text.to_i
     (int_num - 1).times { num = num.next }
     %(<i class="conum" data-value="#{int_num}">#{num}</i>)
   end
 
@@ -723,11 +745,11 @@
       %(<mark class="noteref" title="Unresolved note reference">#{node.text}</mark>)
     end
   end
 
   def inline_image node
-    if (type = node.type) == 'icon'
+    if node.type == 'icon'
       @icon_names << (icon_name = node.target)
       i_classes = ['icon', %(i-#{icon_name})]
       i_classes << %(icon-#{node.attr 'size'}) if node.attr? 'size'
       i_classes << %(icon-flip-#{(node.attr 'flip')[0]}) if node.attr? 'flip'
       i_classes << %(icon-rotate-#{node.attr 'rotate'}) if node.attr? 'rotate'
@@ -746,11 +768,11 @@
 
   def inline_kbd node
     if (keys = node.attr 'keys').size == 1
       %(<kbd>#{keys[0]}</kbd>)
     else
-      key_combo = keys.map {|key| %(<kbd>#{key}</kbd>+) }.join.chop
+      key_combo = keys.map {|key| %(<kbd>#{key}</kbd>) }.join '+'
       %(<span class="keyseq">#{key_combo}</span>)
     end
   end
 
   def inline_menu node
@@ -768,44 +790,42 @@
   end
 
   def inline_quoted node
     case node.type
     when :strong
-      %(<strong>#{node.text}</strong>#{WordJoiner})
+      %(<strong>#{node.text}</strong>)
     when :emphasis
-      %(<em>#{node.text}</em>#{WordJoiner})
+      %(<em>#{node.text}</em>)
     when :monospaced
-      %(<code class="literal">#{node.text}</code>#{WordJoiner})
+      %(<code class="literal">#{node.text}</code>)
     when :double
       #%(&#x201c;#{node.text}&#x201d;)
       %(“#{node.text}”)
     when :single
       #%(&#x2018;#{node.text}&#x2019;)
       %(‘#{node.text}’)
     when :superscript
-      %(<sup>#{node.text}</sup>#{WordJoiner})
+      %(<sup>#{node.text}</sup>)
     when :subscript
-      %(<sub>#{node.text}</sub>#{WordJoiner})
+      %(<sub>#{node.text}</sub>)
     else
       node.text
     end
   end
 
   def convert_content node
-    if node.content_model == :simple
-      %(<p>#{node.content}</p>)
-    else
-      node.content
-    end
+    node.content_model == :simple ? %(<p>#{node.content}</p>) : node.content
   end
 
+  # FIXME merge with the xml_sanitize helper
   def xml_sanitize value, target = :attribute
-    sanitized = (value.include? '<') ? value.gsub(XmlElementRx, '').tr_s(' ', ' ').strip : value
+    sanitized = (value.include? '<') ? value.gsub(XmlElementRx, '').strip.tr_s(' ', ' ') : value
     if target == :plain && (sanitized.include? ';')
-      sanitized = sanitized.gsub(CharEntityRx) { [$1.to_i].pack('U*') }.gsub(FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap)
+      sanitized = sanitized.gsub(CharEntityRx) { [$1.to_i].pack 'U*' } if sanitized.include? '&#'
+      sanitized = sanitized.gsub(FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap)
     elsif target == :attribute
-      sanitized = sanitized.gsub(WordJoinerRx, '').gsub('"', '&quot;')
+      sanitized = sanitized.gsub '"', '&quot;' if sanitized.include? '"'
     end
     sanitized
   end
 
   # TODO make check for last content paragraph a feature of Asciidoctor
@@ -820,21 +840,62 @@
     nil
   end
 end
 
 class DocumentIdGenerator
+  ReservedIds = %w(cover nav ncx)
+  CharRefRx = /&(?:([a-zA-Z]{2,})|#(\d{2,6})|#x([a-fA-F0-9]{2,5}));/
+  if defined? __dir__
+    InvalidIdCharsRx = /[^\p{Word}]+/
+    LeadingDigitRx = /^\p{Nd}/
+  else
+    InvalidIdCharsRx = /[^[:word:]]+/
+    LeadingDigitRx = /^[[:digit:]]/
+  end
   class << self
-    def generate_id doc
+    def generate_id doc, pre = nil, sep = nil
+      synthetic = false
       unless (id = doc.id)
-        id = if doc.header?
-          doc.doctitle(sanitize: true).gsub(WordJoinerRx, '').downcase.delete(':').tr_s(' ', '-').tr_s('-', '-')
+        # NOTE we assume pre is a valid ID prefix and that pre and sep only contain valid ID chars
+        pre ||= '_'
+        sep = sep ? sep.chr : '_'
+        if doc.header?
+          id = doc.doctitle sanitize: true
+          id = id.gsub CharRefRx do
+            $1 ? ($1 == 'amp' ? 'and' : sep) : ((d = $2 ? $2.to_i : $3.hex) == 8217 ? '' : ([d].pack 'U*'))
+          end if id.include? '&'
+          id = id.downcase.gsub InvalidIdCharsRx, sep
+          if id.empty?
+            id, synthetic = nil, true
+          else
+            unless sep.empty?
+              if (id = id.tr_s sep, sep).end_with? sep
+                if id == sep
+                  id, synthetic = nil, true
+                else
+                  id = (id.start_with? sep) ? id[1..-2] : id.chop
+                end
+              elsif id.start_with? sep
+                id = id[1..-1]
+              end
+            end
+            unless synthetic
+              if pre.empty?
+                id = %(_#{id}) if LeadingDigitRx =~ id
+              elsif !(id.start_with? pre)
+                id = %(#{pre}#{id})
+              end
+            end
+          end
         elsif (first_section = doc.first_section)
-          first_section.id
+          id = first_section.id
         else
-          %(document-#{doc.object_id})
+          synthetic = true
         end
+        id = %(#{pre}document#{sep}#{doc.object_id}) if synthetic
       end
+      warn %(asciidoctor: ERROR: chapter uses a reserved ID: #{id}) if !synthetic && (ReservedIds.include? id)
       id
     end
   end
 end
 
@@ -852,17 +913,23 @@
     end
     case (ebook_format = document.attributes['ebook-format'])
     when 'epub3', 'kf8'
       # all good
     when 'mobi'
-      document.attributes['ebook-format'] = 'kf8'
+      ebook_format = document.attributes['ebook-format'] = 'kf8'
     else
+      # QUESTION should we display a warning?
       ebook_format = document.attributes['ebook-format'] = 'epub3'
     end
     document.attributes[%(ebook-format-#{ebook_format})] = ''
     # Only fire SpineItemProcessor for top-level include directives
     include_processor SpineItemProcessor.new(document)
-    treeprocessor { process {|doc| doc.id = DocumentIdGenerator.generate_id doc } }
+    treeprocessor do
+      process do |doc|
+        doc.id = DocumentIdGenerator.generate_id doc, (doc.attr 'idprefix'), (doc.attr 'idseparator')
+        nil
+      end
+    end
   end
 end
 end
 end