lib/asciidoctor-epub3/converter.rb in asciidoctor-epub3-1.5.0.alpha.6 vs lib/asciidoctor-epub3/converter.rb in asciidoctor-epub3-1.5.0.alpha.7
- old
+ new
@@ -2,13 +2,10 @@
require_relative 'spine_item_processor'
require_relative 'font_icon_map'
module Asciidoctor
module Epub3
-# tried 8288, but it didn't work in older readers
-WordJoiner = [65279].pack 'U*'
-WordJoinerRx = RUBY_ENGINE_JRUBY ? /\uFEFF/ : WordJoiner
# Public: The main converter for the epub3 backend that handles packaging the
# EPUB3 or KF8 publication file.
class Converter
include ::Asciidoctor::Converter
@@ -23,19 +20,27 @@
htmlsyntax 'xml'
@validate = false
@extract = false
end
- def convert spine_doc, name = nil
- @validate = true if spine_doc.attr? 'ebook-validate'
- @extract = true if spine_doc.attr? 'ebook-extract'
- Packager.new spine_doc, (spine_doc.references[:spine_items] || [spine_doc]), spine_doc.attributes['ebook-format'].to_sym
+ def convert node, name = nil
+ if (name ||= node.node_name) == 'document'
+ @validate = node.attr? 'ebook-validate'
+ @extract = node.attr? 'ebook-extract'
+ @compress = node.attr 'ebook-compress'
+ Packager.new node, (node.references[:spine_items] || [node]), node.attributes['ebook-format'].to_sym
+ # converting an element from the spine document, such as an inline node in the doctitle
+ elsif name.start_with? 'inline_'
+ (@content_converter ||= ::Asciidoctor::Converter::Factory.default.create('epub3-xhtml5')).convert node, name
+ else
+ raise ::ArgumentError, %(Encountered unexpected node in epub3 package converter: #{name})
+ end
end
# FIXME we have to package in write because we don't have access to target before this point
def write packager, target
- packager.package validate: @validate, extract: @extract, target: target
+ packager.package validate: @validate, extract: @extract, compress: @compress, target: target
nil
end
end
# Public: The converter for the epub3 backend that converts the individual
@@ -43,18 +48,18 @@
class ContentConverter
include ::Asciidoctor::Converter
register_for 'epub3-xhtml5'
- WordJoiner = Epub3::WordJoiner
- EOL = "\n"
+ EOL = %(\n)
NoBreakSpace = '&#xa0;'
ThinNoBreakSpace = '&#x202f;'
RightAngleQuote = '&#x203a;'
+ CalloutStartNum = %(\u2460)
XmlElementRx = /<\/?.+?>/
- CharEntityRx = /&#(\d{2,5});/
+ CharEntityRx = /&#(\d{2,6});/
NamedEntityRx = /&([A-Z]+);/
UppercaseTagRx = /<(\/)?([A-Z]+)>/
FromHtmlSpecialCharsMap = {
'&lt;' => '<',
@@ -78,48 +83,22 @@
def initialize backend, opts
super
basebackend 'html'
outfilesuffix '.xhtml'
htmlsyntax 'xml'
- @xrefs_used = ::Set.new
+ @xrefs_seen = ::Set.new
@icon_names = []
end
def convert node, name = nil
if respond_to?(name ||= node.node_name)
send name, node
else
- warn %(conversion missing in epub3 backend for #{name})
+ warn %(asciidoctor: WARNING: conversion missing in epub3 backend for #{name})
end
end
- # TODO aggregate authors of spine document into authors attribute(s) on main document
- def navigation_document node, spine
- doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, ''
- lines = [%(<!DOCTYPE html>
-<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="#{lang = (node.attr 'lang', 'en')}" lang="#{lang}">
-<head>
-<meta charset="UTF-8"/>
-<title>#{doctitle_sanitized}</title>
-<link rel="stylesheet" type="text/css" href="styles/epub3.css"/>
-<link rel="stylesheet" type="text/css" href="styles/epub3-css3-only.css" media="(min-device-width: 0px)"/>
-</head>
-<body>
-<h1>#{doctitle_sanitized}</h1>
-<nav epub:type="toc" id="toc">
-<h2>#{node.attr 'toc-title'}</h2>
-<ol>)]
- spine.each do |item|
- lines << %(<li><a href="#{item.id || (item.attr 'docname')}.xhtml">#{(item.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, ''}</a></li>)
- end
- lines << %(</ol>
-</nav>
-</body>
-</html>)
- lines * EOL
- end
-
def document node
docid = node.id
if (doctitle = node.doctitle partition: true, sanitize: true, use_fallback: true).subtitle?
title = doctitle.main
@@ -129,23 +108,26 @@
# HACK until we get proper handling of title-only in CSS
title = title_upper = ''
subtitle = doctitle.combined
end
- doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, ''
- subtitle_formatted = subtitle.gsub(WordJoinerRx, '').split(' ').map {|w| %(<b>#{w}</b>) } * ' '
- # FIXME make this uppercase routine more intelligent, less fragile
+ doctitle_sanitized = doctitle.combined
+ subtitle_formatted = subtitle.split.map {|w| %(<b>#{w}</b>) } * ' '
+ # FIXME use uppercase pcdata helper to make less fragile (see logic in Asciidoctor PDF)
subtitle_formatted_upper = subtitle_formatted.upcase
.gsub(UppercaseTagRx) { %(<#{$1}#{$2.downcase}>) }
.gsub(NamedEntityRx) { %(&#{$1.downcase};) }
- author = node.attr 'author'
- username = node.attr 'username', 'default'
- # FIXME needs to resolve to the imagesdir of the spine document, not this document
- #imagesdir = (node.attr 'imagesdir', '.').chomp '/'
- #imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/))
- imagesdir = 'images/'
+ if (node.attr 'publication-type', 'book') == 'book'
+ byline = nil
+ else
+ author = node.attr 'author'
+ username = node.attr 'username', 'default'
+ imagesdir = (node.references[:spine].attr 'imagesdir', '.').chomp '/'
+ imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/))
+ byline = %(<p class="byline"><img src="#{imagesdir}avatars/#{username}.jpg"/> <b class="author">#{author}</b></p>#{EOL})
+ end
mark_last_paragraph node
content = node.content
# NOTE must run after content is resolved
@@ -174,27 +156,24 @@
<meta charset="UTF-8"/>
<title>#{doctitle_sanitized}</title>
<link rel="stylesheet" type="text/css" href="styles/epub3.css"/>
<link rel="stylesheet" type="text/css" href="styles/epub3-css3-only.css" media="(min-device-width: 0px)"/>
#{icon_css_head}<script type="text/javascript">
-document.addEventListener('DOMContentLoaded', function(event) {
- var epubReader = navigator.epubReadingSystem;
- if (!epubReader) {
- if (window.parent == window || !(epubReader = window.parent.navigator.epubReadingSystem)) {
- return;
- }
+document.addEventListener('DOMContentLoaded', function(event, reader) {
+ if (!(reader = navigator.epubReadingSystem)) {
+ if (navigator.userAgent.indexOf(' calibre/') >= 0) reader = { name: 'calibre-desktop' };
+ else if (window.parent == window || !(reader = window.parent.navigator.epubReadingSystem)) return;
}
- document.body.setAttribute('class', epubReader.name.toLowerCase().replace(/ /g, '-'));
+ document.body.setAttribute('class', reader.name.toLowerCase().replace(/ /g, '-'));
});
</script>
</head>
<body>
<section class="chapter" title="#{doctitle_sanitized.gsub '"', '&quot;'}" epub:type="chapter" id="#{docid}">
#{icon_css_scoped}<header>
<div class="chapter-header">
-<p class="byline"><img src="#{imagesdir}avatars/#{username}.jpg"/> <b class="author">#{author}</b></p>
-<h1 class="chapter-title">#{title_upper}#{subtitle ? %[ <small class="subtitle">#{subtitle_formatted_upper}</small>] : nil}</h1>
+#{byline}<h1 class="chapter-title">#{title_upper}#{subtitle ? %[ <small class="subtitle">#{subtitle_formatted_upper}</small>] : nil}</h1>
</div>
</header>
#{content})]
if node.footnotes?
@@ -217,10 +196,15 @@
</html>'
lines * EOL
end
+ # NOTE embedded is used for AsciiDoc table cell content
+ def embedded node
+ node.content
+ end
+
def section node
hlevel = node.level + 1
epub_type_attr = node.special ? %( epub:type="#{node.sectname}") : nil
div_classes = [%(sect#{node.level}), node.role].compact
title = node.title
@@ -246,13 +230,20 @@
else
node.content
end
end
- # QUESTION use convert_content?
def open node
- node.content
+ id_attr = node.id ? %( id="#{node.id}") : nil
+ class_attr = node.role ? %( class="#{node.role}") : nil
+ if id_attr || class_attr
+ %(<div#{id_attr}#{class_attr}>
+#{convert_content node}
+</div>)
+ else
+ convert_content node
+ end
end
def abstract node
%(<div class="abstract" epub:type="preamble">
#{convert_content node}
@@ -261,15 +252,14 @@
def paragraph node
role = node.role
# stack-head is the alternative to the default, inline-head (where inline means "run-in")
head_stop = node.attr 'head-stop', (role && (node.has_role? 'stack-head') ? nil : '.')
+ # FIXME promote regexp to constant
head = node.title? ? %(<strong class="head">#{title = node.title}#{head_stop && title !~ /[[:punct:]]$/ ? head_stop : nil}</strong> ) : nil
if role
- if node.has_role? 'signature'
- node.set_option 'hardbreaks'
- end
+ node.set_option 'hardbreaks' if node.has_role? 'signature'
%(<p class="#{role}">#{head}#{node.content}</p>)
else
%(<p>#{head}#{node.content}</p>)
end
end
@@ -328,19 +318,15 @@
end
def listing node
figure_classes = ['listing']
figure_classes << 'coalesce' if node.option? 'unbreakable'
- pre_classes = if node.style == 'source'
- ['source', %(language-#{node.attr 'language'})]
- else
- ['screen']
- end
+ pre_classes = node.style == 'source' ? ['source', %(language-#{node.attr 'language'})] : ['screen']
title_div = node.title? ? %(<figcaption>#{node.captioned_title}</figcaption>
) : nil
# patches conums to fix extra or missing leading space
- # TODO apply this patch upstream to Asciidoctor
+ # TODO remove patch once upgrading to Asciidoctor 1.5.6
%(<figure class="#{figure_classes * ' '}">
#{title_div}<pre class="#{pre_classes * ' '}"><code>#{(node.content || '').gsub(/(?<! )<i class="conum"| +<i class="conum"/, ' <i class="conum"')}</code></pre>
</figure>)
end
@@ -357,15 +343,15 @@
'<hr class="thematicbreak"/>'
end
def quote node
footer_content = []
- if attribution = (node.attr 'attribution')
- footer_content << attribution
+ if (attribution = node.attr 'attribution')
+ footer_content << attribution
end
- if citetitle = (node.attr 'citetitle')
+ if (citetitle = node.attr 'citetitle')
citetitle_sanitized = xml_sanitize citetitle
footer_content << %(<cite title="#{citetitle_sanitized}">#{citetitle}</cite>)
end
if node.title?
@@ -384,15 +370,15 @@
</div>)
end
def verse node
footer_content = []
- if attribution = (node.attr 'attribution')
- footer_content << attribution
+ if (attribution = node.attr 'attribution')
+ footer_content << attribution
end
- if citetitle = (node.attr 'citetitle')
+ if (citetitle = node.attr 'citetitle')
citetitle_sanitized = xml_sanitize citetitle
footer_content << %(<cite title="#{citetitle_sanitized}">#{citetitle}</cite>)
end
footer_tag = footer_content.size > 0 ? %(
@@ -407,10 +393,11 @@
if node.title?
classes << 'titled'
title = node.title
title_sanitized = xml_sanitize title
title_attr = %( title="#{title_sanitized}")
+ # FIXME use uppercase pcdata helper to make less fragile (see logic in Asciidoctor PDF)
title_upper = title.upcase.gsub(NamedEntityRx) { %(&#{$1.downcase};) }
title_el = %(<h2>#{title_upper}</h2>
)
else
title_attr = nil
@@ -442,14 +429,14 @@
if (role = node.role)
table_classes << role
end
table_class_attr = %( class="#{table_classes * ' '}")
table_styles = []
- unless node.option? 'autowidth'
- table_styles << %(width: #{node.attr 'tablepcwidth'}%;)
+ unless (node.option? 'autowidth') && !(node.attr? 'width', nil, false)
+ table_styles << %(width: #{node.attr 'tablepcwidth'}%)
end
- table_style_attr = table_styles.size > 0 ? %( style="#{table_styles * ' '}") : nil
+ table_style_attr = table_styles.size > 0 ? %( style="#{table_styles * '; '}") : nil
lines << %(<table#{table_id_attr}#{table_class_attr}#{table_style_attr}>)
lines << %(<caption>#{node.captioned_title}</caption>) if node.title?
if (node.attr 'rowcount') > 0
lines << '<colgroup>'
@@ -458,11 +445,11 @@
node.columns.size.times do
lines << tag
end
#else
# node.columns.each do |col|
- # lines << %(<col style="width: #{col.attr 'colpcwidth'}%;"/>)
+ # lines << %(<col style="width: #{col.attr 'colpcwidth'}%"/>)
# end
#end
lines << '</colgroup>'
[:head, :foot, :body].select {|tsec| !node.rows[tsec].empty? }.each do |tsec|
lines << %(<t#{tsec}>)
@@ -472,11 +459,11 @@
if tsec == :head
cell_content = cell.text
else
case cell.style
when :asciidoc
- cell_content = %(<div>#{cell.content}</div>)
+ cell_content = %(<div class="embed">#{cell.content}</div>)
when :verse
cell_content = %(<div class="verse">#{cell.text}</div>)
when :literal
cell_content = %(<div class="literal"><pre>#{cell.text}</pre></div>)
else
@@ -496,11 +483,11 @@
cell_classes << 'valign-top'
end
cell_class_attr = cell_classes.size > 0 ? %( class="#{cell_classes * ' '}") : nil
cell_colspan_attr = cell.colspan ? %( colspan="#{cell.colspan}") : nil
cell_rowspan_attr = cell.rowspan ? %( rowspan="#{cell.rowspan}") : nil
- cell_style_attr = (node.document.attr? 'cellbgcolor') ? %( style="background-color: #{node.document.attr 'cellbgcolor'};") : nil
+ cell_style_attr = (node.document.attr? 'cellbgcolor') ? %( style="background-color: #{node.document.attr 'cellbgcolor'}") : nil
lines << %(<#{cell_tag_name}#{cell_class_attr}#{cell_colspan_attr}#{cell_rowspan_attr}#{cell_style_attr}>#{cell_content}</#{cell_tag_name}>)
end
lines << '</tr>'
end
lines << %(</t#{tsec}>)
@@ -513,11 +500,11 @@
end
def colist node
lines = ['<div class="callout-list">
<ol>']
- num = "\u2460"
+ num = CalloutStartNum
node.items.each_with_index do |item, i|
lines << %(<li><i class="conum" data-value="#{i + 1}">#{num}</i> #{item.text}</li>)
num = num.next
end
lines << '</ol>
@@ -543,11 +530,11 @@
subject_plain = xml_sanitize subject, :plain
subject_element = %(<strong class="subject">#{subject}#{subject_stop && subject_plain !~ /[[:punct:]]$/ ? subject_stop : nil}</strong>)
lines << '<li>'
if dd
# NOTE: must wrap remaining text in a span to help webkit justify the text properly
- lines << %(<span class="principal">#{subject_element}#{dd.text? ? %[ <span class="supporting">#{dd.text}</span>] : nil}</span>)
+ lines << %(<span class="principal">#{subject_element}#{dd.text? ? %[ <span class="supporting">#{dd.text}</span>] : nil}</span>)
lines << dd.content if dd.blocks?
else
lines << %(<span class="principal">#{subject_element}</span>)
end
lines << '</li>'
@@ -609,11 +596,10 @@
end
def ulist node
complex = false
div_classes = ['itemized-list', node.style, node.role].compact
- # TODO could strip WordJoiner if brief since not using justify
ul_classes = [node.style, ((node.option? 'brief') ? 'brief' : nil)].compact
ul_class_attr = ul_classes.empty? ? nil : %( class="#{ul_classes * ' '}")
id_attribute = node.id ? %( id="#{node.id}") : nil
lines = [%(<div#{id_attribute} class="#{div_classes * ' '}">)]
lines << %(<h3 class="list-heading">#{node.title}</h3>) if node.title?
@@ -640,20 +626,20 @@
target = node.attr 'target'
type = (::File.extname target)[1..-1]
img_attrs = [%(alt="#{node.attr 'alt'}")]
case type
when 'svg'
- img_attrs << %(style="width: #{node.attr 'scaledwidth', '100%'};")
+ img_attrs << %(style="width: #{node.attr 'scaledwidth', '100%'}")
# TODO make this a convenience method on document
epub_properties = (node.document.attr 'epub-properties') || []
unless epub_properties.include? 'svg'
epub_properties << 'svg'
node.document.attributes['epub-properties'] = epub_properties
end
else
if node.attr? 'scaledwidth'
- img_attrs << %(style="width: #{node.attr 'scaledwidth'};")
+ img_attrs << %(style="width: #{node.attr 'scaledwidth'}")
end
end
=begin
# NOTE to set actual width and height, use CSS width and height
if type == 'svg'
@@ -662,11 +648,11 @@
# Kindle
#elsif node.attr? 'scaledheight'
# img_attrs << %(width="#{node.attr 'scaledheight'}" height="#{node.attr 'scaledheight'}")
# ePub3
elsif node.attr? 'scaledheight'
- img_attrs << %(height="#{node.attr 'scaledheight'}" style="max-height: #{node.attr 'scaledheight'} !important;")
+ img_attrs << %(height="#{node.attr 'scaledheight'}" style="max-height: #{node.attr 'scaledheight'} !important")
else
# Aldiko doesn't scale width to 100% by default
img_attrs << %(width="100%")
end
end
@@ -680,39 +666,75 @@
end
def inline_anchor node
target = node.target
case node.type
- when :xref
- refid = (node.attr 'refid') || target
- id_attr = unless @xrefs_used.include? refid
- @xrefs_used << refid
- %( id="xref-#{refid}")
+ when :xref # TODO would be helpful to know what type the target is (e.g., bibref)
+ doc, refid, text, path = node.document, ((node.attr 'refid') || target), node.text, (node.attr 'path')
+ # NOTE if path is non-nil, we have an inter-document xref
+ # QUESTION should we drop the id attribute for an inter-document xref?
+ if path
+ # ex. chapter-id#section-id
+ if node.attr 'fragment'
+ refdoc_id, refdoc_refid = refid.split '#', 2
+ if refdoc_id == refdoc_refid
+ target = target[0...(target.index '#')]
+ id_attr = %( id="xref--#{refdoc_id}")
+ else
+ id_attr = %( id="xref--#{refdoc_id}--#{refdoc_refid}")
+ end
+ # ex. chapter-id#
+ else
+ refdoc_id = refdoc_refid = refid
+ # inflate key to spine item root (e.g., transform chapter-id to chapter-id#chapter-id)
+ refid = %(#{refid}##{refid})
+ id_attr = %( id="xref--#{refdoc_id}")
+ end
+ id_attr = nil unless @xrefs_seen.add? refid
+ refdoc = doc.references[:spine_items].find {|it| refdoc_id == (it.id || (it.attr 'docname')) }
+ if refdoc
+ if (reftext = refdoc.references[:ids][refdoc_refid])
+ text ||= reftext
+ else
+ warn %(asciidoctor: WARNING: #{::File.basename(doc.attr 'docfile')}: invalid reference to unknown anchor in #{refdoc_id} chapter: #{refdoc_refid})
+ end
+ else
+ warn %(asciidoctor: WARNING: #{::File.basename(doc.attr 'docfile')}: invalid reference to anchor in unknown chapter: #{refdoc_id})
+ end
+ else
+ id_attr = (@xrefs_seen.add? refid) ? %( id="xref-#{refid}") : nil
+ if (reftext = doc.references[:ids][refid])
+ text ||= reftext
+ else
+ # FIXME we get false negatives for reference to bibref
+ warn %(asciidoctor: WARNING: #{::File.basename(doc.attr 'docfile')}: invalid reference to unknown local anchor (or valid bibref): #{refid})
+ end
end
- # FIXME seems like text should be prepared already
- # FIXME would be nice to know what type the target is (e.g., bibref)
- text = node.text || (node.document.references[:ids][refid] || %([#{refid}]))
- %(<a#{id_attr} href="#{target}" class="xref">#{text}</a>#{WordJoiner})
+ %(<a#{id_attr} href="#{target}" class="xref">#{text || "[#{refid}]"}</a>)
when :ref
%(<a id="#{target}"></a>)
when :link
- %(<a href="#{target}" class="link">#{node.text}</a>#{WordJoiner})
+ %(<a href="#{target}" class="link">#{node.text}</a>)
when :bibref
- %(<a id="#{target}" href="#xref-#{target}">[#{target}]</a>#{WordJoiner})
+ if @xrefs_seen.include? target
+ %(<a id="#{target}" href="#xref-#{target}">[#{target}]</a>)
+ else
+ %(<a id="#{target}"></a>[#{target}])
+ end
end
end
def inline_break node
%(#{node.text}<br/>)
end
def inline_button node
- %(<b class="button">[<span class="label">#{node.text}</span>]</b>#{WordJoiner})
+ %(<b class="button">[<span class="label">#{node.text}</span>]</b>)
end
def inline_callout node
- num = "\u2460"
+ num = CalloutStartNum
int_num = node.text.to_i
(int_num - 1).times { num = num.next }
%(<i class="conum" data-value="#{int_num}">#{num}</i>)
end
@@ -723,11 +745,11 @@
%(<mark class="noteref" title="Unresolved note reference">#{node.text}</mark>)
end
end
def inline_image node
- if (type = node.type) == 'icon'
+ if node.type == 'icon'
@icon_names << (icon_name = node.target)
i_classes = ['icon', %(i-#{icon_name})]
i_classes << %(icon-#{node.attr 'size'}) if node.attr? 'size'
i_classes << %(icon-flip-#{(node.attr 'flip')[0]}) if node.attr? 'flip'
i_classes << %(icon-rotate-#{node.attr 'rotate'}) if node.attr? 'rotate'
@@ -746,11 +768,11 @@
def inline_kbd node
if (keys = node.attr 'keys').size == 1
%(<kbd>#{keys[0]}</kbd>)
else
- key_combo = keys.map {|key| %(<kbd>#{key}</kbd>+) }.join.chop
+ key_combo = keys.map {|key| %(<kbd>#{key}</kbd>) }.join '+'
%(<span class="keyseq">#{key_combo}</span>)
end
end
def inline_menu node
@@ -768,44 +790,42 @@
end
def inline_quoted node
case node.type
when :strong
- %(<strong>#{node.text}</strong>#{WordJoiner})
+ %(<strong>#{node.text}</strong>)
when :emphasis
- %(<em>#{node.text}</em>#{WordJoiner})
+ %(<em>#{node.text}</em>)
when :monospaced
- %(<code class="literal">#{node.text}</code>#{WordJoiner})
+ %(<code class="literal">#{node.text}</code>)
when :double
#%(&#x201c;#{node.text}&#x201d;)
%(“#{node.text}”)
when :single
#%(&#x2018;#{node.text}&#x2019;)
%(‘#{node.text}’)
when :superscript
- %(<sup>#{node.text}</sup>#{WordJoiner})
+ %(<sup>#{node.text}</sup>)
when :subscript
- %(<sub>#{node.text}</sub>#{WordJoiner})
+ %(<sub>#{node.text}</sub>)
else
node.text
end
end
def convert_content node
- if node.content_model == :simple
- %(<p>#{node.content}</p>)
- else
- node.content
- end
+ node.content_model == :simple ? %(<p>#{node.content}</p>) : node.content
end
+ # FIXME merge into with xml_sanitize helper
def xml_sanitize value, target = :attribute
- sanitized = (value.include? '<') ? value.gsub(XmlElementRx, '').tr_s(' ', ' ').strip : value
+ sanitized = (value.include? '<') ? value.gsub(XmlElementRx, '').strip.tr_s(' ', ' ') : value
if target == :plain && (sanitized.include? ';')
- sanitized = sanitized.gsub(CharEntityRx) { [$1.to_i].pack('U*') }.gsub(FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap)
+ sanitized = sanitized.gsub(CharEntityRx) { [$1.to_i].pack 'U*' } if sanitized.include? '&#'
+ sanitized = sanitized.gsub(FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap)
elsif target == :attribute
- sanitized = sanitized.gsub(WordJoinerRx, '').gsub('"', '&quot;')
+ sanitized = sanitized.gsub '"', '&quot;' if sanitized.include? '"'
end
sanitized
end
# TODO make check for last content paragraph a feature of Asciidoctor
@@ -820,21 +840,62 @@
nil
end
end
class DocumentIdGenerator
+ ReservedIds = %w(cover nav ncx)
+ CharRefRx = /&(?:([a-zA-Z]{2,})|#(\d{2,6})|#x([a-fA-F0-9]{2,5}));/
+ if defined? __dir__
+ InvalidIdCharsRx = /[^\p{Word}]+/
+ LeadingDigitRx = /^\p{Nd}/
+ else
+ InvalidIdCharsRx = /[^[:word:]]+/
+ LeadingDigitRx = /^[[:digit:]]/
+ end
class << self
- def generate_id doc
+ def generate_id doc, pre = nil, sep = nil
+ synthetic = false
unless (id = doc.id)
- id = if doc.header?
- doc.doctitle(sanitize: true).gsub(WordJoinerRx, '').downcase.delete(':').tr_s(' ', '-').tr_s('-', '-')
+ # NOTE we assume pre is a valid ID prefix and that pre and sep only contain valid ID chars
+ pre ||= '_'
+ sep = sep ? sep.chr : '_'
+ if doc.header?
+ id = doc.doctitle sanitize: true
+ id = id.gsub CharRefRx do
+ $1 ? ($1 == 'amp' ? 'and' : sep) : ((d = $2 ? $2.to_i : $3.hex) == 8217 ? '' : ([d].pack 'U*'))
+ end if id.include? '&'
+ id = id.downcase.gsub InvalidIdCharsRx, sep
+ if id.empty?
+ id, synthetic = nil, true
+ else
+ unless sep.empty?
+ if (id = id.tr_s sep, sep).end_with? sep
+ if id == sep
+ id, synthetic = nil, true
+ else
+ id = (id.start_with? sep) ? id[1..-2] : id.chop
+ end
+ elsif id.start_with? sep
+ id = id[1..-1]
+ end
+ end
+ unless synthetic
+ if pre.empty?
+ id = %(_#{id}) if LeadingDigitRx =~ id
+ elsif !(id.start_with? pre)
+ id = %(#{pre}#{id})
+ end
+ end
+ end
elsif (first_section = doc.first_section)
- first_section.id
+ id = first_section.id
else
- %(document-#{doc.object_id})
+ synthetic = true
end
+ id = %(#{pre}document#{sep}#{doc.object_id}) if synthetic
end
+ warn %(asciidoctor: ERROR: chapter uses a reserved ID: #{id}) if !synthetic && (ReservedIds.include? id)
id
end
end
end
@@ -852,17 +913,23 @@
end
case (ebook_format = document.attributes['ebook-format'])
when 'epub3', 'kf8'
# all good
when 'mobi'
- document.attributes['ebook-format'] = 'kf8'
+ ebook_format = document.attributes['ebook-format'] = 'kf8'
else
+ # QUESTION should we display a warning?
ebook_format = document.attributes['ebook-format'] = 'epub3'
end
document.attributes[%(ebook-format-#{ebook_format})] = ''
# Only fire SpineItemProcessor for top-level include directives
include_processor SpineItemProcessor.new(document)
- treeprocessor { process {|doc| doc.id = DocumentIdGenerator.generate_id doc } }
+ treeprocessor do
+ process do |doc|
+ doc.id = DocumentIdGenerator.generate_id doc, (doc.attr 'idprefix'), (doc.attr 'idseparator')
+ nil
+ end
+ end
end
end
end
end