# encoding: utf-8
require_relative 'spine_item_processor'
require_relative 'font_icon_map'
module Asciidoctor
module Epub3
# Public: The packaging converter registered for the epub3 backend. It turns the
# document node into a Packager and, when asked to write, has that packager
# produce the EPUB3 or KF8 publication file.
class Converter
  include ::Asciidoctor::Converter
  include ::Asciidoctor::Writer

  register_for 'epub3'

  # Declares an XML-syntax HTML backend and switches the packaging flags off.
  def initialize backend, opts
    super
    basebackend 'html'
    # dummy outfilesuffix since it may be .mobi
    outfilesuffix '.epub'
    htmlsyntax 'xml'
    @validate = @extract = false
  end

  # Returns a Packager for a document node, or delegates inline nodes (such as
  # an inline node in the doctitle of the spine document) to the content
  # converter.
  #
  # Raises ArgumentError for any other node name.
  def convert node, name = nil
    name ||= node.node_name
    if name == 'document'
      @validate = node.attr? 'ebook-validate'
      @extract = node.attr? 'ebook-extract'
      @compress = node.attr 'ebook-compress'
      spine_items = node.references[:spine_items] || [node]
      ebook_format = node.attributes['ebook-format'].to_sym
      return Packager.new node, spine_items, ebook_format
    end
    unless name.start_with? 'inline_'
      raise ::ArgumentError, %(Encountered unexpected node in epub3 package converter: #{name})
    end
    @content_converter ||= ::Asciidoctor::Converter::Factory.default.create 'epub3-xhtml5'
    @content_converter.convert node, name
  end

  # FIXME we have to package in write because we don't have access to target before this point
  #
  # Asks the packager to build the publication at target using the flags
  # captured in #convert. Always returns nil.
  def write packager, target
    packager.package(validate: @validate, extract: @extract, compress: @compress, target: target)
    nil
  end
end
# Public: The converter for the epub3 backend that converts the individual
# content documents in an EPUB3 publication.
class ContentConverter
include ::Asciidoctor::Converter
register_for 'epub3-xhtml5'
# Separator used when joining generated output lines.
EOL = %(\n)
# Typographic characters, spelled as escapes so they cannot be confused with a
# plain space when reading the source.
NoBreakSpace = %(\u00a0)
ThinNoBreakSpace = %(\u202f)
RightAngleQuote = '›'
# First circled digit; callout numbers are derived from it.
CalloutStartNum = %(\u2460)
# Matches any opening or closing XML element tag.
XmlElementRx = /<\/?.+?>/
# Matches a decimal character reference such as &#8217; and captures the code point.
CharEntityRx = /&#(\d{2,6});/
# Predefined XML entities mapped back to the characters they stand for.
FromHtmlSpecialCharsMap = {
  '&lt;' => '<',
  '&gt;' => '>',
  '&amp;' => '&'
}
FromHtmlSpecialCharsRx = /(?:#{FromHtmlSpecialCharsMap.keys * '|'})/
# XML special characters mapped to the entities that escape them.
ToHtmlSpecialCharsMap = {
  '&' => '&amp;',
  '<' => '&lt;',
  '>' => '&gt;'
}
ToHtmlSpecialCharsRx = /[#{ToHtmlSpecialCharsMap.keys.join}]/
# Match a paragraph tag at the start / end of a line, respectively.
OpenParagraphTagRx = /^<p>/
CloseParagraphTagRx = /<\/p>$/
# Declares an XML-syntax HTML backend with the .xhtml suffix and starts with an
# empty set of seen xref targets and an empty list of icon names.
def initialize backend, opts
  super
  basebackend 'html'
  outfilesuffix '.xhtml'
  htmlsyntax 'xml'
  @xrefs_seen, @icon_names = ::Set.new, []
end
# Dispatches node to the method named after name (the node's own node_name when
# name is not given). Emits a warning and returns nil when this converter does
# not respond to that name.
def convert node, name = nil
  name ||= node.node_name
  return send(name, node) if respond_to? name
  warn %(asciidoctor: WARNING: conversion missing in epub3 backend for #{name})
end
# Converts a document node into the text of one content document.
#
# Splits the doctitle into a title and subtitle, builds a byline unless the
# 'publication-type' attribute is 'book' (the default), marks the last
# paragraph via mark_last_paragraph, resolves node.content and, when any icon
# names were collected in @icon_names, generates one CSS rule per icon from
# FontIconMap.
#
# NOTE(review): the string literals in this method carry no markup, several
# locals (docid, title, subtitle_formatted, username, imagesdir, icon_defs) are
# never used, and the body breaks off after `if node.footnotes?`. This copy
# appears to have lost text and is not syntactically complete; confirm against
# the canonical file before changing the code.
def document node
docid = node.id
if (doctitle = node.doctitle partition: true, use_fallback: true).subtitle?
title = %(#{doctitle.main} )
subtitle = doctitle.subtitle
else
# HACK until we get proper handling of title-only in CSS
title = ''
subtitle = doctitle.combined
end
doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).to_s
subtitle_formatted = subtitle.split.map {|w| %(#{w}) } * ' '
if (node.attr 'publication-type', 'book') == 'book'
byline = nil
else
author = node.attr 'author'
username = node.attr 'username', 'default'
imagesdir = (node.references[:spine].attr 'imagesdir', '.').chomp '/'
imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/))
byline = %(
#{author}
#{EOL})
end
mark_last_paragraph node
content = node.content
# NOTE must run after content is resolved
# TODO perhaps create dynamic CSS file?
if @icon_names.empty?
icon_css_head = icon_css_scoped = nil
else
icon_defs = @icon_names.map {|name|
%(.i-#{name}::before { content: "#{FontIconMap[name.tr('-', '_').to_sym]}"; })
} * EOL
icon_css_head = %(
)
# NOTE Namo Pubtree requires icon CSS to be repeated inside (or in a linked stylesheet); wrap in div to hide from Aldiko
icon_css_scoped = (node.attr? 'ebook-format', 'kf8') ? nil : %(
)
end
# NOTE kindlegen seems to mangle the element, so we wrap its content in a div
lines = [%(
#{doctitle_sanitized}
#{icon_css_head}
#{icon_css_scoped}
#{content})]
if node.footnotes?
# NOTE kindlegen seems to mangle the
'
lines * EOL
end
# NOTE embedded is used for AsciiDoc table cell content
#
# Returns the node's converted content without any wrapper.
def embedded node
  body = node.content
  body
end
# Converts a section node.
#
# Returns the title followed by the section content, except when the document
# has no header and this is its first, level-1 section; in that case the heading
# serves as the document title and only the content is returned.
#
# NOTE(review): hlevel, epub_type_attr, div_classes and title_sanitized are
# computed but never reach the result, and the literal carries no markup. The
# element tags look to have been stripped from this copy; confirm against the
# canonical file.
def section node
hlevel = node.level + 1
epub_type_attr = node.special ? %( epub:type="#{node.sectname}") : nil
div_classes = [%(sect#{node.level}), node.role].compact
title = node.title
title_sanitized = xml_sanitize title
if node.document.header? || node.level != 1 || node != node.document.first_section
%(#{title}#{(content = node.content).empty? ? nil : %[
#{content}]}
)
else
# document has no level-0 heading and this heading serves as the document title
node.content
end
end
# TODO support use of quote block as abstract
#
# Converts the preamble. The first block is rendered through #abstract when it
# has the 'abstract' style or when it is the only block in the preamble;
# otherwise the preamble content is returned as is.
def preamble node
  lead = node.blocks[0]
  # REVIEW should we treat the preamble as an abstract in general?
  return abstract(lead) if lead && (lead.style == 'abstract' || node.blocks.size == 1)
  node.content
end
# Converts an open block.
#
# When the node has an id or a role, the converted content is returned
# surrounded by line breaks; otherwise the converted content is returned bare.
#
# NOTE(review): id_attr and class_attr only steer the branch and never appear in
# the result, so the wrapper element that would carry them seems to have been
# stripped from this copy; confirm against the canonical file.
def open node
id_attr = node.id ? %( id="#{node.id}") : nil
class_attr = node.role ? %( class="#{node.role}") : nil
if id_attr || class_attr
%(
#{convert_content node}
)
else
convert_content node
end
end
# Converts an abstract: returns the node's converted content surrounded by
# line breaks.
#
# NOTE(review): the literal holds no wrapper markup, which suggests the tags
# were stripped from this copy; confirm against the canonical file.
def abstract node
%(
#{convert_content node}
)
end
# Converts a paragraph, optionally preceded by a run-in head built from the
# block title.
#
# The head is the title plus a stop character (the 'head-stop' attribute, '.'
# by default, none by default for the 'stack-head' role) unless the title
# already ends in punctuation. A paragraph with the 'signature' role gets the
# 'hardbreaks' option set before its content is read.
#
# NOTE(review): both branches now return the same text and the role never
# reaches the output, so the paragraph markup seems to have been stripped from
# this copy; confirm against the canonical file.
def paragraph node
role = node.role
# stack-head is the alternative to the default, inline-head (where inline means "run-in")
head_stop = node.attr 'head-stop', (role && (node.has_role? 'stack-head') ? nil : '.')
# FIXME promote regexp to constant
head = node.title? ? %(#{title = node.title}#{head_stop && title !~ /[[:punct:]]$/ ? head_stop : nil} ) : nil
if role
node.set_option 'hardbreaks' if node.has_role? 'signature'
%(
#{head}#{node.content}
)
else
%(
#{head}#{node.content}
)
end
end
# Converts a passthrough block by returning its content.
#
# NOTE(review): empty content is answered with an empty literal, so both
# outcomes are currently equal in value; the special case may have lost its
# original literals in this copy. Confirm against the canonical file.
def pass node
  content = node.content
  content == '' ? '' : content
end
# Converts an admonition block.
#
# Prepares an id attribute, a title attribute (the caption, followed by the
# sanitized block title when there is one), an optional title element, and an
# epub type derived from the admonition name: tip => help, note => note,
# important/warning/caution => warning.
#
# NOTE(review): the method currently returns an empty string and none of the
# prepared values are used; the result literal appears to have been stripped
# from this copy. Confirm against the canonical file.
def admonition node
id_attr = node.id ? %( id="#{node.id}") : nil
if node.title?
title = node.title
title_sanitized = xml_sanitize title
title_attr = %( title="#{node.caption}: #{title_sanitized}")
title_el = %(
#{title}
)
else
title_attr = %( title="#{node.caption}")
title_el = nil
end
type = node.attr 'name'
epub_type = case type
when 'tip'
'help'
when 'note'
'note'
when 'important', 'warning', 'caution'
'warning'
end
%()
end
# Converts an example block: the block title (when present) followed by the
# converted content, each on its own line.
#
# NOTE(review): id_attr is computed but unused and the literals hold no markup,
# which suggests the tags were stripped from this copy; confirm against the
# canonical file.
def example node
id_attr = node.id ? %( id="#{node.id}") : nil
title_div = node.title? ? %(
#{node.title}
) : nil
%(
#{title_div}
#{convert_content node}
)
end
# Converts a discrete (floating) title into a heading element.
#
# The heading tag is one level deeper than the node's level (level 1 => h2), the
# id attribute is emitted only when the node has an id, and the class list is
# 'discrete' followed by the node's role, if any.
#
# Returns the heading element as a String.
def floating_title node
  tag_name = %(h#{node.level + 1})
  id_attribute = node.id ? %( id="#{node.id}") : nil
  class_names = ['discrete', node.role].compact * ' '
  # the closing tag needs its leading "</"; without it the element is never closed
  %(<#{tag_name}#{id_attribute} class="#{class_names}">#{node.title}</#{tag_name}>)
end
# Converts a listing block.
#
# Prepares the figure classes ('listing', plus 'coalesce' for the 'unbreakable'
# option), the pre classes ('source' and language-<language> for source
# listings, 'screen' otherwise) and an optional captioned title.
#
# NOTE(review): the method currently returns an empty string and none of the
# prepared values are used; the result literal (and the conum patch the comment
# below refers to) appears to have been stripped from this copy. Confirm
# against the canonical file.
def listing node
figure_classes = ['listing']
figure_classes << 'coalesce' if node.option? 'unbreakable'
pre_classes = node.style == 'source' ? ['source', %(language-#{node.attr 'language'})] : ['screen']
title_div = node.title? ? %(#{node.captioned_title}
) : nil
# patches conums to fix extra or missing leading space
# TODO remove patch once upgrading to Asciidoctor 1.5.6
%()
end
# QUESTION should we wrap the <pre> element in either a <div> or a <figure>?
# Converts a literal block: returns the node's content surrounded by line
# breaks.
#
# NOTE(review): the literal holds no wrapper markup, which suggests the tags
# were stripped from this copy; confirm against the canonical file.
def literal node
%(
#{node.content}
)
end
# Converts a page break. Currently yields an empty string.
#
# NOTE(review): nothing is emitted for the break; the literal may have lost its
# markup in this copy. Confirm against the canonical file.
def page_break node
  %()
end
# Converts a thematic break. Currently yields an empty string.
#
# NOTE(review): nothing is emitted for the break; the literal may have lost its
# markup in this copy. Confirm against the canonical file.
def thematic_break node
  %()
end
# Converts a quote block.
#
# Builds the id and class attributes ('blockquote' plus the role, if any) and
# collects footer entries from the 'attribution' attribute, the 'citetitle'
# attribute and the block title.
#
# NOTE(review): from the `content = ...` statement onward the text is truncated
# and runs straight into the tail of what looks like a callout-list converter
# (items numbered starting from CalloutStartNum). Whatever was defined in
# between is missing from this copy, so this span is not syntactically complete;
# confirm against the canonical file before changing the code.
def quote node
id_attr = %( id="#{node.id}") if node.id
class_attr = (role = node.role) ? %( class="blockquote #{role}") : ' class="blockquote"'
footer_content = []
if (attribution = node.attr 'attribution')
footer_content << attribution
end
if (citetitle = node.attr 'citetitle')
citetitle_sanitized = xml_sanitize citetitle
footer_content << %(#{citetitle})
end
if node.title?
footer_content << %(#{node.title})
end
footer_tag = footer_content.empty? ? nil : %(
)
content = (convert_content node).strip.
sub(OpenParagraphTagRx, '
']
num = CalloutStartNum
node.items.each_with_index do |item, i|
lines << %(
#{num} #{item.text}
)
num = num.next
end
lines << '
'
end
# TODO add complex class if list has nested blocks
#
# Converts a description list.
#
# For the 'itemized' and 'ordered' styles each entry is rendered as a list item
# (ul or ol) that starts with the first term as its subject, followed by a stop
# character (the 'subject-stop' attribute, ':' by default, none by default for
# the 'stack' role) unless the subject already ends in punctuation. Any other
# style emits every term followed by the description text and/or blocks.
#
# NOTE(review): the literals hold no markup, several `end` keywords are missing,
# and `complex` and `div_classes` are referenced in the else branch without
# being defined in the visible text. This copy appears to have lost text and is
# not syntactically complete; confirm against the canonical file before changing
# the code.
def dlist node
lines = []
case (style = node.style)
when 'itemized', 'ordered'
list_tag_name = (style == 'itemized' ? 'ul' : 'ol')
role = node.role
subject_stop = node.attr 'subject-stop', (role && (node.has_role? 'stack') ? nil : ':')
# QUESTION should we just use itemized-list and ordered-list as the class here? or just list?
div_classes = [%(#{style}-list), role].compact
list_class_attr = (node.option? 'brief') ? ' class="brief"' : nil
lines << %(
<#{list_tag_name}#{list_class_attr}#{list_tag_name == 'ol' && (node.option? 'reversed') ? ' reversed="reversed"' : nil}>)
node.items.each do |subjects, dd|
# consists of one term (a subject) and supporting content
subject = [*subjects].first.text
subject_plain = xml_sanitize subject, :plain
subject_element = %(#{subject}#{subject_stop && subject_plain !~ /[[:punct:]]$/ ? subject_stop : nil})
lines << '
'
if dd
# NOTE: must wrap remaining text in a span to help webkit justify the text properly
lines << %(#{subject_element}#{dd.text? ? %[ #{dd.text}] : nil})
lines << dd.content if dd.blocks?
else
lines << %(#{subject_element})
end
lines << '
'
end
lines << %(#{list_tag_name}>
)
else
lines << '
'
node.items.each do |terms, dd|
[*terms].each do |dt|
lines << %(
#{dt.text}
)
end
if dd
lines << '
'
if dd.blocks?
lines << %(#{dd.text}) if dd.text?
lines << dd.content
else
lines << %(#{dd.text})
end
lines << '
'
end
if complex
div_classes << 'complex'
lines[0] = %(
)
end
lines << '
'
lines * EOL
end
# Converts a block image.
#
# Collects the img attributes: alt text always; for SVG targets a width style
# ('scaledwidth', 100% by default) and an 'svg' entry in the document's
# 'epub-properties' attribute; for other targets a width style only when
# 'scaledwidth' is set.
#
# NOTE(review): the method currently returns an empty string, and target,
# id_attr and img_attrs never reach the result; the result literal appears to
# have been stripped from this copy. Confirm against the canonical file.
def image node
target = node.attr 'target'
type = (::File.extname target)[1..-1]
id_attr = node.id ? %( id="#{node.id}") : ''
img_attrs = [%(alt="#{node.attr 'alt'}")]
case type
when 'svg'
img_attrs << %(style="width: #{node.attr 'scaledwidth', '100%'}")
# TODO make this a convenience method on document
epub_properties = (node.document.attributes['epub-properties'] ||= [])
epub_properties << 'svg' unless epub_properties.include? 'svg'
else
if node.attr? 'scaledwidth'
img_attrs << %(style="width: #{node.attr 'scaledwidth'}")
end
end
=begin
# NOTE to set actual width and height, use CSS width and height
if type == 'svg'
if node.attr? 'scaledwidth'
img_attrs << %(width="#{node.attr 'scaledwidth'}")
# Kindle
#elsif node.attr? 'scaledheight'
# img_attrs << %(width="#{node.attr 'scaledheight'}" height="#{node.attr 'scaledheight'}")
# ePub3
elsif node.attr? 'scaledheight'
img_attrs << %(height="#{node.attr 'scaledheight'}" style="max-height: #{node.attr 'scaledheight'} !important")
else
# Aldiko doesn't not scale width to 100% by default
img_attrs << %(width="100%")
end
end
=end
%()
end
# Converts an inline anchor of type :xref, :ref, :link or :bibref.
#
# For an :xref with a 'path' attribute (an inter-document reference) the refid
# is split into the target document id and the id inside that document, the
# target document is looked up among the spine items, and the link text falls
# back to the referenced document's 'docreftext'/doctitle or to the text
# registered for the id. For a local :xref the text falls back to the target's
# xreftext (or the registered id text). A warning is printed whenever the
# target cannot be resolved. @xrefs_seen records every refid so that an id
# attribute is produced only for the first reference to a given target.
#
# NOTE(review): id_attr and target are computed but absent from the results,
# the :ref branch returns an empty string, and both :bibref outcomes are
# identical; the anchor markup appears to have been stripped from the result
# literals in this copy. Confirm against the canonical file.
def inline_anchor node
target = node.target
case node.type
when :xref # TODO would be helpful to know what type the target is (e.g., bibref)
doc, refid, text, path = node.document, ((node.attr 'refid') || target), node.text, (node.attr 'path')
# NOTE if path is non-nil, we have an inter-document xref
# QUESTION should we drop the id attribute for an inter-document xref?
if path
# ex. chapter-id#section-id
if node.attr 'fragment'
refdoc_id, refdoc_refid = refid.split '#', 2
if refdoc_id == refdoc_refid
target = target[0...(target.index '#')]
id_attr = %( id="xref--#{refdoc_id}")
else
id_attr = %( id="xref--#{refdoc_id}--#{refdoc_refid}")
end
# ex. chapter-id#
else
refdoc_id = refdoc_refid = refid
# inflate key to spine item root (e.g., transform chapter-id to chapter-id#chapter-id)
refid = %(#{refid}##{refid})
id_attr = %( id="xref--#{refdoc_id}")
end
id_attr = nil unless @xrefs_seen.add? refid
refdoc = doc.references[:spine_items].find {|it| refdoc_id == (it.id || (it.attr 'docname')) }
if refdoc
# QUESTION should we invoke xreftext for references in other documents?
if (refs = refdoc.references[:refs]) && ::Asciidoctor::Document === (ref = refs[refdoc_refid])
text ||= (ref.attr 'docreftext') || ref.doctitle
elsif (xreftext = refdoc.references[:ids][refdoc_refid])
text ||= xreftext
else
warn %(asciidoctor: WARNING: #{::File.basename(doc.attr 'docfile')}: invalid reference to unknown anchor in #{refdoc_id} chapter: #{refdoc_refid})
end
else
warn %(asciidoctor: WARNING: #{::File.basename(doc.attr 'docfile')}: invalid reference to anchor in unknown chapter: #{refdoc_id})
end
else
id_attr = (@xrefs_seen.add? refid) ? %( id="xref-#{refid}") : nil
if (refs = doc.references[:refs])
if ::Asciidoctor::AbstractNode === (ref = refs[refid])
xreftext = text || ref.xreftext((@xrefstyle ||= (doc.attr 'xrefstyle')))
end
else
xreftext = doc.references[:ids][refid]
end
if xreftext
text ||= xreftext
else
# FIXME we get false negatives for reference to bibref when using Asciidoctor < 1.5.6
warn %(asciidoctor: WARNING: #{::File.basename(doc.attr 'docfile')}: invalid reference to unknown local anchor (or valid bibref): #{refid})
end
end
%(#{text || "[#{refid}]"})
when :ref
%()
when :link
%(#{node.text})
when :bibref
if @xrefs_seen.include? target
%([#{target}])
else
%([#{target}])
end
end
end
# Converts an inline line break: the node's text followed by a single space.
#
# NOTE(review): no break element is emitted; the literal may have lost its
# markup in this copy. Confirm against the canonical file.
def inline_break node
  text = node.text
  %(#{text} )
end
# Converts an inline button macro: the node's text enclosed in square brackets.
def inline_button node
  label = node.text
  "[#{label}]"
end
# Converts an inline callout number into its glyph: CalloutStartNum advanced
# once for every step the callout number lies above 1. A number below 2
# (including non-numeric text, which reads as 0) yields CalloutStartNum itself.
def inline_callout node
  steps = node.text.to_i - 1
  glyph = (1..steps).inject(CalloutStartNum) {|current, _| current.next }
  %(#{glyph})
end
# Converts an inline footnote.
#
# Returns the footnote index in square brackets when the node has an 'index'
# attribute, the node's text for an :xref without an index, and nil otherwise.
def inline_footnote node
  index = node.attr 'index'
  return %([#{index}]) if index
  %(#{node.text}) if node.type == :xref
end
# Converts an inline image or icon.
#
# For an icon the target name is recorded in @icon_names and the CSS classes
# are collected: 'icon', i-<name>, plus size, flip, rotate and role variants
# when those are set. For an image the URI is resolved, the alt and class
# attributes are prepared, and a width style is added (SVG targets default to
# 100% and register 'svg' in the document's 'epub-properties' attribute).
#
# NOTE(review): both branches currently return an empty string and the
# collected classes/attributes are unused; the result literals appear to have
# been stripped from this copy. Also note that node.type is compared with the
# String 'icon' here while other methods compare node.type with Symbols; TODO
# confirm which form the caller supplies.
def inline_image node
if node.type == 'icon'
@icon_names << (icon_name = node.target)
i_classes = ['icon', %(i-#{icon_name})]
i_classes << %(icon-#{node.attr 'size'}) if node.attr? 'size'
i_classes << %(icon-flip-#{(node.attr 'flip')[0]}) if node.attr? 'flip'
i_classes << %(icon-rotate-#{node.attr 'rotate'}) if node.attr? 'rotate'
i_classes << node.role if node.role?
%()
else
target = node.image_uri node.target
img_attrs = [%(alt="#{node.attr 'alt'}"), %(class="inline#{node.role? ? " #{node.role}" : ''}")]
if target.end_with? '.svg'
img_attrs << %(style="width: #{node.attr 'scaledwidth', '100%'}")
# TODO make this a convenience method on document
epub_properties = (node.document.attributes['epub-properties'] ||= [])
epub_properties << 'svg' unless epub_properties.include? 'svg'
elsif node.attr? 'scaledwidth'
img_attrs << %(style="width: #{node.attr 'scaledwidth'}")
end
%()
end
end
# Converts an inline index term: visible terms yield their text, every other
# type yields an empty string.
def inline_indexterm node
  return '' unless node.type == :visible
  node.text
end
# Converts a keyboard macro: a single key yields that key, several keys are
# joined with '+'.
#
# NOTE(review): no key markup is emitted; the literals may have lost their tags
# in this copy. Confirm against the canonical file.
def inline_kbd node
  keys = node.attr 'keys'
  return %(#{keys[0]}) if keys.size == 1
  combo = keys.map {|key| key.to_s } * '+'
  %(#{combo})
end
# Converts a menu macro into a path of menu, submenus and menu item separated by
# a caret (NoBreakSpace, RightAngleQuote and a space).
#
# With submenus the result is menu, caret, the submenus joined by carets, a
# space and the menu item; with only a menu item it is menu, caret, item; with
# neither it is the menu alone.
def inline_menu node
  menu = node.attr 'menu'
  # NOTE we swap right angle quote with chevron right from FontAwesome using CSS
  caret = %(#{NoBreakSpace}#{RightAngleQuote} )
  submenus = node.attr 'submenus'
  unless submenus.empty?
    # chop drops the space that trails the last caret
    trail = submenus.map {|submenu| %(#{submenu}#{caret}) }.join.chop
    return %(#{menu}#{caret}#{trail} #{node.attr 'menuitem'})
  end
  menuitem = node.attr 'menuitem'
  menuitem ? %(#{menu}#{caret}#{menuitem}) : %(#{menu})
end
# Converts inline formatted text according to the node type.
#
# :double and :single wrap the text in curly double / single quotation marks;
# the remaining known types yield a copy of the text; an unknown type yields the
# text object itself.
#
# NOTE(review): the :strong, :emphasis, :monospaced, :superscript and :subscript
# branches emit no markup, so their tags seem to have been stripped from this
# copy. The branches are kept apart on purpose; confirm against the canonical
# file.
def inline_quoted node
  text = node.text
  case node.type
  when :strong then %(#{text})
  when :emphasis then %(#{text})
  when :monospaced then %(#{text})
  when :double then %(“#{text}”)
  when :single then %(‘#{text}’)
  when :superscript then %(#{text})
  when :subscript then %(#{text})
  else text
  end
end
# Returns the node's content; content of a node with the :simple content model
# is surrounded by line breaks.
#
# NOTE(review): the :simple case adds no wrapper element; the literal may have
# lost its markup in this copy. Confirm against the canonical file.
def convert_content node
  return node.content unless node.content_model == :simple
  %(\n#{node.content}\n)
end
# FIXME merge with the other sanitize helper
#
# Strips XML element tags from value and prepares it for the given target.
#
# value  - the String to sanitize
# target - :attribute (default) to make the text safe inside a double-quoted
#          attribute value by escaping double quotes as &quot;, or :plain to
#          turn decimal character references and the predefined special-char
#          entities back into literal characters
#
# Tags are removed only when value contains '<'; the remaining text is then
# stripped and runs of spaces are squeezed into one.
#
# Returns the sanitized String (value itself when nothing had to change).
def xml_sanitize value, target = :attribute
  sanitized = (value.include? '<') ? value.gsub(XmlElementRx, '').strip.tr_s(' ', ' ') : value
  if target == :plain && (sanitized.include? ';')
    # only scan for character references when the text can actually hold one
    sanitized = sanitized.gsub(CharEntityRx) { [$1.to_i].pack 'U*' } if sanitized.include? '&#'
    sanitized = sanitized.gsub(FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap)
  elsif target == :attribute
    # a raw double quote would terminate the enclosing attribute value
    sanitized = sanitized.gsub '"', '&quot;' if sanitized.include? '"'
  end
  sanitized
end
# TODO make check for last content paragraph a feature of Asciidoctor
#
# Appends the 'last' role to the final block under root when that block is a
# paragraph. The final block is found by following the last child for as long
# as the current block is a section that has blocks. Always returns nil.
def mark_last_paragraph root
  tail = root.blocks[-1]
  return unless tail
  tail = tail.blocks[-1] while tail.context == :section && tail.blocks?
  return unless tail.context == :paragraph
  tail.attributes['role'] = tail.role? ? %(#{tail.role} last) : 'last'
  nil
end
# Returns the value with a leading space, or an empty string when the value is
# nil (or false).
def prepend_space value
  return '' unless value
  %( #{value})
end
end
# Generates the ID of a document: its explicit ID when it has one, otherwise an
# ID derived from the doctitle, otherwise the ID of the first section, otherwise
# a synthetic ID built from the object_id.
class DocumentIdGenerator
  ReservedIds = %w(cover nav ncx)
  CharRefRx = /&(?:([a-zA-Z][a-zA-Z]+\d{0,2})|#(\d\d\d{0,4})|#x([\da-fA-F][\da-fA-F][\da-fA-F]{0,3}));/
  if defined? __dir__
    InvalidIdCharsRx = /[^\p{Word}]+/
    LeadingDigitRx = /^\p{Nd}/
  else
    InvalidIdCharsRx = /[^[:word:]]+/
    LeadingDigitRx = /^[[:digit:]]/
  end

  class << self
    # Returns the ID for doc.
    #
    # doc - the document; must answer id, header?, doctitle and first_section
    # pre - the ID prefix (default: '_')
    # sep - the ID separator, of which only the first character is used
    #       (default: '_')
    #
    # Prints an error when a non-synthetic ID is one of the ReservedIds.
    def generate_id doc, pre = nil, sep = nil
      id = doc.id
      synthetic = false
      unless id
        # NOTE we assume pre is a valid ID prefix and that pre and sep only contain valid ID chars
        pre ||= '_'
        sep = sep ? sep.chr : '_'
        if doc.header?
          title = doc.doctitle sanitize: true
          id = id_from_title title, pre, sep
          # a title that leaves nothing usable behind falls back to a synthetic ID
          synthetic = id.nil?
        elsif (first_section = doc.first_section)
          id = first_section.id
        else
          synthetic = true
        end
        id = %(#{pre}document#{sep}#{doc.object_id}) if synthetic
      end
      warn %(asciidoctor: ERROR: chapter uses a reserved ID: #{id}) if !synthetic && (ReservedIds.include? id)
      id
    end

    private

    # Turns a sanitized title into an ID, or returns nil when no ID characters
    # remain after cleanup.
    def id_from_title title, pre, sep
      slug = title
      if slug.include? '&'
        slug = slug.gsub(CharRefRx) {
          if $1
            # named entity: &amp; reads as "and", any other becomes a separator
            $1 == 'amp' ? 'and' : sep
          else
            codepoint = $2 ? $2.to_i : $3.hex
            # drop the right single quotation mark (apostrophe), decode the rest
            codepoint == 8217 ? '' : ([codepoint].pack 'U*')
          end
        }
      end
      slug = slug.downcase.gsub InvalidIdCharsRx, sep
      return if slug.empty?
      unless sep.empty?
        slug = slug.tr_s sep, sep
        return if slug == sep
        slug = slug[1..-1] if slug.start_with? sep
        slug = slug.chop if slug.end_with? sep
      end
      if pre.empty?
        LeadingDigitRx =~ slug ? %(_#{slug}) : slug
      elsif slug.start_with? pre
        slug
      else
        %(#{pre}#{slug})
      end
    end
  end
end
require_relative 'packager'
# Registers the extensions needed by the epub3 backend; does nothing for any
# other backend.
Extensions.register do
  document = @document
  if document.backend == 'epub3'
    document.attributes['spine'] = ''
    document.set_attribute 'listing-caption', 'Listing'
    # use pygments for source highlighting when the gem can be activated (never under AsciidoctorJ)
    pygments_usable = !(defined? ::AsciidoctorJ) && (::Gem::try_activate 'pygments.rb')
    if pygments_usable && (document.set_attribute 'source-highlighter', 'pygments')
      document.set_attribute 'pygments-css', 'style'
      document.set_attribute 'pygments-style', 'bw'
    end
    ebook_format = document.attributes['ebook-format']
    unless ebook_format == 'epub3' || ebook_format == 'kf8'
      # 'mobi' is accepted as another name for kf8; anything else falls back to epub3
      # QUESTION should we display a warning?
      ebook_format = document.attributes['ebook-format'] = (ebook_format == 'mobi' ? 'kf8' : 'epub3')
    end
    document.attributes[%(ebook-format-#{ebook_format})] = ''
    # Only fire SpineItemProcessor for top-level include directives
    include_processor SpineItemProcessor.new(document)
    treeprocessor do
      process do |doc|
        doc.id = DocumentIdGenerator.generate_id doc, (doc.attr 'idprefix'), (doc.attr 'idseparator')
        nil
      end
    end
  end
end
end
end