# encoding: UTF-8 require_relative 'spine_item_processor' require_relative 'font_icon_map' module Asciidoctor module Epub3 # tried 8288, but it didn't work in older readers WordJoiner = [65279].pack 'U*' WordJoinerRx = RUBY_ENGINE_JRUBY ? /\uFEFF/ : WordJoiner # Public: The main converter for the epub3 backend that handles packaging the # EPUB3 or KF8 publication file. class Converter include ::Asciidoctor::Converter include ::Asciidoctor::Writer register_for 'epub3' def initialize backend, opts super basebackend 'html' outfilesuffix '.epub' # dummy outfilesuffix since it may be .mobi htmlsyntax 'xml' @validate = false @extract = false end def convert spine_doc, name = nil @validate = true if spine_doc.attr? 'ebook-validate' @extract = true if spine_doc.attr? 'ebook-extract' Packager.new spine_doc, (spine_doc.references[:spine_items] || [spine_doc]), spine_doc.attributes['ebook-format'].to_sym end # FIXME we have to package in write because we don't have access to target before this point def write packager, target packager.package validate: @validate, extract: @extract, target: target nil end end # Public: The converter for the epub3 backend that converts the individual # content documents in an EPUB3 publication. class ContentConverter include ::Asciidoctor::Converter register_for 'epub3-xhtml5' WordJoiner = Epub3::WordJoiner EOL = "\n" NoBreakSpace = ' ' ThinNoBreakSpace = ' ' RightAngleQuote = '›' XmlElementRx = /<\/?.+?>/ CharEntityRx = /&#(\d{2,5});/ NamedEntityRx = /&([A-Z]+);/ UppercaseTagRx = /<(\/)?([A-Z]+)>/ FromHtmlSpecialCharsMap = { '<' => '<', '>' => '>', '&' => '&' } FromHtmlSpecialCharsRx = /(?:#{FromHtmlSpecialCharsMap.keys * '|'})/ ToHtmlSpecialCharsMap = { '&' => '&', '<' => '<', '>' => '>' } ToHtmlSpecialCharsRx = /[#{ToHtmlSpecialCharsMap.keys.join}]/ OpenParagraphTagRx = /^

/ CloseParagraphTagRx = /<\/p>$/ def initialize backend, opts super basebackend 'html' outfilesuffix '.xhtml' htmlsyntax 'xml' @xrefs_used = ::Set.new @icon_names = [] end def convert node, name = nil if respond_to?(name ||= node.node_name) send name, node else warn %(conversion missing in epub3 backend for #{name}) end end # TODO aggregate authors of spine document into authors attribute(s) on main document def navigation_document node, spine doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, '' lines = [%( #{doctitle_sanitized}

#{doctitle_sanitized}

) lines * EOL end def document node docid = node.id if (doctitle = node.doctitle partition: true, sanitize: true, use_fallback: true).subtitle? title = doctitle.main title_upper = title.upcase subtitle = doctitle.subtitle else # HACK until we get proper handling of title-only in CSS title = title_upper = '' subtitle = doctitle.combined end doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, '' subtitle_formatted = subtitle.gsub(WordJoinerRx, '').split(' ').map {|w| %(#{w}) } * ' ' # FIXME make this uppercase routine more intelligent, less fragile subtitle_formatted_upper = subtitle_formatted.upcase .gsub(UppercaseTagRx) { %(<#{$1}#{$2.downcase}>) } .gsub(NamedEntityRx) { %(&#{$1.downcase};) } author = node.attr 'author' username = node.attr 'username', 'default' # FIXME needs to resolve to the imagesdir of the spine document, not this document #imagesdir = (node.attr 'imagesdir', '.').chomp '/' #imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/)) imagesdir = 'images/' mark_last_paragraph node content = node.content # NOTE must run after content is resolved # TODO perhaps create dynamic CSS file? if @icon_names.empty? icon_css_head = icon_css_scoped = nil else icon_defs = @icon_names.map {|name| %(.i-#{name}::before { content: "#{FontIconMap[name.tr('-', '_').to_sym]}"; }) } * EOL icon_css_head = %( ) # NOTE Namo Pubtree requires icon CSS to be repeated inside (or in a linked stylesheet); wrap in div to hide from Aldiko icon_css_scoped = (node.attr? 'ebook-format', 'kf8') ? nil : %( ) end # NOTE kindlegen seems to mangle the
element, so we wrap its content in a div lines = [%( #{doctitle_sanitized} #{icon_css_head}
#{icon_css_scoped}

#{title_upper}#{subtitle ? %[ #{subtitle_formatted_upper}] : nil}

#{content})] if node.footnotes? # NOTE kindlegen seems to mangle the
element, so we wrap its content in a div lines << '
' end lines << '
' lines * EOL end def section node hlevel = node.level + 1 epub_type_attr = node.special ? %( epub:type="#{node.sectname}") : nil div_classes = [%(sect#{node.level}), node.role].compact title = node.title title_sanitized = xml_sanitize title if node.document.header? || node.level != 1 || node != node.document.first_section %(
#{title}#{(content = node.content).empty? ? nil : %[ #{content}]}
) else # document has no level-0 heading and this heading serves as the document title node.content end end # TODO support use of quote block as abstract def preamble node if (first_block = node.blocks[0]) && first_block.style == 'abstract' abstract first_block # REVIEW should we treat the preamble as an abstract in general? elsif first_block && node.blocks.size == 1 abstract first_block else node.content end end # QUESTION use convert_content? def open node node.content end def abstract node %(
#{convert_content node}
) end def paragraph node role = node.role # stack-head is the alternative to the default, inline-head (where inline means "run-in") head_stop = node.attr 'head-stop', (role && (node.has_role? 'stack-head') ? nil : '.') head = node.title? ? %(#{title = node.title}#{head_stop && title !~ /[[:punct:]]$/ ? head_stop : nil} ) : nil if role if node.has_role? 'signature' node.set_option 'hardbreaks' end %(

#{head}#{node.content}

) else %(

#{head}#{node.content}

) end end def pass node content = node.content if content == '' '
' else content end end def admonition node if node.title? title = node.title title_sanitized = xml_sanitize title title_attr = %( title="#{node.caption}: #{title_sanitized}") title_el = %(

#{title}

) else title_attr = %( title="#{node.caption}") title_el = nil end type = node.attr 'name' epub_type = case type when 'tip' 'help' when 'note' 'note' when 'important', 'warning', 'caution' 'warning' end %() end def example node title_div = node.title? ? %(
#{node.title}
) : nil %(
#{title_div}
#{convert_content node}
) end def floating_title node tag_name = %(h#{node.level + 1}) id_attribute = node.id ? %( id="#{node.id}") : nil %(<#{tag_name}#{id_attribute} class="#{['discrete', node.role].compact * ' '}">#{node.title}) end def listing node figure_classes = ['listing'] figure_classes << 'coalesce' if node.option? 'unbreakable' pre_classes = if node.style == 'source' ['source', %(language-#{node.attr 'language'})] else ['screen'] end title_div = node.title? ? %(
#{node.captioned_title}
) : nil # patches conums to fix extra or missing leading space # TODO apply this patch upstream to Asciidoctor %(
#{title_div}
#{(node.content || '').gsub(/(?
) end # QUESTION should we wrap the
 in either 
or
? def literal node %(
#{node.content}
) end def page_break node '
' end def thematic_break node '
' end def quote node footer_content = [] if attribution = (node.attr 'attribution') footer_content << attribution end if citetitle = (node.attr 'citetitle') citetitle_sanitized = xml_sanitize citetitle footer_content << %(#{citetitle}) end if node.title? footer_content << %(#{node.title}) end footer_tag = footer_content.empty? ? nil : %(
~ #{footer_content * ' '}
) content = (convert_content node).strip. sub(OpenParagraphTagRx, '

'). sub(CloseParagraphTagRx, '

') %(
#{content}#{footer_tag}
) end def verse node footer_content = [] if attribution = (node.attr 'attribution') footer_content << attribution end if citetitle = (node.attr 'citetitle') citetitle_sanitized = xml_sanitize citetitle footer_content << %(#{citetitle}) end footer_tag = footer_content.size > 0 ? %( ~ #{footer_content * ', '}) : nil %(
#{node.content}#{footer_tag}
) end def sidebar node classes = ['sidebar'] if node.title? classes << 'titled' title = node.title title_sanitized = xml_sanitize title title_attr = %( title="#{title_sanitized}") title_upper = title.upcase.gsub(NamedEntityRx) { %(&#{$1.downcase};) } title_el = %(

#{title_upper}

) else title_attr = nil title_el = nil end %() end def table node lines = [%(
)] lines << %(
) table_id_attr = node.id ? %( id="#{node.id}") : nil frame_class = { 'all' => 'table-framed', 'topbot' => 'table-framed-topbot', 'sides' => 'table-framed-sides' } grid_class = { 'all' => 'table-grid', 'rows' => 'table-grid-rows', 'cols' => 'table-grid-cols' } table_classes = %W(table #{frame_class[(node.attr 'frame')] || frame_class['topbot']} #{grid_class[(node.attr 'grid')] || grid_class['rows']}) if (role = node.role) table_classes << role end table_class_attr = %( class="#{table_classes * ' '}") table_styles = [] unless node.option? 'autowidth' table_styles << %(width: #{node.attr 'tablepcwidth'}%;) end table_style_attr = table_styles.size > 0 ? %( style="#{table_styles * ' '}") : nil lines << %() lines << %(#{node.captioned_title}) if node.title? if (node.attr 'rowcount') > 0 lines << '' #if node.option? 'autowidth' tag = %() node.columns.size.times do lines << tag end #else # node.columns.each do |col| # lines << %() # end #end lines << '' [:head, :foot, :body].select {|tsec| !node.rows[tsec].empty? }.each do |tsec| lines << %() node.rows[tsec].each do |row| lines << '' row.each do |cell| if tsec == :head cell_content = cell.text else case cell.style when :asciidoc cell_content = %(
#{cell.content}
) when :verse cell_content = %(
#{cell.text}
) when :literal cell_content = %(
#{cell.text}
) else cell_content = '' cell.content.each do |text| cell_content = %(#{cell_content}

#{text}

) end end end cell_tag_name = (tsec == :head || cell.style == :header ? 'th' : 'td') cell_classes = [] if (halign = cell.attr 'halign') && halign != 'left' cell_classes << 'halign-left' end if (halign = cell.attr 'valign') && halign != 'top' cell_classes << 'valign-top' end cell_class_attr = cell_classes.size > 0 ? %( class="#{cell_classes * ' '}") : nil cell_colspan_attr = cell.colspan ? %( colspan="#{cell.colspan}") : nil cell_rowspan_attr = cell.rowspan ? %( rowspan="#{cell.rowspan}") : nil cell_style_attr = (node.document.attr? 'cellbgcolor') ? %( style="background-color: #{node.document.attr 'cellbgcolor'};") : nil lines << %(<#{cell_tag_name}#{cell_class_attr}#{cell_colspan_attr}#{cell_rowspan_attr}#{cell_style_attr}>#{cell_content}) end lines << '' end lines << %(
) end end lines << '
' lines * EOL end def colist node lines = ['
    '] num = "\u2460" node.items.each_with_index do |item, i| lines << %(
  1. #{num} #{item.text}
  2. ) num = num.next end lines << '
' end # TODO add complex class if list has nested blocks def dlist node lines = [] case (style = node.style) when 'itemized', 'ordered' list_tag_name = (style == 'itemized' ? 'ul' : 'ol') role = node.role subject_stop = node.attr 'subject-stop', (role && (node.has_role? 'stack') ? nil : ':') # QUESTION should we just use itemized-list and ordered-list as the class here? or just list? div_classes = [%(#{style}-list), role].compact list_class_attr = (node.option? 'brief') ? ' class="brief"' : nil lines << %(
<#{list_tag_name}#{list_class_attr}#{list_tag_name == 'ol' && (node.option? 'reversed') ? ' reversed="reversed"' : nil}>) node.items.each do |subjects, dd| # consists of one term (a subject) and supporting content subject = [*subjects].first.text subject_plain = xml_sanitize subject, :plain subject_element = %(#{subject}#{subject_stop && subject_plain !~ /[[:punct:]]$/ ? subject_stop : nil}) lines << '
  • ' if dd # NOTE: must wrap remaining text in a span to help webkit justify the text properly lines << %(#{subject_element}#{dd.text? ? %[ #{dd.text}] : nil}) lines << dd.content if dd.blocks? else lines << %(#{subject_element}) end lines << '
  • ' end lines << %(
    ) else lines << '
    ' node.items.each do |terms, dd| [*terms].each do |dt| lines << %(
    #{dt.text}
    ) end if dd lines << '
    ' if dd.blocks? lines << %(#{dd.text}) if dd.text? lines << dd.content else lines << %(#{dd.text}) end lines << '
    ' end end lines << '
    ' end lines * EOL end # TODO support start attribute def olist node complex = false div_classes = ['ordered-list', node.style, node.role].compact ol_classes = [node.style, ((node.option? 'brief') ? 'brief' : nil)].compact ol_class_attr = ol_classes.empty? ? nil : %( class="#{ol_classes * ' '}") id_attribute = node.id ? %( id="#{node.id}") : nil lines = [%()] lines << %(

    #{node.title}

    ) if node.title? lines << %() node.items.each do |item| lines << %(
  • #{item.text}) if item.blocks? lines << item.content complex = true unless item.blocks.size == 1 && ::Asciidoctor::List === item.blocks[0] end lines << '
  • ' end if complex div_classes << 'complex' lines[0] = %(
    ) end lines << '
    ' lines * EOL end def ulist node complex = false div_classes = ['itemized-list', node.style, node.role].compact # TODO could strip WordJoiner if brief since not using justify ul_classes = [node.style, ((node.option? 'brief') ? 'brief' : nil)].compact ul_class_attr = ul_classes.empty? ? nil : %( class="#{ul_classes * ' '}") id_attribute = node.id ? %( id="#{node.id}") : nil lines = [%()] lines << %(

    #{node.title}

    ) if node.title? lines << %() node.items.each do |item| lines << %(
  • #{item.text}) if item.blocks? lines << item.content complex = true unless item.blocks.size == 1 && ::Asciidoctor::List === item.blocks[0] end lines << '
  • ' end if complex div_classes << 'complex' lines[0] = %(
    ) end lines << '
    ' lines * EOL end def image node target = node.attr 'target' type = (::File.extname target)[1..-1] img_attrs = [%(alt="#{node.attr 'alt'}")] case type when 'svg' img_attrs << %(style="width: #{node.attr 'scaledwidth', '100%'};") # TODO make this a convenience method on document epub_properties = (node.document.attr 'epub-properties') || [] unless epub_properties.include? 'svg' epub_properties << 'svg' node.document.attributes['epub-properties'] = epub_properties end else if node.attr? 'scaledwidth' img_attrs << %(style="width: #{node.attr 'scaledwidth'};") end end =begin # NOTE to set actual width and height, use CSS width and height if type == 'svg' if node.attr? 'scaledwidth' img_attrs << %(width="#{node.attr 'scaledwidth'}") # Kindle #elsif node.attr? 'scaledheight' # img_attrs << %(width="#{node.attr 'scaledheight'}" height="#{node.attr 'scaledheight'}") # ePub3 elsif node.attr? 'scaledheight' img_attrs << %(height="#{node.attr 'scaledheight'}" style="max-height: #{node.attr 'scaledheight'} !important;") else # Aldiko doesn't not scale width to 100% by default img_attrs << %(width="100%") end end =end %(
    #{node.title? ? %[
    #{node.captioned_title}
    ] : nil}
    ) end def inline_anchor node target = node.target case node.type when :xref refid = (node.attr 'refid') || target id_attr = unless @xrefs_used.include? refid @xrefs_used << refid %( id="xref-#{refid}") end # FIXME seems like text should be prepared already # FIXME would be nice to know what type the target is (e.g., bibref) text = node.text || (node.document.references[:ids][refid] || %([#{refid}])) %(#{text}#{WordJoiner}) when :ref %() when :link %(#{node.text}#{WordJoiner}) when :bibref %([#{target}]#{WordJoiner}) end end def inline_break node %(#{node.text}
    ) end def inline_button node %([#{node.text}]#{WordJoiner}) end def inline_callout node num = "\u2460" int_num = node.text.to_i (int_num - 1).times { num = num.next } %(#{num}) end def inline_footnote node if (index = node.attr 'index') %([#{index}]) elsif node.type == :xref %(#{node.text}) end end def inline_image node if (type = node.type) == 'icon' @icon_names << (icon_name = node.target) i_classes = ['icon', %(i-#{icon_name})] i_classes << %(icon-#{node.attr 'size'}) if node.attr? 'size' i_classes << %(icon-flip-#{(node.attr 'flip')[0]}) if node.attr? 'flip' i_classes << %(icon-rotate-#{node.attr 'rotate'}) if node.attr? 'rotate' i_classes << node.role if node.role? %() else target = node.image_uri node.target class_attr = %( class="#{node.role}") if node.role? %(#{node.attr 'alt'}) end end def inline_indexterm node node.type == :visible ? node.text : '' end def inline_kbd node if (keys = node.attr 'keys').size == 1 %(#{keys[0]}) else key_combo = keys.map {|key| %(#{key}+) }.join.chop %(#{key_combo}) end end def inline_menu node menu = node.attr 'menu' # NOTE we swap right angle quote with chevron right from FontAwesome using CSS caret = %(#{NoBreakSpace}#{RightAngleQuote} ) if !(submenus = node.attr 'submenus').empty? submenu_path = submenus.map {|submenu| %(#{submenu}#{caret}) }.join.chop %(#{menu}#{caret}#{submenu_path} #{node.attr 'menuitem'}) elsif (menuitem = node.attr 'menuitem') %(#{menu}#{caret}#{menuitem}) else %(#{menu}) end end def inline_quoted node case node.type when :strong %(#{node.text}#{WordJoiner}) when :emphasis %(#{node.text}#{WordJoiner}) when :monospaced %(#{node.text}#{WordJoiner}) when :double #%(“#{node.text}”) %(“#{node.text}”) when :single #%(‘#{node.text}’) %(‘#{node.text}’) when :superscript %(#{node.text}#{WordJoiner}) when :subscript %(#{node.text}#{WordJoiner}) else node.text end end def convert_content node if node.content_model == :simple %(

    #{node.content}

    ) else node.content end end def xml_sanitize value, target = :attribute sanitized = (value.include? '<') ? value.gsub(XmlElementRx, '').tr_s(' ', ' ').strip : value if target == :plain && (sanitized.include? ';') sanitized = sanitized.gsub(CharEntityRx) { [$1.to_i].pack('U*') }.gsub(FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap) elsif target == :attribute sanitized = sanitized.gsub(WordJoinerRx, '').gsub('"', '"') end sanitized end # TODO make check for last content paragraph a feature of Asciidoctor def mark_last_paragraph root return unless (last_block = root.blocks[-1]) while last_block.context == :section && last_block.blocks? last_block = last_block.blocks[-1] end if last_block.context == :paragraph last_block.attributes['role'] = last_block.role? ? %(#{last_block.role} last) : 'last' end nil end end class DocumentIdGenerator class << self def generate_id doc unless (id = doc.id) id = if doc.header? doc.doctitle(sanitize: true).gsub(WordJoinerRx, '').downcase.delete(':').tr_s(' ', '-').tr_s('-', '-') elsif (first_section = doc.first_section) first_section.id else %(document-#{doc.object_id}) end end id end end end require_relative 'packager' Extensions.register do if (document = @document).backend == 'epub3' document.attributes['spine'] = '' document.set_attribute 'listing-caption', 'Listing' if !(defined? ::AsciidoctorJ) && (::Gem::try_activate 'pygments.rb') if document.set_attribute 'source-highlighter', 'pygments' document.set_attribute 'pygments-css', 'style' document.set_attribute 'pygments-style', 'bw' end end case (ebook_format = document.attributes['ebook-format']) when 'epub3', 'kf8' # all good when 'mobi' document.attributes['ebook-format'] = 'kf8' else ebook_format = document.attributes['ebook-format'] = 'epub3' end document.attributes[%(ebook-format-#{ebook_format})] = '' # Only fire SpineItemProcessor for top-level include directives include_processor SpineItemProcessor.new(document) treeprocessor { process {|doc| doc.id = DocumentIdGenerator.generate_id doc } } end end end end