# encoding: UTF-8 require_relative 'spine_item_processor' require_relative 'font_icon_map' module Asciidoctor module Epub3 # tried 8288, but it didn't work in older readers WordJoiner = [65279].pack 'U*' WordJoinerRx = RUBY_ENGINE_JRUBY ? /\uFEFF/ : WordJoiner # Public: The main converter for the epub3 backend that handles packaging the # EPUB3 or KF8 publication file. class Converter include ::Asciidoctor::Converter include ::Asciidoctor::Writer register_for 'epub3' def initialize backend, opts super basebackend 'html' outfilesuffix '.epub' # dummy outfilesuffix since it may be .mobi htmlsyntax 'xml' @validate = false @extract = false end def convert spine_doc, name = nil @validate = true if spine_doc.attr? 'ebook-validate' @extract = true if spine_doc.attr? 'ebook-extract' Packager.new spine_doc, (spine_doc.references[:spine_items] || [spine_doc]), spine_doc.attributes['ebook-format'].to_sym end # FIXME we have to package in write because we don't have access to target before this point def write packager, target packager.package validate: @validate, extract: @extract, target: target nil end end # Public: The converter for the epub3 backend that converts the individual # content documents in an EPUB3 publication. class ContentConverter include ::Asciidoctor::Converter register_for 'epub3-xhtml5' WordJoiner = Epub3::WordJoiner EOL = "\n" NoBreakSpace = ' ' ThinNoBreakSpace = ' ' RightAngleQuote = '›' XmlElementRx = /<\/?.+?>/ CharEntityRx = /&#(\d{2,5});/ NamedEntityRx = /&([A-Z]+);/ UppercaseTagRx = /<(\/)?([A-Z]+)>/ FromHtmlSpecialCharsMap = { '<' => '<', '>' => '>', '&' => '&' } FromHtmlSpecialCharsRx = /(?:#{FromHtmlSpecialCharsMap.keys * '|'})/ ToHtmlSpecialCharsMap = { '&' => '&', '<' => '<', '>' => '>' } ToHtmlSpecialCharsRx = /[#{ToHtmlSpecialCharsMap.keys.join}]/ OpenParagraphTagRx = /^

/ CloseParagraphTagRx = /<\/p>$/ def initialize backend, opts super basebackend 'html' outfilesuffix '.xhtml' htmlsyntax 'xml' @xrefs_used = ::Set.new @icon_names = [] end def convert node, name = nil if respond_to?(name ||= node.node_name) send name, node else warn %(conversion missing in epub3 backend for #{name}) end end # TODO aggregate authors of spine document into authors attribute(s) on main document def navigation_document node, spine doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, '' lines = [%( #{doctitle_sanitized}

#{doctitle_sanitized}

) lines * EOL end def document node docid = node.id if (doctitle = node.doctitle partition: true, sanitize: true, use_fallback: true).subtitle? title = doctitle.main title_upper = title.upcase subtitle = doctitle.subtitle else # HACK until we get proper handling of title-only in CSS title = title_upper = '' subtitle = doctitle.combined end doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, '' subtitle_formatted = subtitle.gsub(WordJoinerRx, '').split(' ').map {|w| %(#{w}) } * ' ' # FIXME make this uppercase routine more intelligent, less fragile subtitle_formatted_upper = subtitle_formatted.upcase .gsub(UppercaseTagRx) { %(<#{$1}#{$2.downcase}>) } .gsub(NamedEntityRx) { %(&#{$1.downcase};) } author = node.attr 'author' username = node.attr 'username', 'default' # FIXME needs to resolve to the imagesdir of the spine document, not this document #imagesdir = (node.attr 'imagesdir', '.').chomp '/' #imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/)) imagesdir = 'images/' mark_last_paragraph node content = node.content # NOTE must run after content is resolved # TODO perhaps create dynamic CSS file? if @icon_names.empty? icon_css_head = icon_css_scoped = nil else icon_defs = @icon_names.map {|name| %(.i-#{name}::before { content: "#{FontIconMap[name.tr('-', '_').to_sym]}"; }) } * EOL icon_css_head = %( ) # NOTE Namo Pubtree requires icon CSS to be repeated inside (or in a linked stylesheet); wrap in div to hide from Aldiko icon_css_scoped = (node.attr? 'ebook-format', 'kf8') ? nil : %( ) end # NOTE kindlegen seems to mangle the

element, so we wrap its content in a div lines = [%( #{doctitle_sanitized} #{icon_css_head}

#{icon_css_scoped}

#{author}

#{title_upper}#{subtitle ? %[ #{subtitle_formatted_upper}] : nil}

#{content})] if node.footnotes? # NOTE kindlegen seems to mangle the

' lines * EOL end def section node hlevel = node.level + 1 epub_type_attr = node.special ? %( epub:type="#{node.sectname}") : nil div_classes = [%(sect#{node.level}), node.role].compact title = node.title title_sanitized = xml_sanitize title if node.document.header? || node.level != 1 || node != node.document.first_section %(

#{title}#{(content = node.content).empty? ? nil : %[ #{content}]}

) else # document has no level-0 heading and this heading serves as the document title node.content end end # TODO support use of quote block as abstract def preamble node if (first_block = node.blocks[0]) && first_block.style == 'abstract' abstract first_block # REVIEW should we treat the preamble as an abstract in general? elsif first_block && node.blocks.size == 1 abstract first_block else node.content end end # QUESTION use convert_content? def open node node.content end def abstract node %(

#{convert_content node}

) end def paragraph node role = node.role # stack-head is the alternative to the default, inline-head (where inline means "run-in") head_stop = node.attr 'head-stop', (role && (node.has_role? 'stack-head') ? nil : '.') head = node.title? ? %(#{title = node.title}#{head_stop && title !~ /[[:punct:]]$/ ? head_stop : nil} ) : nil if role if node.has_role? 'signature' node.set_option 'hardbreaks' end %(

#{head}#{node.content}

) else %(

#{head}#{node.content}

) end end def pass node content = node.content if content == '' '

' else content end end def admonition node if node.title? title = node.title title_sanitized = xml_sanitize title title_attr = %( title="#{node.caption}: #{title_sanitized}") title_el = %(

#{title}

) else title_attr = %( title="#{node.caption}") title_el = nil end type = node.attr 'name' epub_type = case type when 'tip' 'help' when 'note' 'note' when 'important', 'warning', 'caution' 'warning' end %() end def example node title_div = node.title? ? %(

#{node.title}

) : nil %(

#{title_div}

#{convert_content node}

) end def floating_title node tag_name = %(h#{node.level + 1}) id_attribute = node.id ? %( id="#{node.id}") : nil %(<#{tag_name}#{id_attribute} class="#{['discrete', node.role].compact * ' '}">#{node.title}) end def listing node figure_classes = ['listing'] figure_classes << 'coalesce' if node.option? 'unbreakable' pre_classes = if node.style == 'source' ['source', %(language-#{node.attr 'language'})] else ['screen'] end title_div = node.title? ? %(

#{node.captioned_title}

) : nil # patches conums to fix extra or missing leading space # TODO apply this patch upstream to Asciidoctor %(

#{title_div}

#{(node.content || '').gsub(/(?

)
  end

  # QUESTION should the literal content be wrapped in a container element? (the element names in this note were lost)
  # Converts a literal block.
  #
  # node - the literal block node; only its content is read.
  #
  # Returns the node's content interpolated into a String.
  def literal node
    "#{node.content}"
  end

  # Converts a page break node.
  #
  # node - the page break node (not inspected).
  #
  # Returns an empty String.
  def page_break node
    ""
  end

  # Converts a thematic break node.
  #
  # node - the thematic break node (not inspected).
  #
  # Returns an empty String.
  def thematic_break node
    ""
  end

  # Converts a quote block.
  #
  # The footer is assembled, in order, from the 'attribution' attribute, the
  # 'citetitle' attribute and the block title (whichever are present), joined
  # with a single space and introduced by "~ ". The converted content is
  # stripped, then the first match of OpenParagraphTagRx and of
  # CloseParagraphTagRx is replaced by a curly opening / closing quote.
  #
  # node - the quote block node.
  #
  # Returns the String for the block.
  def quote node
    footer_parts = []

    attribution = node.attr 'attribution'
    footer_parts.push attribution if attribution

    citetitle = node.attr 'citetitle'
    if citetitle
      # NOTE(review): the sanitized cite title is computed but not used by any visible string
      xml_sanitize citetitle
      footer_parts.push "#{citetitle}"
    end

    footer_parts.push "#{node.title}" if node.title?

    footer = footer_parts.empty? ? nil : "\n~ #{footer_parts.join ' '}"
    body = convert_content(node).strip
    body = body.sub(OpenParagraphTagRx, '“').sub(CloseParagraphTagRx, '”')
    "\n\n#{body}#{footer}\n\n"
  end

  # Converts a verse block.
  #
  # The footer is assembled from the 'attribution' and 'citetitle' attributes
  # (whichever are present), joined with ", " and introduced by "~ ".
  #
  # node - the verse block node.
  #
  # Returns the String for the block.
  def verse node
    credits = []

    attribution = node.attr 'attribution'
    credits.push attribution if attribution

    citetitle = node.attr 'citetitle'
    if citetitle
      # NOTE(review): the sanitized cite title is computed but not used by any visible string
      xml_sanitize citetitle
      credits.push "#{citetitle}"
    end

    footer = credits.empty? ? nil : "\n~ #{credits.join ', '}"
    "\n#{node.content}#{footer}\n"
  end

  # Converts a sidebar block.
  #
  # When the block has a title, the title is upcased and any named entity
  # matched by NamedEntityRx has its name switched back to lowercase; the
  # result is emitted on its own line ahead of the content.
  #
  # node - the sidebar block node.
  #
  # Returns the String for the block.
  def sidebar node
    heading = nil
    if node.title?
      title = node.title
      # NOTE(review): the sanitized title is computed but not used by any visible string
      xml_sanitize title
      shouted = title.upcase.gsub(NamedEntityRx) { "&#{Regexp.last_match(1).downcase};" }
      heading = "#{shouted}\n"
    end

    "\n#{heading}\n#{convert_content node}\n\n"
  end

  # Converts a table node into its EOL-joined output lines.
  #
  # Sections are emitted in the order head, foot, body, skipping empty ones.
  # Head cells use the cell text; other cells use the text or content
  # depending on the cell style.
  #
  # Each cell gets an alignment class only when its alignment differs from the
  # default: "halign-<value>" when 'halign' is not 'left' and "valign-<value>"
  # when 'valign' is not 'top'. (Previously every non-default alignment was
  # emitted as the hard-coded classes "halign-left" / "valign-top".)
  #
  # NOTE(review): table_id_attr, table_class_attr and table_style_attr are
  # computed but do not appear in any string emitted here - confirm the table
  # opening markup.
  #
  # node - the table node.
  #
  # Returns the String for the table.
  def table node
    lines = [%()]
    lines << %()
    table_id_attr = node.id ? %( id="#{node.id}") : nil
    frame_class = {
      'all' => 'table-framed',
      'topbot' => 'table-framed-topbot',
      'sides' => 'table-framed-sides'
    }
    grid_class = {
      'all' => 'table-grid',
      'rows' => 'table-grid-rows',
      'cols' => 'table-grid-cols'
    }
    # unknown or missing frame/grid values fall back to topbot / rows
    table_classes = %W(table #{frame_class[(node.attr 'frame')] || frame_class['topbot']} #{grid_class[(node.attr 'grid')] || grid_class['rows']})
    if (role = node.role)
      table_classes << role
    end
    table_class_attr = %( class="#{table_classes * ' '}")
    table_styles = []
    unless node.option? 'autowidth'
      table_styles << %(width: #{node.attr 'tablepcwidth'}%;)
    end
    table_style_attr = table_styles.size > 0 ? %( style="#{table_styles * ' '}") : nil

    lines << %()
    lines << %(#{node.captioned_title}) if node.title?
    if (node.attr 'rowcount') > 0
      lines << ''
      # one entry per column
      tag = %()
      node.columns.size.times do
        lines << tag
      end
      lines << ''
      [:head, :foot, :body].select {|tsec| !node.rows[tsec].empty? }.each do |tsec|
        lines << %()
        node.rows[tsec].each do |row|
          lines << ''
          row.each do |cell|
            if tsec == :head
              cell_content = cell.text
            else
              case cell.style
              when :asciidoc
                cell_content = %(#{cell.content})
              when :verse, :literal
                cell_content = %(#{cell.text})
              else
                cell_content = cell.content.map {|text| %(#{text}) }.join
              end
            end

            cell_tag_name = (tsec == :head || cell.style == :header ? 'th' : 'td')
            cell_classes = []
            if (halign = cell.attr 'halign') && halign != 'left'
              cell_classes << %(halign-#{halign})
            end
            if (valign = cell.attr 'valign') && valign != 'top'
              cell_classes << %(valign-#{valign})
            end
            cell_class_attr = cell_classes.size > 0 ? %( class="#{cell_classes * ' '}") : nil
            cell_colspan_attr = cell.colspan ? %( colspan="#{cell.colspan}") : nil
            cell_rowspan_attr = cell.rowspan ? %( rowspan="#{cell.rowspan}") : nil
            cell_style_attr = (node.document.attr? 'cellbgcolor') ? %( style="background-color: #{node.document.attr 'cellbgcolor'};") : nil
            lines << %(<#{cell_tag_name}#{cell_class_attr}#{cell_colspan_attr}#{cell_rowspan_attr}#{cell_style_attr}>#{cell_content})
          end
          lines << ''
        end
        lines << %()
      end
    end
    lines << "\n\n"
    lines * EOL
  end

  # Converts a callout list.
  #
  # Each item is prefixed with a circled number, starting at U+2460 and
  # advanced with String#next for every following item.
  #
  # node - the callout list node.
  #
  # Returns the String for the list, joined with EOL like the other list
  # converters (previously the raw Array of lines was returned).
  def colist node
    lines = ["\n"]
    num = "\u2460"
    node.items.each do |item|
      lines << %(#{num} #{item.text})
      num = num.next
    end
    lines << "\n"
    lines * EOL
  end

  # TODO add complex class if list has nested blocks
  # Converts a description list.
  #
  # With the 'itemized' or 'ordered' style the list is emitted as a ul / ol
  # whose items start with the first term (the "subject"). The subject is
  # followed by the 'subject-stop' attribute unless it already ends in
  # punctuation; the stop defaults to ':' and to nothing when the list has the
  # 'stack' role. Any other style emits each term followed by its description.
  #
  # node - the description list node.
  #
  # Returns the String for the list.
  def dlist node
    style = node.style
    lines = []
    if %w(itemized ordered).include? style
      list_tag_name = style == 'itemized' ? 'ul' : 'ol'
      role = node.role
      default_stop = (role && (node.has_role? 'stack')) ? nil : ':'
      subject_stop = node.attr 'subject-stop', default_stop
      brief_attr = (node.option? 'brief') ? ' class="brief"' : nil
      reversed_attr = (list_tag_name == 'ol' && (node.option? 'reversed')) ? ' reversed="reversed"' : nil
      lines << "\n<#{list_tag_name}#{brief_attr}#{reversed_attr}>"
      node.items.each do |subjects, dd|
        # only the first term is used as the subject
        subject = [*subjects].first.text
        subject_plain = xml_sanitize subject, :plain
        stop = (subject_stop && subject_plain !~ /[[:punct:]]$/) ? subject_stop : nil
        subject_element = "#{subject}#{stop}"
        lines << ''
        if dd
          detail = dd.text? ? " #{dd.text}" : nil
          lines << "#{subject_element}#{detail}"
          lines << dd.content if dd.blocks?
        else
          lines << "#{subject_element}"
        end
        lines << ''
      end
      lines << "\n"
    else
      lines << "\n"
      node.items.each do |terms, dd|
        [*terms].each {|dt| lines << "\n#{dt.text}\n" }
        next unless dd
        lines << ''
        if dd.blocks?
          lines << "#{dd.text}" if dd.text?
          lines << dd.content
        else
          lines << "#{dd.text}"
        end
        lines << ''
      end
      lines << "\n"
    end
    lines.join EOL
  end

  # TODO support start attribute
  # Converts an ordered list.
  #
  # An item makes the list "complex" when it has nested blocks other than a
  # single nested list.
  #
  # NOTE(review): the wrapper classes, list class attribute and id attribute
  # are computed but do not appear in any string emitted here - confirm the
  # list opening markup.
  #
  # node - the ordered list node.
  #
  # Returns the String for the list.
  def olist node
    wrapper_classes = ['ordered-list', node.style, node.role].compact
    list_classes = [node.style]
    list_classes << 'brief' if node.option? 'brief'
    list_classes.compact!
    list_class_attr = list_classes.empty? ? nil : %( class="#{list_classes.join ' '}")
    id_attr = node.id ? %( id="#{node.id}") : nil

    lines = [%()]
    lines << "#{node.title}" if node.title?
    lines << %()

    has_complex_item = false
    node.items.each do |item|
      lines << "\n#{item.text}"
      if item.blocks?
        lines << item.content
        children = item.blocks
        has_complex_item = true unless children.size == 1 && ::Asciidoctor::List === children[0]
      end
      lines << ''
    end

    if has_complex_item
      wrapper_classes << 'complex'
      lines[0] = %()
    end
    lines << "\n"
    lines.join EOL
  end

  # Converts an unordered list.
  #
  # An item makes the list "complex" when it has nested blocks other than a
  # single nested list.
  #
  # NOTE(review): the wrapper classes, list class attribute and id attribute
  # are computed but do not appear in any string emitted here - confirm the
  # list opening markup.
  #
  # node - the unordered list node.
  #
  # Returns the String for the list.
  def ulist node
    wrapper_classes = ['itemized-list', node.style, node.role].compact
    # TODO could strip WordJoiner if brief since not using justify
    list_classes = [node.style]
    list_classes << 'brief' if node.option? 'brief'
    list_classes.compact!
    list_class_attr = list_classes.empty? ? nil : %( class="#{list_classes.join ' '}")
    id_attr = node.id ? %( id="#{node.id}") : nil

    lines = [%()]
    lines << "#{node.title}" if node.title?
    lines << %()

    has_complex_item = false
    node.items.each do |item|
      lines << "\n#{item.text}"
      if item.blocks?
        lines << item.content
        children = item.blocks
        has_complex_item = true unless children.size == 1 && ::Asciidoctor::List === children[0]
      end
      lines << ''
    end

    if has_complex_item
      wrapper_classes << 'complex'
      lines[0] = %()
    end
    lines << "\n"
    lines.join EOL
  end

  # Converts a block image.
  #
  # For an SVG target, 'svg' is added to the document's 'epub-properties'
  # attribute (created as an Array when absent). The captioned title, when
  # present, is emitted on its own line.
  #
  # NOTE to set actual width and height, use CSS width and height. An earlier
  # approach that set width / height attributes on SVG images from
  # 'scaledwidth' / 'scaledheight' (falling back to width="100%" because
  # Aldiko does not scale width to 100% by default) was disabled.
  #
  # NOTE(review): the collected image attributes do not appear in any string
  # emitted here - confirm the image markup.
  #
  # node - the image block node.
  #
  # Returns the String for the block.
  def image node
    target = node.attr 'target'
    extension = (::File.extname target)[1..-1]
    img_attrs = [%(alt="#{node.attr 'alt'}")]
    if extension == 'svg'
      img_attrs << %(style="width: #{node.attr 'scaledwidth', '100%'};")
      # TODO make this a convenience method on document
      epub_properties = (node.document.attr 'epub-properties') || []
      unless epub_properties.include? 'svg'
        epub_properties << 'svg'
        node.document.attributes['epub-properties'] = epub_properties
      end
    elsif node.attr? 'scaledwidth'
      img_attrs << %(style="width: #{node.attr 'scaledwidth'};")
    end

    caption = node.title? ? "\n#{node.captioned_title}" : nil
    "\n\n\n#{caption}\n"
  end

  # Converts an inline anchor.
  #
  # :xref   - resolves the text from the node, then from the document's :ids
  #           references, then falls back to the bracketed refid; the refid is
  #           recorded in @xrefs_used the first time it is seen.
  # :ref    - emits an empty String.
  # :link   - emits the node text.
  # :bibref - emits the bracketed target.
  #
  # All non-empty results are followed by WordJoiner.
  #
  # node - the inline anchor node.
  #
  # Returns the String for the anchor, or nil for any other type.
  def inline_anchor node
    target = node.target
    case node.type
    when :link
      "#{node.text}#{WordJoiner}"
    when :bibref
      "[#{target}]#{WordJoiner}"
    when :ref
      %()
    when :xref
      refid = (node.attr 'refid') || target
      unless @xrefs_used.include? refid
        @xrefs_used << refid
        # NOTE(review): this id attribute is built but not used by any visible string
        id_attr = %( id="xref-#{refid}")
      end
      # FIXME seems like text should be prepared already
      # FIXME would be nice to know what type the target is (e.g., bibref)
      text = node.text || node.document.references[:ids][refid] || "[#{refid}]"
      "#{text}#{WordJoiner}"
    end
  end

  # Converts an inline line break.
  #
  # node - the inline break node.
  #
  # Returns the node text followed by a newline.
  def inline_break node
    "#{node.text}\n"
  end

  # Converts an inline button.
  #
  # node - the inline button node.
  #
  # Returns the node text wrapped in square brackets, followed by WordJoiner.
  def inline_button node
    "[#{node.text}]#{WordJoiner}"
  end

  # Converts an inline callout number into a circled-number glyph.
  #
  # The glyph starts at U+2460 and is advanced once for each step above 1 in
  # the node text's integer value; values below 2 leave it at U+2460.
  #
  # node - the inline callout node.
  #
  # Returns the glyph as a String.
  def inline_callout node
    glyph = (1...node.text.to_i).reduce("\u2460") {|current, _| current.succ }
    "#{glyph}"
  end

  # Converts an inline footnote reference.
  #
  # node - the inline footnote node.
  #
  # Returns "^[index]" when the node has an 'index' attribute, the node text
  # when the node type is :xref, and nil otherwise.
  def inline_footnote node
    index = node.attr 'index'
    return "^[#{index}]" if index
    "#{node.text}" if node.type == :xref
  end

  # Converts an inline image or icon.
  #
  # For an icon, the target is recorded in @icon_names and the icon classes
  # are collected from the 'size', 'flip' (first character only) and 'rotate'
  # attributes plus the role. For any other type the image URI is resolved
  # from the target.
  #
  # NOTE(review): neither the collected classes nor the resolved URI appear in
  # the string emitted here - confirm the icon / image markup.
  #
  # node - the inline image node.
  #
  # Returns an empty String.
  def inline_image node
    if node.type == 'icon'
      icon_name = node.target
      @icon_names << icon_name
      icon_classes = ['icon', "i-#{icon_name}"]
      icon_classes << "icon-#{node.attr 'size'}" if node.attr? 'size'
      icon_classes << "icon-flip-#{(node.attr 'flip')[0]}" if node.attr? 'flip'
      icon_classes << "icon-rotate-#{node.attr 'rotate'}" if node.attr? 'rotate'
      icon_classes << node.role if node.role?
    else
      uri = node.image_uri node.target
      class_attr = %( class="#{node.role}") if node.role?
    end
    %()
  end

  # Converts an inline index term.
  #
  # node - the inline index term node.
  #
  # Returns the node text for a :visible term, otherwise an empty String.
  def inline_indexterm node
    return node.text if node.type == :visible
    ''
  end

  # Converts an inline keyboard shortcut.
  #
  # node - the inline kbd node; its 'keys' attribute is read as a list.
  #
  # Returns the single key, or all keys joined with '+'.
  def inline_kbd node
    keys = node.attr 'keys'
    return "#{keys[0]}" if keys.size == 1

    # append '+' after every key, then drop the trailing one
    combo = keys.map {|key| "#{key}+" }.join.chop
    "#{combo}"
  end

  # Converts an inline menu selection.
  #
  # The menu, any submenus and the menu item are separated by a caret made of
  # NoBreakSpace, RightAngleQuote and a space.
  #
  # node - the inline menu node ('menu', 'submenus' and 'menuitem' attributes).
  #
  # Returns the String for the menu path.
  def inline_menu node
    menu = node.attr 'menu'
    # NOTE we swap right angle quote with chevron right from FontAwesome using CSS
    caret = "#{NoBreakSpace}#{RightAngleQuote} "
    submenus = node.attr 'submenus'
    unless submenus.empty?
      # every submenu is followed by a caret; the final trailing space is dropped
      trail = (submenus.join(caret) + caret).chop
      return "#{menu}#{caret}#{trail} #{node.attr 'menuitem'}"
    end

    menuitem = node.attr 'menuitem'
    menuitem ? "#{menu}#{caret}#{menuitem}" : "#{menu}"
  end

  # Converts inline formatted (quoted) text.
  #
  # :strong, :emphasis and :monospaced emit the text followed by WordJoiner;
  # :double and :single wrap the text in curly quotes; :superscript and
  # :subscript prefix the text with '^' / '_' and append WordJoiner.
  #
  # node - the inline quoted node.
  #
  # Returns the String for the text; any other type returns the node text
  # unchanged.
  def inline_quoted node
    text = node.text
    case node.type
    when :strong, :emphasis, :monospaced
      "#{text}#{WordJoiner}"
    when :double
      "“#{text}”"
    when :single
      "‘#{text}’"
    when :superscript
      "^#{text}#{WordJoiner}"
    when :subscript
      "_#{text}#{WordJoiner}"
    else
      text
    end
  end

  # Resolves the content of a block.
  #
  # node - the block node.
  #
  # Returns the content interpolated into a String when the content model is
  # :simple, otherwise the content as returned by the node.
  def convert_content node
    simple = node.content_model == :simple
    body = node.content
    simple ? "#{body}" : body
  end

  # Sanitizes a value for use outside of element content.
  #
  # When the value contains '<', everything matched by XmlElementRx is
  # removed, runs of spaces are collapsed and the result is stripped.
  #
  # value  - the String to sanitize.
  # target - :attribute (default) removes WordJoinerRx matches; :plain decodes
  #          numeric character entities and the entities listed in
  #          FromHtmlSpecialCharsMap when the value contains ';'. Any other
  #          target applies no further processing.
  #
  # Returns the sanitized String.
  def xml_sanitize value, target = :attribute
    sanitized = value
    if value.include? '<'
      sanitized = value.gsub(XmlElementRx, '').squeeze(' ').strip
    end

    case target
    when :plain
      if sanitized.include? ';'
        sanitized = sanitized.gsub(CharEntityRx) { [Regexp.last_match(1).to_i].pack('U*') }
        sanitized = sanitized.gsub(FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap)
      end
    when :attribute
      sanitized = sanitized.gsub(WordJoinerRx, '').gsub('"', '"')
    end
    sanitized
  end

  # TODO make check for last content paragraph a feature of Asciidoctor
  # Adds the 'last' role to the final paragraph of a document.
  #
  # Starting from the last block of root, descends into the last block of each
  # trailing section that has blocks. If the block reached is a paragraph,
  # 'last' is appended to its existing role or set as its role.
  #
  # root - the node whose blocks are inspected.
  #
  # Returns nil.
  def mark_last_paragraph root
    candidate = root.blocks[-1]
    return unless candidate

    candidate = candidate.blocks[-1] while candidate.context == :section && candidate.blocks?
    return nil unless candidate.context == :paragraph

    candidate.attributes['role'] = candidate.role? ? "#{candidate.role} last" : 'last'
    nil
  end
end

# Derives an id for a document that may not have one.
class DocumentIdGenerator
  # Resolves the id for a document.
  #
  # doc - the document.
  #
  # Returns the document's own id when set. Otherwise returns, in order of
  # preference: the sanitized doctitle with WordJoinerRx matches removed,
  # lowercased, colons deleted and spaces turned into single hyphens (when the
  # document has a header); the id of the first section; or "document-"
  # followed by the document's object id.
  def self.generate_id doc
    existing = doc.id
    return existing if existing

    if doc.header?
      slug = doc.doctitle(sanitize: true).gsub(WordJoinerRx, '').downcase.delete(':')
      slug.tr(' ', '-').squeeze('-')
    elsif (section = doc.first_section)
      section.id
    else
      %(document-#{doc.object_id})
    end
  end
end

require_relative 'packager'

# Registers the document-level setup applied when the backend is 'epub3':
# sets the 'spine' and 'listing-caption' attributes, selects Pygments for
# source highlighting when available, normalizes the 'ebook-format' attribute,
# and installs the spine item include processor and a tree processor that
# assigns the document id.
Extensions.register do
  if (document = @document).backend == 'epub3'
    document.attributes['spine'] = ''
    document.set_attribute 'listing-caption', 'Listing'
    # Pygments is only considered outside of AsciidoctorJ and when the
    # pygments.rb gem can be activated; the css / style attributes are set only
    # if the source-highlighter attribute could be set.
    if !(defined? ::AsciidoctorJ) && (::Gem::try_activate 'pygments.rb')
      if document.set_attribute 'source-highlighter', 'pygments'
        document.set_attribute 'pygments-css', 'style'
        document.set_attribute 'pygments-style', 'bw'
      end
    end
    case (ebook_format = document.attributes['ebook-format'])
    when 'epub3', 'kf8'
      # all good
    when 'mobi'
      # 'mobi' is an alias for 'kf8'; update the local as well so the
      # ebook-format-<format> flag below names the resolved format
      ebook_format = document.attributes['ebook-format'] = 'kf8'
    else
      # missing or unrecognized formats fall back to epub3
      ebook_format = document.attributes['ebook-format'] = 'epub3'
    end
    document.attributes[%(ebook-format-#{ebook_format})] = ''
    # Only fire SpineItemProcessor for top-level include directives
    include_processor SpineItemProcessor.new(document)
    treeprocessor { process {|doc| doc.id = DocumentIdGenerator.generate_id doc } }
  end
end
end
end