lib/asciidoctor-epub3/packager.rb in asciidoctor-epub3-1.5.0.alpha.6 vs lib/asciidoctor-epub3/packager.rb in asciidoctor-epub3-1.5.0.alpha.7

- old
+ new

@@ -6,30 +6,50 @@ module Asciidoctor module Epub3 module GepubBuilderMixin DATA_DIR = ::File.expand_path(::File.join ::File.dirname(__FILE__), '..', '..', 'data') SAMPLES_DIR = ::File.join DATA_DIR, 'samples' - WordJoinerRx = Epub3::WordJoinerRx + CharEntityRx = ContentConverter::CharEntityRx + XmlElementRx = ContentConverter::XmlElementRx FromHtmlSpecialCharsMap = ContentConverter::FromHtmlSpecialCharsMap FromHtmlSpecialCharsRx = ContentConverter::FromHtmlSpecialCharsRx CsvDelimiterRx = /\s*,\s*/ DefaultCoverImage = 'images/default-cover.png' - InlineImageMacroRx = /^image:(.*?)\[(.*?)\]$/ + ImageMacroRx = /^image::?(.*?)\[(.*?)\]$/ + ImgSrcScanRx = /<img src="(.+?)"/ + SvgImgSniffRx = /<img src=".+?\.svg"/ - def sanitized_doctitle doc, target = :plain - return (doc.attr 'untitled-label') unless doc.header? - title = case target + attr_reader :book, :format, :spine + + # FIXME move to Asciidoctor::Helpers + def sanitize_doctitle_xml doc, content_spec + doctitle = doc.header? ? doc.doctitle : (doc.attr 'untitled-label') + sanitize_xml doctitle, content_spec + end + + # FIXME move to Asciidoctor::Helpers + def sanitize_xml content, content_spec + if content_spec != :pcdata && (content.include? '<') + if (content = (content.gsub XmlElementRx, '').strip).include? ' ' + content = content.tr_s ' ', ' ' + end + end + + case content_spec when :attribute_cdata - doc.doctitle(sanitize: true).gsub('"', '&quot;') - when :element_cdata - doc.doctitle sanitize: true - when :pcdata - doc.doctitle - when :plain - doc.doctitle(sanitize: true).gsub(FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap) + content = content.gsub '"', '&quot;' if content.include? '"' + when :cdata, :pcdata + # noop + when :plain_text + if content.include? ';' + content = content.gsub(CharEntityRx) { [$1.to_i].pack 'U*' } if content.include? '&#' + content = content.gsub FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap + end + else + raise ::ArgumentError, %(Unknown content spec: #{content_spec}) end - title.gsub WordJoinerRx, '' + content end def add_theme_assets doc builder = self format = @format @@ -79,54 +99,65 @@ file font_file => ::File.join(DATA_DIR, font_file) end end end end + nil end def add_cover_image doc imagesdir = (doc.attr 'imagesdir', '.').chomp '/' imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/)) - if (front_cover_image = doc.attr 'front-cover-image') - if front_cover_image =~ InlineImageMacroRx - front_cover_image = %(#{imagesdir}#{$1}) + if (image_path = doc.attr 'front-cover-image') + image_attrs = {} + if (image_path.include? ':') && image_path =~ ImageMacroRx + if image_path.start_with? 'image::' + warn %(asciidoctor: WARNING: deprecated block macro syntax detected in front-cover-image attribute) + end + image_path = %(#{imagesdir}#{$1}) + (::Asciidoctor::AttributeList.new $2).parse_into image_attrs, %w(alt width height) unless $2.empty? end - workdir = doc.attr 'docdir', '.' - workdir = '.' if workdir.empty? - else - front_cover_image = DefaultCoverImage - workdir = DATA_DIR + workdir = (workdir = doc.attr 'docdir').nil_or_empty? ? '.' : workdir + if ::File.readable?(::File.join workdir, image_path) + unless !image_attrs.empty? && (width = image_attrs['width']) && (height = image_attrs['height']) + width, height = 1050, 1600 + end + else + warn %(asciidoctor: ERROR: front cover image not found or readable: #{image_path}) + image_path = nil + end end + unless image_path + image_path, workdir, width, height = DefaultCoverImage, DATA_DIR, 1050, 1600 + end + resources do - cover_image %(#{imagesdir}jacket/cover#{::File.extname front_cover_image}) => ::File.join(workdir, front_cover_image) + cover_image %(#{imagesdir}jacket/cover#{::File.extname image_path}) => (::File.join workdir, image_path) + @last_defined_item.tap do |last_item| + last_item['width'] = width + last_item['height'] = height + end end + nil end # NOTE must be called within the ordered block - def add_cover_page doc, spine_builder, book - imagesdir = (doc.attr 'imagesdir', '.').chomp '/' - imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/)) + def add_cover_page doc, spine_builder, manifest + cover_item_attrs = manifest.items['item_cover'].instance_variable_get :@attributes + href = cover_item_attrs['href'] + # NOTE we only store width and height temporarily to pass through the values + width = cover_item_attrs.delete 'width' + height = cover_item_attrs.delete 'height' - img = (doc.attr 'front-cover-image') || DefaultCoverImage - - if img =~ InlineImageMacroRx - img = %(#{imagesdir}#{$1}) - # TODO use proper attribute parser - _, w, h = $2.split ',', 3 - end - - w ||= 1050 - h ||= 1600 - img_path = %(#{imagesdir}jacket/cover#{::File.extname img}) # NOTE SVG wrapper maintains aspect ratio and confines image to view box content = %(<!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en" lang="en"> <head> <meta charset="UTF-8"/> -<title>#{sanitized_doctitle doc, :element_cdata}</title> +<title>#{sanitize_doctitle_xml doc, :cdata}</title> <style type="text/css"> @page { margin: 0; } html { @@ -143,139 +174,226 @@ display: block; } </style> </head> <body epub:type="cover"><svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" - width="100%" height="100%" viewBox="0 0 #{w} #{h}" preserveAspectRatio="xMidYMid meet"> -<image width="#{w}" height="#{h}" xlink:href="#{img_path}"/> + width="100%" height="100%" viewBox="0 0 #{width} #{height}" preserveAspectRatio="xMidYMid meet"> +<image width="#{width}" height="#{height}" xlink:href="#{href}"/> </svg></body> </html>).to_ios - # GitDen expects a cover.xhtml, so add it to the spine + # Gitden expects a cover.xhtml, so add it to the spine spine_builder.file 'cover.xhtml' => content + assigned_id = (spine_builder.instance_variable_get :@last_defined_item).item.id spine_builder.id 'cover' # clearly a deficiency of gepub that it does not match the id correctly - book.spine.itemref_by_id['item_cover1'].idref = 'cover' + # FIXME can we move this hack elsewhere? + @book.spine.itemref_by_id[assigned_id].idref = 'cover' + nil end def add_images_from_front_matter - if ::File.exist? 'front-matter.html' - ::File.read('front-matter.html').scan(/<img src="(.+?)"/) do - resources do - file $1 - end + (::File.read 'front-matter.html').scan ImgSrcScanRx do + resources do + file $1 end - end + end if ::File.file? 'front-matter.html' + nil end - def add_front_matter_page doc, spine_builder, builder, format - if ::File.exist? 'front-matter.html' - spine_builder.file 'front-matter.html' => (builder.postprocess_xhtml_file 'front-matter.html', format) - (spine_builder.instance_variable_get :@last_defined_item).properties << 'svg' + def add_front_matter_page doc, spine_builder + if ::File.file? 'front-matter.html' + front_matter_content = ::File.read 'front-matter.html' + spine_builder.file 'front-matter.xhtml' => (postprocess_xhtml front_matter_content, @format) + unless (spine_builder.property? 'svg') || SvgImgSniffRx !~ front_matter_content + spine_builder.add_property 'svg' + end end + nil end - # FIXME don't add same image more than once # FIXME add inline images def add_content_images doc, images docimagesdir = (doc.attr 'imagesdir', '.').chomp '/' docimagesdir = (docimagesdir == '.' ? nil : %(#{docimagesdir}/)) - workdir = doc.attr 'docdir', '.' - workdir = '.' if workdir.empty? + workdir = (workdir = doc.attr 'docdir').nil_or_empty? ? '.' : workdir resources workdir: workdir do images.each do |image| imagesdir = (image.document.attr 'imagesdir', '.').chomp '/' imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/)) image_path = %(#{imagesdir}#{image.attr 'target'}) if image_path.start_with? %(#{docimagesdir}jacket/cover.) - warn %(The image path #{image_path} is reserved for the cover artwork. Ignoring conflicting image from content.) + warn %(asciidoctor: WARNING: image path is reserved for cover artwork: #{image_path}; skipping image found in content) elsif ::File.readable? image_path file image_path else - warn %(Image not found or not readable: #{image_path}) + warn %(asciidoctor: ERROR: image not found or not readable: #{image_path}) end end end + nil end def add_profile_images doc, usernames - spine = @spine imagesdir = (doc.attr 'imagesdir', '.').chomp '/' imagesdir = (imagesdir == '.' ? nil : %(#{imagesdir}/)) resources do file %(#{imagesdir}avatars/default.jpg) => ::File.join(DATA_DIR, 'images/default-avatar.jpg') file %(#{imagesdir}headshots/default.jpg) => ::File.join(DATA_DIR, 'images/default-headshot.jpg') end - workdir = doc.attr 'docdir', '.' - workdir = '.' if workdir.empty? + workdir = (workdir = doc.attr 'docdir').nil_or_empty? ? '.' : workdir resources do usernames.each do |username| avatar = %(#{imagesdir}avatars/#{username}.jpg) if ::File.readable?(resolved_avatar = ::File.join(workdir, avatar)) file avatar => resolved_avatar else - warn %(Avatar #{avatar} not found or readable. Falling back to default avatar for #{username}.) + warn %(asciidoctor: ERROR: avatar for #{username} not found or readable: #{avatar}; falling back to default avatar) file avatar => ::File.join(DATA_DIR, 'images/default-avatar.jpg') end headshot = %(#{imagesdir}headshots/#{username}.jpg) if ::File.readable?(resolved_headshot = ::File.join(workdir, headshot)) file headshot => resolved_headshot elsif doc.attr? 'builder', 'editions' - warn %(Headshot #{headshot} not found or readable. Falling back to default headshot for #{username}.) + warn %(asciidoctor: ERROR: headshot for #{username} not found or readable: #{headshot}; falling back to default headshot) file headshot => ::File.join(DATA_DIR, 'images/default-headshot.jpg') end end -=begin - spine.each do |item| - username = (item.attr 'username') || 'default' - avatar_target = %(#{imagesdir}avatars/#{username}.jpg) - if ::File.readable?(avatar = %(#{item.attr 'docname'}/avatar.jpg)) - file avatar_target => avatar - else - warn %(Avatar #{avatar} not found or not readable. Falling back to default avatar for #{username}.) - ::Dir.chdir DATA_DIR do - file avatar_target => %(images/default-avatar.jpg) - end - end - if ::File.readable? (headshot = %(#{item.attr 'docname'}/headshot.jpg)) - file headshot - # TODO default headshot? - end - end -=end end + nil end def add_content doc - builder = self - spine = @spine - format = @format - workdir = doc.attr 'docdir', '.' - workdir = '.' if workdir.empty? + builder, spine, format = self, @spine, @format + workdir = (doc.attr 'docdir').nil_or_empty? ? '.' : workdir resources workdir: workdir do + extend GepubResourceBuilderMixin builder.add_images_from_front_matter - # QUESTION should we move navigation_document to the Packager class? seems to make sense - #nav 'nav.xhtml' => (builder.postprocess_xhtml doc.converter.navigation_document(doc, spine), format) - nav 'nav.xhtml' => (builder.postprocess_xhtml ::Asciidoctor::Converter::Factory.default.create('epub3-xhtml5').navigation_document(doc, spine), format) + builder.add_nav_doc doc, self, spine, format + builder.add_ncx_doc doc, self, spine ordered do - builder.add_cover_page doc, self, @book unless format == :kf8 - builder.add_front_matter_page doc, self, builder, format + builder.add_cover_page doc, self, @book.manifest unless format == :kf8 + builder.add_front_matter_page doc, self spine.each_with_index do |item, i| - content_path = %(#{item.id || (item.attr 'docname')}.xhtml) - file content_path => (builder.postprocess_xhtml item.convert, format) - # NOTE heading for ePub2 navigation file; toc.ncx requires headings to be plain text - heading builder.sanitized_doctitle(item) - @last_defined_item.properties << 'svg' if ((item.attr 'epub-properties') || []).include? 'svg' + file %(#{item.id || (item.attr 'docname')}.xhtml) => (builder.postprocess_xhtml item.convert, format) + add_property 'svg' if ((item.attr 'epub-properties') || []).include? 'svg' + # QUESTION reenable? #linear 'yes' if i == 0 end end end + nil end + def add_nav_doc doc, spine_builder, spine, format + spine_builder.nav 'nav.xhtml' => (postprocess_xhtml nav_doc(doc, spine), format) + spine_builder.id 'nav' + nil + end + + # TODO aggregate authors of spine document into authors attribute(s) on main document + def nav_doc doc, spine + lines = [%(<!DOCTYPE html> +<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="#{lang = (doc.attr 'lang', 'en')}" lang="#{lang}"> +<head> +<meta charset="UTF-8"/> +<title>#{sanitize_doctitle_xml doc, :cdata}</title> +<link rel="stylesheet" type="text/css" href="styles/epub3.css"/> +<link rel="stylesheet" type="text/css" href="styles/epub3-css3-only.css" media="(min-device-width: 0px)"/> +</head> +<body> +<h1>#{sanitize_doctitle_xml doc, :pcdata}</h1> +<nav epub:type="toc" id="toc"> +<h2>#{doc.attr 'toc-title'}</h2>)] + lines << (nav_level spine, [(doc.attr 'toclevels', 1).to_i, 0].max) + lines << %(</nav> +</body> +</html>) + lines * EOL + end + + def nav_level items, depth, state = {} + lines = [] + lines << '<ol>' + items.each do |item| + #index = (state[:index] = (state.fetch :index, 0) + 1) + if item.context == :document + # NOTE we sanitize the chapter titles because we use formatting to control layout + item_label = sanitize_doctitle_xml item, :cdata + item_href = (state[:content_doc_href] = %(#{item.id || (item.attr 'docname')}.xhtml)) + else + item_label = sanitize_xml item.title, :pcdata + item_href = %(#{state[:content_doc_href]}##{item.id}) + end + lines << %(<li><a href="#{item_href}">#{item_label}</a>) + unless depth == 0 || (child_sections = item.sections).empty? + lines << (nav_level child_sections, depth - 1, state) + lines << '</li>' + else + lines[-1] = %(#{lines[-1]}</li>) + end + state.delete :content_doc_href if item.context == :document + end + lines << '</ol>' + lines * EOL + end + + # NOTE gepub doesn't support building a ncx TOC with depth > 1, so do it ourselves + def add_ncx_doc doc, spine_builder, spine + spine_builder.file 'toc.ncx' => (ncx_doc doc, spine).to_ios + spine_builder.id 'ncx' + nil + end + + def ncx_doc doc, spine + # TODO populate docAuthor element based on unique authors in work + lines = [%(<?xml version="1.0" encoding="utf-8"?> +<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="#{doc.attr 'lang', 'en'}"> +<head> +<meta name="dtb:uid" content="#{@book.identifier}"/> +%{depth} +<meta name="dtb:totalPageCount" content="0"/> +<meta name="dtb:maxPageNumber" content="0"/> +</head> +<docTitle><text>#{sanitize_doctitle_xml doc, :cdata}</text></docTitle> +<navMap>)] + lines << (ncx_level spine, [(doc.attr 'toclevels', 1).to_i, 0].max, state = {}) + lines[0] = lines[0].sub '%{depth}', %(<meta name="dtb:depth" content="#{state[:max_depth]}"/>) + lines << %(</navMap> +</ncx>) + lines * EOL + end + + def ncx_level items, depth, state = {} + lines = [] + state[:max_depth] = (state.fetch :max_depth, 0) + 1 + items.each do |item| + index = (state[:index] = (state.fetch :index, 0) + 1) + if item.context == :document + item_id = %(nav_#{index}) + item_label = sanitize_doctitle_xml item, :cdata + item_href = (state[:content_doc_href] = %(#{item.id || (item.attr 'docname')}.xhtml)) + else + item_id = %(nav_#{index}) + item_label = sanitize_xml item.title, :cdata + item_href = %(#{state[:content_doc_href]}##{item.id}) + end + lines << %(<navPoint id="#{item_id}" playOrder="#{index}">) + lines << %(<navLabel><text>#{item_label}</text></navLabel>) + lines << %(<content src="#{item_href}"/>) + unless depth == 0 || (child_sections = item.sections).empty? + lines << (ncx_level child_sections, depth - 1, state) + end + lines << %(</navPoint>) + state.delete :content_doc_href if item.context == :document + end + lines * EOL + end + def collect_keywords doc, spine ([doc] + spine).map do |item| if item.attr? 'keywords' (item.attr 'keywords').split CsvDelimiterRx else @@ -325,14 +443,27 @@ .gsub(/<script type="text\/javascript">.*?<\/script>\n?/m, '') .to_ios end end +module GepubResourceBuilderMixin + # Add missing method to builder to add a property to last defined item + def add_property property + @last_defined_item.add_property property + end + + # Add helper method to builder to check if property is set on last defined item + def property? property + (@last_defined_item['properties'] || []).include? property + end +end + class Packager KINDLEGEN = ENV['KINDLEGEN'] || 'kindlegen' EPUBCHECK = ENV['EPUBCHECK'] || %(epubcheck#{::Gem.win_platform? ? '.bat' : '.sh'}) EpubExtensionRx = /\.epub$/i + KindlegenCompression = ::Hash['0', '-c0', '1', '-c1', '2', '-c2', 'none', '-c0', 'standard', '-c1', 'huffdic', '-c2'] def initialize spine_doc, spine, format = :epub3, options = {} @document = spine_doc @spine = spine || [] @format = format @@ -343,12 +474,12 @@ spine = @spine fmt = @format target = options[:target] dest = File.dirname target - images = spine.map {|item| (item.find_by context: :image) || [] }.flatten - usernames = spine.map {|item| item.attr 'username' }.compact.uniq + images = spine.map {|item| item.find_by context: :image }.compact.flatten + .uniq {|img| %(#{(img.document.attr 'imagesdir', '.').chomp '/'}/#{img.attr 'target'}) } # FIXME authors should be aggregated already on parent document authors = if doc.attr? 'authors' (doc.attr 'authors').split(GepubBuilderMixin::CsvDelimiterRx).concat(spine.map {|item| item.attr 'author' }).uniq else [] @@ -370,11 +501,12 @@ unique_identifier doc.id, 'pub-identifier', 'uuid' end # replace with next line once the attributes argument is supported #unique_identifier doc.id, 'pub-id', 'uuid', 'scheme' => 'xsd:string' - title sanitized_doctitle(doc) + # NOTE we must use :plain_text here since gepub reencodes + title(sanitize_doctitle_xml doc, :plain_text) id 'pub-title' # FIXME this logic needs some work if doc.attr? 'publisher' publisher(publisher_name = doc.attr('publisher')) @@ -427,15 +559,18 @@ if doc.attr? 'copyright' rights(doc.attr 'copyright') end - #add_metadata 'ibooks:specified-fonts', true + #add_metadata 'ibooks:specified-fonts', true add_theme_assets doc add_cover_image doc - add_profile_images doc, usernames + if (doc.attr 'publication-type', 'book') != 'book' + usernames = spine.map {|item| item.attr 'username' }.compact.uniq + add_profile_images doc, usernames + end # QUESTION move add_content_images to add_content method? add_content_images doc, images add_content doc end @@ -462,33 +597,34 @@ puts %(Extracted #{fmt.upcase} to #{extract_dir}) if $VERBOSE end if fmt == :kf8 # QUESTION shouldn't we validate this epub file too? - distill_epub_to_mobi epub_file, target + distill_epub_to_mobi epub_file, target, options[:compress] elsif options[:validate] validate_epub epub_file end end - # QUESTION how to enable the -c2 flag? (enables ~3-5% compression) - def distill_epub_to_mobi epub_file, target + def distill_epub_to_mobi epub_file, target, compress kindlegen_cmd = KINDLEGEN unless ::File.executable? kindlegen_cmd require 'kindlegen' unless defined? ::Kindlegen kindlegen_cmd = ::Kindlegen.command end mobi_file = ::File.basename(target.sub EpubExtensionRx, '.mobi') - ::Open3.popen2e(::Shellwords.join [kindlegen_cmd, '-dont_append_source', '-o', mobi_file, epub_file]) {|input, output, wait_thr| + compress_flag = KindlegenCompression[compress ? (compress.empty? ? '1' : compress.to_s) : '0'] + cmd = [kindlegen_cmd, '-dont_append_source', compress_flag, '-o', mobi_file, epub_file].compact + ::Open3.popen2e(::Shellwords.join cmd) {|input, output, wait_thr| output.each {|line| puts line } unless $VERBOSE.nil? } puts %(Wrote MOBI to #{::File.join ::File.dirname(epub_file), mobi_file}) if $VERBOSE end def validate_epub epub_file epubcheck_cmd = EPUBCHECK unless ::File.executable? epubcheck_cmd - epubcheck_cmd = ::Gem.bin_path 'epubcheck', 'epubcheck' + epubcheck_cmd = ::Gem.bin_path 'epubcheck', 'epubcheck' end # NOTE epubcheck gem doesn't support epubcheck command options; enable -quiet once supported ::Open3.popen2e(::Shellwords.join [epubcheck_cmd, epub_file]) {|input, output, wait_thr| output.each {|line| puts line } unless $VERBOSE.nil? }