module Softcover
  module Builders
    # Builds an EPUB version of the book.
    #
    # The builder first runs the HTML builder, then stages chapter HTML,
    # stylesheets, images, and the EPUB metadata files under `epub/`, zips
    # that tree, and finally moves the resulting `.epub` into `ebooks/`.
    #
    # Helpers such as `manifest`, `path`, `mkdir`, `rm`, `digest`, `silence`,
    # `silence_stream`, and `executable` are not defined in this file; they
    # are expected to come from `Builder` or `Softcover::Output`.
    class Epub < Builder
      include Softcover::Output

      # Builds the EPUB.
      #
      # options - Hash of build options. The keys read by this class are:
      #           :preview - when truthy, only the preview chapters are built
      #                      and the output file gets a `-preview` suffix.
      #           :quiet   - suppresses "Creating ..." messages and zip output.
      #           :silent  - as :quiet, and additionally suppresses the
      #                      "Removing unused PNG" messages and Inkscape's
      #                      standard output.
      def build!(options={})
        @preview = options[:preview]
        Softcover::Builders::Html.new.build!(preserve_tex: true)
        if manifest.markdown?
          # Reload the manifest as PolyTeX-sourced (presumably because the
          # HTML build above has already converted the Markdown).
          self.manifest = Softcover::BookManifest.new(source: :polytex,
                                                      origin: :markdown)
        end
        remove_html
        create_directories
        write_mimetype
        write_container_xml
        write_toc
        write_nav
        copy_image_files
        write_html(options)
        # content.opf lists every file under epub/OEBPS/images, so it must be
        # written after the images are copied and the math PNGs are created.
        write_contents
        create_style_files
        make_epub(options)
        move_epub
      end

      # Returns true if generating a book preview.
      def preview?
        !!@preview
      end

      # Removes previously generated HTML.
      # All the HTML is regenerated on every build, so this clears out any
      # files left over from earlier builds. The cover, navigation, and
      # chapter files are all written directly into epub/OEBPS, so that is
      # the directory that has to be cleared.
      def remove_html
        FileUtils.rm(Dir.glob(path('epub/OEBPS/*.html')))
      end

      # Creates the directory skeleton for the EPUB and the output directory.
      def create_directories
        mkdir('epub')
        mkdir(path('epub/OEBPS'))
        mkdir(path('epub/OEBPS/styles'))
        mkdir(path('epub/META-INF'))
        mkdir('ebooks')
      end

      # Writes the mimetype file.
      # This is required by the EPUB standard.
      def write_mimetype
        File.write(path('epub/mimetype'), 'application/epub+zip')
      end

      # Writes the container XML file.
      # This is required by the EPUB standard.
      def write_container_xml
        File.write(path('epub/META-INF/container.xml'), container_xml)
      end

      # Writes the content.opf file.
      # This is required by the EPUB standard.
      def write_contents
        File.write(path('epub/OEBPS/content.opf'), content_opf)
      end

      # Returns the chapters to write (accounting for previews).
      def chapters
        preview? ? manifest.preview_chapters : manifest.chapters
      end

      # Writes the HTML for the EPUB.
      # Included is a math detector that processes the page with MathJax
      # (via page.js) so that math can be included in EPUB (and thence MOBI).
      #
      # options - the build options Hash; :silent suppresses the messages
      #           about removed PNGs, and the Hash is passed on to
      #           html_with_math.
      def write_html(options={})
        images_dir  = File.join('epub', 'OEBPS', 'images')
        texmath_dir = File.join(images_dir, 'texmath')
        mkdir images_dir
        mkdir texmath_dir

        File.write(path('epub/OEBPS/cover.html'), cover_page)

        # Collects the PNG filename for every math element in the book
        # (html_with_math appends to it).
        pngs = []
        chapters.each_with_index do |chapter, i|
          content    = File.read(path("html/#{chapter.fragment_name}"))
          doc        = strip_attributes(Nokogiri::HTML(content))
          inner_html = doc.at_css('body').children.to_xhtml
          html = if math?(inner_html)
                   # html_with_math returns nil on spurious math detection,
                   # in which case the unprocessed HTML is used.
                   html_with_math(chapter, images_dir, texmath_dir, pngs,
                                  options) || inner_html
                 else
                   inner_html
                 end
          # The target is only opened once the content is ready, so a failure
          # above does not leave a truncated chapter file behind.
          File.write(path("epub/OEBPS/#{chapter.fragment_name}"),
                     chapter_template("Chapter #{i}", html))
        end

        # Clean up PNGs that no longer correspond to any math in the book.
        stale_pngs = Dir[path("#{texmath_dir}/*.png")] - pngs
        stale_pngs.each do |f|
          next unless File.exist?(f)
          puts "Removing unused PNG #{f}" unless options[:silent]
          FileUtils.rm(f)
        end
      end

      # Returns HTML for HTML source that includes math.
      # As a side-effect, html_with_math creates PNGs corresponding to any
      # math in the given source. The technique involves using PhantomJS to
      # hit the HTML source for each page containing math to create SVGs
      # for every math element. Since ereader support for SVGs is spotty,
      # they are then converted to PNGs using Inkscape. The filenames are
      # SHAs of their contents, which arranges both for unique filenames
      # and for automatic disk caching.
      #
      # chapter     - the chapter to process (its `slug` names the HTML file).
      # images_dir  - unused here; kept for interface compatibility.
      # texmath_dir - directory in which the SVG/PNG files are created.
      # pngs        - Array to which every PNG filename is appended.
      # options     - build options (:silent and :quiet are read).
      #
      # Returns the processed HTML as a String, or nil if PhantomJS produced
      # no output or the page turned out to contain no math.
      def html_with_math(chapter, images_dir, texmath_dir, pngs, options={})
        pagejs = "#{File.dirname(__FILE__)}/utils/page.js"
        url    = "file://#{Dir.pwd}/html/#{chapter.slug}.html"
        cmd    = "#{phantomjs} #{pagejs} #{url}"
        silence { silence_stream(STDERR) { system cmd } }
        # Sometimes in tests the phantomjs_source.html file is missing.
        # It shouldn't ever happen, but it does no harm to skip it.
        return nil unless File.exist?('phantomjs_source.html')
        raw_source = File.read('phantomjs_source.html')
        source = strip_attributes(Nokogiri::HTML(raw_source))
        rm 'phantomjs_source.html'

        # Remove the first body div, which is the hidden MathJax SVGs.
        # If it is absent there's not actually any math, so return nil.
        mathjax_svgs = source.at_css('body div')
        return nil unless mathjax_svgs
        mathjax_svgs.remove

        # Remove all the unneeded raw TeX displays.
        source.css('script').each(&:remove)
        # Remove all the MathJax preview spans. MathJax_Preview is a class
        # name (like MathJax_SVG below), so a class selector is required;
        # a bare 'MathJax_Preview' would look for elements of that *type*.
        source.css('.MathJax_Preview').each(&:remove)

        # Suck out all the SVGs.
        svgs   = source.css('div#book svg')
        frames = source.css('span.MathJax_SVG')
        svgs.zip(frames).each do |svg, frame|
          # Re-key the lowercase `viewbox` attribute as `viewBox`
          # (presumably the HTML parser lowercased it).
          svg['viewBox'] = svg['viewbox']
          svg.remove_attribute('viewbox')
          # Make the SVG the first child of its MathJax frame.
          first_child = frame.children.first
          first_child.replace(svg) unless svg == first_child

          # Save the SVG file, named after a digest of its contents.
          output = svg.to_xhtml
          svg_filename = File.join(texmath_dir, "#{digest(output)}.svg")
          File.write(svg_filename, output)

          # Convert to PNG, unless an identical one already exists.
          png_filename = svg_filename.sub('.svg', '.png')
          pngs << png_filename
          unless File.exist?(png_filename)
            unless options[:silent] || options[:quiet]
              puts "Creating #{png_filename}"
            end
            svg_height = svg['style'].scan(/height: (.*?);/).flatten.first
            scale_factor = 8   # This scale factor turns out to look good.
            h = scale_factor * svg_height.to_f
            cmd = "#{inkscape} -f #{svg_filename} -e #{png_filename} -h #{h}pt"
            if options[:silent]
              silence { silence_stream(STDERR) { system cmd } }
            else
              silence_stream(STDERR) { system cmd }
            end
          end
          rm svg_filename

          # Replace the inline SVG with an <img> pointing at the PNG.
          png = Nokogiri::XML::Node.new('img', source)
          png['src'] = File.join('images', 'texmath',
                                 File.basename(png_filename))
          png['alt'] = png_filename.sub('.png', '')
          svg.replace(png)
        end
        source.at_css('div#book').children.to_xhtml
      end

      # Returns the PhantomJS executable (if available).
      # The lookup runs `which` in a subshell, so it is performed only once
      # and the result is memoized.
      def phantomjs
        @phantomjs ||= executable(`which phantomjs`.chomp,
                                  "Install PhantomJS (http://phantomjs.org/)")
      end

      # Returns the Inkscape executable (if available).
      # NOTE(review): the location is hard-coded to the OS X application
      # bundle, so other platforms are not supported by this lookup.
      def inkscape
        @inkscape ||= executable(
          '/Applications/Inkscape.app/Contents/Resources/bin/inkscape',
          "Install Inkscape (http://inkscape.org/)"
        )
      end

      # Strip attributes that are invalid in EPUB documents.
      # The document is modified in place and returned.
      def strip_attributes(doc)
        attrs = %w[data-tralics-id data-label data-number data-chapter
                   role aria-readonly]
        doc.tap do
          attrs.each do |attr|
            doc.xpath("//@#{attr}").remove
          end
        end
      end

      # Returns true if a string appears to have LaTeX math.
      # We detect math via opening math commands: \(, \[, and \begin{equation}
      # This gives a false positive when math is included in verbatim
      # environments and nowhere else, but it does little harm (requiring only
      # an unnecessary call to page.js).
      def math?(string)
        !!string.match(/(?:\\\(|\\\[|\\begin{equation})/)
      end

      # Copies the stylesheets needed by the EPUB into epub/OEBPS/styles.
      def create_style_files
        html_styles = File.join('html', 'stylesheets')
        epub_styles = File.join('epub', 'OEBPS', 'styles')

        FileUtils.cp(File.join(html_styles, 'pygments.css'), epub_styles)

        # Copy over the EPUB-specific CSS.
        template_dir = File.join(File.dirname(__FILE__), '..', 'template')
        epub_css     = File.join(template_dir, epub_styles, 'epub.css')
        FileUtils.cp(epub_css, epub_styles)

        # For some reason, EPUB books hate the #book ids in the stylesheet
        # (i.e., such books fail to validate), so remove them.
        polytexnic_css = File.read(File.join(html_styles, 'softcover.css'))
        polytexnic_css.gsub!(/\s*#book\s+/, '')
        File.write(File.join(epub_styles, 'softcover.css'), polytexnic_css)
      end

      # Copies the image files from the HTML version of the document.
      # We remove PDF images, which are valid in PDF documents but not in EPUB.
      def copy_image_files
        FileUtils.cp_r(File.join('html', 'images'),
                       File.join('epub', 'OEBPS'))
        File.delete(*Dir['epub/OEBPS/images/**/*.pdf'])
      end

      # Make the EPUB, which is basically just a zipped HTML file.
      #
      # The mimetype file is added first and uncompressed (-0); the META-INF
      # and OEBPS trees are then appended (-g) to the same archive, which is
      # finally renamed to `<filename>.epub`. The commands run from inside
      # the `epub` directory.
      #
      # options - build options; :quiet or :silent suppresses zip's output
      #           (as does running under Softcover.test?).
      def make_epub(options={})
        filename     = manifest.filename
        zip_filename = filename + '.zip'
        base_file    = "zip -X0 #{zip_filename} mimetype"
        zip          = "zip -rDXg9"
        # The exclusion patterns are single-quoted so that they reach zip
        # verbatim. (A backslash before `*` inside a double-quoted Ruby
        # string is simply dropped, leaving the pattern open to expansion
        # by the shell.)
        meta_info = "#{zip} #{zip_filename} META-INF " \
                    "-x '*.DS_Store' -x mimetype"
        main_info = "#{zip} #{zip_filename} OEBPS " \
                    "-x '*.DS_Store' '*.gitkeep'"
        rename    = "mv #{zip_filename} #{filename}.epub"
        command   = [base_file, meta_info, main_info, rename].join(' && ')
        Dir.chdir('epub') do
          if Softcover.test? || options[:quiet] || options[:silent]
            silence { system(command) }
          else
            system(command)
          end
        end
      end

      # Move the completed EPUB book to the `ebooks` directory.
      # Note that we handle the case of a preview book as well.
      def move_epub
        origin = manifest.filename
        target = preview? ? origin + '-preview' : origin
        FileUtils.mv(File.join('epub',   "#{origin}.epub"),
                     File.join('ebooks', "#{target}.epub"))
      end

      # Writes the Table of Contents.
      # This is required by the EPUB standard.
      def write_toc
        File.write(path('epub/OEBPS/toc.ncx'), toc_ncx)
      end

      # Writes the navigation file.
      # This is required by the EPUB standard.
      def write_nav
        File.write(path('epub/OEBPS/nav.html'), nav_html)
      end

      # Returns the contents of META-INF/container.xml.
      # NOTE(review): the markup inside this literal appears to have been
      # lost (it is a single space); restore it before relying on the output.
      def container_xml
        %( )
      end

      # Returns the content configuration file.
      # NOTE(review): the markup inside the literals below appears to have
      # been lost -- e.g. `id`, `href`, and `ext` are computed but never
      # interpolated. Restore the templates before relying on the output.
      def content_opf
        title     = manifest.title
        author    = manifest.author
        copyright = manifest.copyright
        uuid      = manifest.uuid
        man_ch = chapters.map do |chapter|
          %()
        end
        toc_ch = chapters.map do |chapter|
          %()
        end
        image_files = Dir['epub/OEBPS/images/**/*'].select { |f| File.file?(f) }
        images = image_files.map do |image|
          ext = File.extname(image).sub('.', '')   # e.g., 'png'
          # Strip off the leading 'epub/OEBPS'.
          sep  = File::SEPARATOR
          href = image.split(sep)[2..-1].join(sep)
          # Define an id based on the filename.
          # Prefix with 'img-' in case the filename starts with an
          # invalid character such as a number.
          label = File.basename(image).gsub('.', '-')
          id = "img-#{label}"
          %()
        end
        %( #{title} en Copyright (c) #{copyright} #{author} #{author} Softcover urn:uuid:#{uuid} #{Time.now.strftime('%Y-%m-%dT%H:%M:%S')}Z #{man_ch.join("\n")} #{images.join("\n")} #{toc_ch.join("\n")} )
      end

      # Returns the HTML for the cover page.
      # NOTE(review): the markup inside this literal appears to have been
      # lost; restore it before relying on the output.
      def cover_page
        %( Cover
cover image
)
      end

      # Returns the Table of Contents for the spine.
      # In a preview build the chapter numbers are offset by the first index
      # of the preview range.
      # NOTE(review): the markup inside the literals below appears to have
      # been lost; restore it before relying on the output.
      def toc_ncx
        title = manifest.title
        chapter_nav = []
        offset = preview? ? manifest.preview_chapter_range.first : 0
        chapters.each_with_index do |chapter, i|
          n = i + offset
          chapter_nav << %()
          chapter_nav << %( #{chapter_name(n)})
          chapter_nav << %( )
          chapter_nav << %()
        end
        %( #{title} #{chapter_nav.join("\n")} )
      end

      # Returns the display name for chapter number n
      # ("Frontmatter" for 0, "Chapter n" otherwise).
      def chapter_name(n)
        n == 0 ? "Frontmatter" : "Chapter #{n}"
      end

      # Returns the nav HTML content.
      # NOTE(review): the markup inside the literals below appears to have
      # been lost (nav_list is built but never interpolated); restore it
      # before relying on the output.
      # NOTE(review): this iterates manifest.chapters rather than `chapters`,
      # so a preview build lists every chapter -- confirm that is intended.
      def nav_html
        title = manifest.title
        nav_list = manifest.chapters.map do |chapter|
          %(
  • #{chapter.title}
  • )
        end
        %( #{title} )
      end

      # Returns the HTML template for a chapter.
      # NOTE(review): the markup inside this literal appears to have been
      # lost; restore it before relying on the output.
      def chapter_template(title, content)
        %( #{title} #{content} )
      end
    end
  end
end