module Softcover
module Builders
class Epub < Builder
include Softcover::Output
# Builds the EPUB for the book and moves the result into `ebooks`.
#
# options - Hash of build options. `:preview` is stored in @preview (read
#           back by `preview?`); the full hash is forwarded to `write_html`
#           and `make_epub`.
#
# The steps run in a fixed order: the HTML build comes first because
# `write_html` reads the chapter files from the `html` directory, and
# `make_epub`/`move_epub` run last, after everything under `epub` has been
# written.
def build!(options={})
@preview = options[:preview]
# Generate the HTML version first, asking the HTML builder to preserve TeX.
Softcover::Builders::Html.new.build!(preserve_tex: true)
# For Markdown books, replace the manifest with one whose source is
# :polytex and whose origin is :markdown.
if manifest.markdown?
self.manifest = Softcover::BookManifest.new(source: :polytex,
origin: :markdown)
end
# Clear previously generated pages, then lay out the EPUB skeleton.
remove_html
create_directories
write_mimetype
write_container_xml
write_toc
write_nav
# Fill in the content: images, chapter pages, package file, and styles.
copy_image_files
write_html(options)
write_contents
create_style_files
# Zip everything up and move the finished file into `ebooks`.
make_epub(options)
move_epub
end
# Reports whether this build is for a book preview.
# Always returns a strict boolean, whatever value was stored in @preview.
def preview?
  @preview ? true : false
end
# Deletes every `.html` file directly under `epub/OEBPS/html`.
# The HTML is regenerated on each build, so files from an earlier run are
# cleared out first.
def remove_html
  stale_pages = Dir.glob(path('epub/OEBPS/html/*.html'))
  FileUtils.rm(stale_pages)
end
# Creates the directory skeleton for the build: the `epub` tree (OEBPS,
# OEBPS/styles, META-INF) and the `ebooks` output directory.
def create_directories
  mkdir('epub')
  %w[epub/OEBPS epub/OEBPS/styles epub/META-INF].each do |subdir|
    mkdir(path(subdir))
  end
  mkdir('ebooks')
end
# Writes `epub/mimetype` containing exactly `application/epub+zip`
# (no trailing newline).
# This is required by the EPUB standard.
def write_mimetype
  mimetype_file = path('epub/mimetype')
  File.write(mimetype_file, 'application/epub+zip')
end
# Writes the output of `container_xml` to `epub/META-INF/container.xml`.
# This is required by the EPUB standard.
def write_container_xml
  container_file = path('epub/META-INF/container.xml')
  File.write(container_file, container_xml)
end
# Writes the output of `content_opf` to `epub/OEBPS/content.opf`.
# This is required by the EPUB standard.
def write_contents
  package_file = path('epub/OEBPS/content.opf')
  File.write(package_file, content_opf)
end
# Returns the chapters to include in this build: the manifest's preview
# chapters for a preview build, otherwise all of the manifest's chapters.
def chapters
  return manifest.preview_chapters if preview?

  manifest.chapters
end
# Writes the cover page and one HTML page per chapter into `epub/OEBPS`.
#
# Each chapter's body is read from the matching file under `html`, stripped
# of attributes that are invalid in EPUB, and wrapped with
# `chapter_template`. When the body looks like it contains math (`math?`),
# the page is rendered through `html_with_math` instead, which also records
# the PNG files it uses. Any PNG left in the texmath image directory that
# no chapter used is deleted at the end.
#
# options - Hash; `:silent` suppresses the "Removing unused PNG" message,
#           and the whole hash is forwarded to `html_with_math`.
def write_html(options={})
  image_root = File.join('epub', 'OEBPS', 'images')
  math_image_dir = File.join(image_root, 'texmath')
  [image_root, math_image_dir].each { |dir| mkdir dir }

  File.write(path('epub/OEBPS/cover.html'), cover_page)

  used_pngs = []
  chapters.each_with_index do |chapter, index|
    destination = path("epub/OEBPS/#{chapter.fragment_name}")
    File.open(destination, 'w') do |page|
      raw = File.read(path("html/#{chapter.fragment_name}"))
      body = strip_attributes(Nokogiri::HTML(raw)).at_css('body')
      plain_html = body.children.to_xhtml
      # html_with_math returns nil on a spurious math detection, in which
      # case the plain body is used instead.
      rendered = if math?(plain_html)
                   html_with_math(chapter, image_root, math_image_dir,
                                  used_pngs, options)
                 end
      # The title uses the zero-based chapter index.
      page.write(chapter_template("Chapter #{index}", rendered || plain_html))
    end
  end

  # Clean up unused PNGs.
  stale_pngs = Dir[path("#{math_image_dir}/*.png")] - used_pngs
  stale_pngs.each do |png|
    next unless File.exist?(png)

    puts "Removing unused PNG #{png}" unless options[:silent]
    FileUtils.rm(png)
  end
end
# Returns the chapter HTML with every math element replaced by a PNG
# image, or nil when the page turns out to contain no processable math.
#
# As a side effect, this creates PNGs for the math in the given chapter.
# PhantomJS runs `utils/page.js` against the chapter's HTML file; the
# rendered page is then read back from `phantomjs_source.html` (expected to
# be produced by page.js). Each SVG found in the rendered page is written to
# `texmath_dir`, converted to PNG with Inkscape (since ereader support for
# SVGs is spotty), and swapped for an <img> tag. Files are named using
# `digest` of the SVG markup, so a PNG that already exists on disk is
# reused rather than regenerated.
#
# chapter     - the chapter to process; its `slug` locates the HTML file.
# images_dir  - accepted for the caller's convenience; not used here.
# texmath_dir - directory that receives the generated SVG/PNG files.
# pngs        - Array to which the filename of every PNG in use is appended.
# options     - Hash; `:silent` and `:quiet` suppress progress output.
def html_with_math(chapter, images_dir, texmath_dir, pngs, options={})
  # NOTE(review): `content` is never used below; the only effect of this
  # read is to raise if the chapter's HTML file is missing.
  content = File.read(File.join("html", "#{chapter.slug}.html"))
  pagejs = "#{File.dirname(__FILE__)}/utils/page.js"
  url = "file://#{Dir.pwd}/html/#{chapter.slug}.html"
  cmd = "#{phantomjs} #{pagejs} #{url}"
  silence { silence_stream(STDERR) { system cmd } }
  # Sometimes in tests the phantomjs_source.html file is missing.
  # It shouldn't ever happen, but it does no harm to skip it.
  return nil unless File.exist?('phantomjs_source.html')

  raw_source = File.read('phantomjs_source.html')
  source = strip_attributes(Nokogiri::HTML(raw_source))
  rm 'phantomjs_source.html'

  # The first body div holds the hidden MathJax SVGs. If it is absent
  # there isn't actually any math, so return nil.
  mathjax_svgs = source.at_css('body div')
  return nil unless mathjax_svgs

  mathjax_svgs.remove
  # Remove all the unneeded raw TeX displays.
  source.css('script').each(&:remove)
  # Remove all the MathJax preview spans. The selector must be a class
  # selector: a bare `MathJax_Preview` would look for elements with that
  # *tag name* and never match the spans.
  source.css('span.MathJax_Preview').each(&:remove)

  # Suck out all the SVGs
  svgs = source.css('div#book svg')
  frames = source.css('span.MathJax_SVG')
  svgs.zip(frames).each do |svg, frame|
    # The HTML parse leaves the attribute as lowercase `viewbox`; restore
    # the camel-cased `viewBox` before saving the SVG file.
    svg['viewBox'] = svg['viewbox']
    svg.remove_attribute('viewbox')
    first_child = frame.children.first
    first_child.replace(svg) unless svg == first_child
    output = svg.to_xhtml
    svg_filename = File.join(texmath_dir, "#{digest(output)}.svg")
    File.write(svg_filename, output)

    # Convert to PNG, skipping the conversion when the file already exists.
    png_filename = svg_filename.sub('.svg', '.png')
    pngs << png_filename
    unless File.exist?(png_filename)
      unless options[:silent] || options[:quiet]
        puts "Creating #{png_filename}"
      end
      svg_height = svg['style'].scan(/height: (.*?);/).flatten.first
      scale_factor = 8 # This scale factor turns out to look good.
      h = scale_factor * svg_height.to_f
      cmd = "#{inkscape} -f #{svg_filename} -e #{png_filename} -h #{h}pt"
      if options[:silent]
        silence { silence_stream(STDERR) { system cmd } }
      else
        silence_stream(STDERR) { system cmd }
      end
    end
    rm svg_filename

    # Swap the inline SVG for an <img> pointing at the PNG.
    png = Nokogiri::XML::Node.new('img', source)
    png['src'] = File.join('images', 'texmath',
                           File.basename(png_filename))
    png['alt'] = png_filename.sub('.png', '')
    svg.replace(png)
  end
  source.at_css('div#book').children.to_xhtml
end
# Returns the PhantomJS executable (if available).
# The location comes from `which phantomjs` and is passed, together with an
# install hint, to the `executable` helper. The whole lookup sits inside
# the memoized block so the `which` subprocess is spawned at most once
# rather than on every call.
def phantomjs
  @phantomjs ||= begin
    filename = `which phantomjs`.chomp
    message = "Install PhantomJS (http://phantomjs.org/)"
    executable(filename, message)
  end
end
# Returns the Inkscape executable (if available).
# The macOS application bundle is preferred when it exists. Otherwise the
# executable is looked up on the PATH with `which inkscape`, mirroring how
# `phantomjs` is located. If neither is found, the bundle path is still
# handed to `executable` so the install hint is reported exactly as before.
def inkscape
  @inkscape ||= begin
    bundled = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
    message = "Install Inkscape (http://inkscape.org/)"
    filename = bundled
    unless File.exist?(bundled)
      on_path = `which inkscape`.chomp
      filename = on_path unless on_path.empty?
    end
    executable(filename, message)
  end
end
# Removes attributes that are invalid in EPUB documents from every node of
# the given document (modifying it in place) and returns the document.
def strip_attributes(doc)
  invalid_attributes = %w[data-tralics-id data-label data-number data-chapter
                          role aria-readonly]
  invalid_attributes.each do |name|
    doc.xpath("//@#{name}").remove
  end
  doc
end
# Reports whether a string appears to contain LaTeX math.
# Detection looks only for the opening math commands \(, \[, and
# \begin{equation}. Math that appears solely inside verbatim environments
# is a false positive, which costs nothing more than an unnecessary call
# to page.js.
def math?(string)
  opening_math = /(?:\\\(|\\\[|\\begin{equation})/
  string.match(opening_math) ? true : false
end
# Populates `epub/OEBPS/styles` with the stylesheets the EPUB needs:
# `pygments.css` copied from the HTML build, the EPUB-specific `epub.css`
# copied from the template directory, and a copy of `softcover.css` with
# the `#book` id selectors stripped out.
def create_style_files
  source_dir = File.join('html', 'stylesheets')
  target_dir = File.join('epub', 'OEBPS', 'styles')
  FileUtils.cp(File.join(source_dir, 'pygments.css'), target_dir)

  # Copy over the EPUB-specific CSS.
  bundled_css = File.join(File.dirname(__FILE__), '..', 'template',
                          target_dir, 'epub.css')
  FileUtils.cp(bundled_css, target_dir)

  # For some reason, EPUB books hate the #book ids in the stylesheet
  # (i.e., such books fail to validate), so remove them.
  stylesheet = File.read(File.join(source_dir, 'softcover.css'))
  cleaned = stylesheet.gsub(/\s*#book\s+/, '')
  File.write(File.join(target_dir, 'softcover.css'), cleaned)
end
# Copies the `html/images` tree into `epub/OEBPS`, then deletes every PDF
# (at any depth) from the copy. PDF images are valid in PDF documents but
# not in EPUB.
def copy_image_files
  html_images = File.join('html', 'images')
  oebps_dir = File.join('epub', 'OEBPS')
  FileUtils.cp_r(html_images, oebps_dir)

  copied_pdfs = Dir['epub/OEBPS/images/**/*.pdf']
  File.delete(*copied_pdfs)
end
# Makes the EPUB, which is basically just a zipped HTML file.
#
# Runs, from inside the `epub` directory, a chain of shell commands that
# (1) stores `mimetype` first and uncompressed (`zip -X0`), (2) appends
# META-INF and OEBPS recursively with maximum compression (`zip -rDXg9`),
# excluding `.DS_Store`/`.gitkeep` files, and (3) renames the archive from
# `<filename>.zip` to `<filename>.epub`.
#
# options - Hash; `:quiet` or `:silent` (or running under Softcover.test?)
#           wraps the command in `silence`.
def make_epub(options={})
  filename = manifest.filename
  zip_filename = filename + '.zip'
  base_file = "zip -X0 #{zip_filename} mimetype"
  zip = "zip -rDXg9"
  # The exclude globs must reach `zip` unexpanded, so the `*` is
  # backslash-escaped for the shell. Inside a double-quoted Ruby string
  # that takes `\\*`: a lone `\*` collapses to `*` and loses the escape.
  meta_info = "#{zip} #{zip_filename} META-INF -x \\*.DS_Store -x mimetype"
  main_info = "#{zip} #{zip_filename} OEBPS -x \\*.DS_Store \\*.gitkeep"
  rename = "mv #{zip_filename} #{filename}.epub"
  commands = [base_file, meta_info, main_info, rename]
  command = commands.join(' && ')
  Dir.chdir('epub') do
    if Softcover.test? || options[:quiet] || options[:silent]
      silence { system(command) }
    else
      system(command)
    end
  end
end
# Moves the finished `epub/<filename>.epub` into the `ebooks` directory.
# For a preview build the destination name gains a `-preview` suffix.
def move_epub
  built_name = manifest.filename
  final_name = if preview?
                 built_name + '-preview'
               else
                 built_name
               end
  built_file = File.join('epub', "#{built_name}.epub")
  final_file = File.join('ebooks', "#{final_name}.epub")
  FileUtils.mv(built_file, final_file)
end
# Writes the Table of Contents (the output of `toc_ncx`) to
# `epub/OEBPS/toc.ncx`.
# This is required by the EPUB standard.
# The location is built with `path`, like the other writers in this class,
# instead of a hard-coded string.
def write_toc
  File.write(path('epub/OEBPS/toc.ncx'), toc_ncx)
end
# Writes the navigation file (the output of `nav_html`) to
# `epub/OEBPS/nav.html`.
# This is required by the EPUB standard.
# The location is built with `path`, like the other writers in this class,
# instead of a hard-coded string.
def write_nav
  File.write(path('epub/OEBPS/nav.html'), nav_html)
end
def container_xml
%(