module Softcover
module EpubUtils
# Locates the book's cover image and copies it into the EPUB images
# directory. Candidate extensions are tried in order: jpg, jpeg, png, tiff.
# Returns the basename of the copied file, or false if no cover exists.
def cover_img
  %w[jpg jpeg png tiff].each do |ext|
    origin = "images/cover.#{ext}"
    target = "#{images_dir}/cover.#{ext}"
    next unless File.exist?(origin)

    FileUtils.cp(origin, target)
    return File.basename(target)
  end
  false
end
# Returns a truthy value when a cover should be produced.
# A cover is included only when the book is not Amazon-specific and a
# cover image exists. (When uploading a book to Amazon KDP, the cover is
# uploaded separately, so the MOBI file itself should not have a cover.)
# The truthy value is whatever cover_img returns.
def cover?(options={})
  return false if options[:amazon]

  cover_img
end
# Returns the path of the cover image inside the EPUB images directory,
# passed through the `path` helper.
def cover_img_path
  directory = images_dir
  filename = cover_img
  path("#{directory}/#{filename}")
end
# Returns the EPUB images directory, passed through the `path` helper.
def images_dir
  relative_dir = 'epub/OEBPS/images'
  path(relative_dir)
end
# Returns the string with HTML special characters escaped.
def escape(string)
  CGI.escapeHTML(string)
end
# Returns a content.opf file based on a valid template.
#
# title, copyright, author, uuid - book metadata interpolated into the
#                                  template (title and author are escaped).
# cover_id          - id of the cover image, or nil when there is no cover.
# toc_chapters      - Array of strings, joined with newlines.
# manifest_chapters - Array of strings, joined with newlines.
# images            - Array of strings, joined with newlines.
#
# The modification timestamp is rendered in UTC, because the trailing "Z"
# designates UTC; formatting local time with a "Z" suffix misreports the
# time outside UTC. Every occurrence of the author is escaped so that
# characters such as "&" cannot break the surrounding XML.
def content_opf_template(title, copyright, author, uuid, cover_id,
                         toc_chapters, manifest_chapters, images)
  if cover_id
    cover_meta = %()
    cover_html = ''
    cover_ref = ''
  else
    cover_meta = cover_html = cover_ref = ''
  end
  modified = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S')
  %(
#{escape(title)}enCopyright (c) #{copyright} #{escape(author)}#{escape(author)}Softcoverurn:uuid:#{uuid}
#{modified}Z
#{cover_meta}
#{cover_html}
#{manifest_chapters.join("\n")}
#{images.join("\n")}
#{cover_ref}
#{toc_chapters.join("\n")}
)
end
# Returns a toc.ncx file based on a valid template.
# The result interpolates the escaped title followed by the chapter_nav
# entries joined with newlines.
# NOTE(review): uuid is accepted but not referenced by the literal as it
# appears here, and the literal shows no NCX markup -- confirm the template
# text is intact.
def toc_ncx_template(title, uuid, chapter_nav)
%(
#{escape(title)}
#{chapter_nav.join("\n")}
)
end
# Returns the navigation HTML based on a valid template.
# The title is escaped, matching toc_ncx_template and
# content_opf_template, so that characters such as "&" or "<" in a book
# title cannot produce malformed markup.
# NOTE(review): nav_list is accepted but not referenced by the literal as
# it appears here -- confirm the template text is intact.
def nav_html_template(title, nav_list)
  %(
#{escape(title)}
)
end
end
module Builders
class Epub < Builder
include Softcover::Output
include Softcover::EpubUtils
# Builds the EPUB.
# The HTML is built first; for a Markdown book the manifest is then
# replaced by one created with the :polytex source and :markdown origin
# options. The remaining steps run in a fixed order and end by moving the
# finished file via move_epub.
def build!(options={})
  @preview = options[:preview]
  Softcover::Builders::Html.new.build!
  if manifest.markdown?
    polytex_options = options.merge(source: :polytex, origin: :markdown)
    self.manifest = Softcover::BookManifest.new(polytex_options)
  end

  # Steps that take no arguments, in the order they must run.
  %i[remove_html remove_images create_directories write_mimetype
     write_container_xml write_ibooks_xml write_toc write_nav
     copy_image_files].each { |step| send(step) }

  write_html(options)
  write_contents(options)
  create_style_files(options)
  make_epub(options)
  move_epub
end
# Returns true if generating a book preview, false otherwise.
def preview?
  @preview ? true : false
end
# Deletes every .html file directly under epub/OEBPS.
# All the HTML is regenerated on each build, so this clears out any
# files that are no longer used.
def remove_html
  stale_pages = Dir.glob(path('epub/OEBPS/*.html'))
  FileUtils.rm(stale_pages)
end
# Removes the EPUB images directory in case its contents are stale.
def remove_images
  stale_dir = images_dir
  rm_r(stale_dir)
end
# Creates the directory skeleton used by the build: the epub tree
# (OEBPS, styles, META-INF, images) and the ebooks output directory.
def create_directories
  mkdir('epub')
  %w[epub/OEBPS epub/OEBPS/styles epub/META-INF].each do |dir|
    mkdir(path(dir))
  end
  mkdir(images_dir)
  mkdir('ebooks')
end
# Writes the mimetype file with the content 'application/epub+zip'.
# This is required by the EPUB standard.
def write_mimetype
  mimetype_file = path('epub/mimetype')
  File.write(mimetype_file, 'application/epub+zip')
end
# Writes the result of container_xml to epub/META-INF/container.xml.
# This is required by the EPUB standard.
def write_container_xml
  container_file = path('epub/META-INF/container.xml')
  File.write(container_file, container_xml)
end
# Writes the iBooks-specific display options file under epub/META-INF.
# This allows proper display of monospace fonts in code samples, among
# other things.
def write_ibooks_xml
  options_file = path('epub/META-INF/com.apple.ibooks.display-options.xml')
  File.write(options_file, ibooks_xml)
end
# Writes the result of content_opf(options) to epub/OEBPS/content.opf.
# This is required by the EPUB standard.
def write_contents(options={})
  opf_file = path('epub/OEBPS/content.opf')
  File.write(opf_file, content_opf(options))
end
# Returns the chapters to write: the manifest's preview chapters when
# building a preview, otherwise all of its chapters.
def chapters
  if preview?
    manifest.preview_chapters
  else
    manifest.chapters
  end
end
# Writes the HTML for the EPUB: an optional cover page plus one file per
# chapter, then deletes math PNGs that no chapter referenced.
# Chapters that appear to contain math are processed with MathJax
# (via page.js) so that math can be included in EPUB (and thence MOBI).
# Exits with status 1 if a chapter's HTML has no body.
def write_html(options={})
  texmath_dir = File.join(images_dir, 'texmath')
  mkdir images_dir
  mkdir texmath_dir
  if cover?(options)
    File.write(path('epub/OEBPS/cover.html'), cover_page)
  end

  pngs = []
  chapters.each_with_index do |chapter, index|
    # The target is opened (and truncated) before the source is read.
    File.open(path("epub/OEBPS/#{chapter.fragment_name}"), 'w') do |file|
      raw_html = File.read(path("html/#{chapter.fragment_name}"))
      body = strip_attributes(Nokogiri::HTML(raw_html)).at_css('body')
      unless body
        $stderr.puts ["\nError: Document not built due to empty chapter",
                      "Chapters must include a title using the Markdown",
                      " # This is a chapter",
                      "or the LaTeX",
                      " \\chapter{This is a chapter}"]
        exit(1)
      end
      inner_html = body.children.to_xhtml
      html = if math?(inner_html)
               # html_with_math returns nil on spurious math detection,
               # in which case the plain chapter HTML is used.
               html_with_math(chapter, images_dir, texmath_dir, pngs,
                              options) || inner_html
             else
               inner_html
             end
      file.write(chapter_template("Chapter #{index}", html))
    end
  end

  # Clean up unused PNGs.
  unused_pngs = Dir[path("#{texmath_dir}/*.png")] - pngs
  unused_pngs.each do |png|
    next unless File.exist?(png)

    puts "Removing unused PNG #{png}" unless options[:silent]
    FileUtils.rm(png)
  end
end
# Returns HTML for HTML source that includes math, or nil when the
# rendered page turns out to contain no math (or was not produced).
#
# As a side-effect, html_with_math creates PNGs corresponding to any
# math in the given source. The technique involves using PhantomJS to
# hit the HTML source for each page containing math to create SVGs
# for every math element. Since ereader support for SVGs is spotty,
# they are then converted to PNGs using Inkscape. The filenames are
# digests of their contents, which arranges both for unique filenames
# and for automatic disk caching.
#
# chapter     - chapter whose slug names the HTML file to render.
# images_dir  - accepted for interface compatibility; not used here.
# texmath_dir - directory that receives the generated SVG/PNG files.
# pngs        - Array that every referenced PNG filename is appended to.
# options     - :silent / :quiet suppress progress output.
def html_with_math(chapter, images_dir, texmath_dir, pngs, options={})
  # The contents are not used, but the read raises if the chapter's HTML
  # file is missing, before PhantomJS is launched.
  File.read(File.join("html", "#{chapter.slug}.html"))
  pagejs = "#{File.dirname(__FILE__)}/utils/page.js"
  url = "file://#{Dir.pwd}/html/#{chapter.slug}.html"
  cmd = "#{phantomjs} #{pagejs} #{url}"
  silence { silence_stream(STDERR) { system cmd } }
  # Sometimes in tests the phantomjs_source.html file is missing.
  # It shouldn't ever happen, but it does no harm to skip it.
  return nil unless File.exist?('phantomjs_source.html')
  raw_source = File.read('phantomjs_source.html')
  source = strip_attributes(Nokogiri::HTML(raw_source))
  # rm 'phantomjs_source.html'
  # Remove the first body div, which is the hidden MathJax SVGs.
  if (mathjax_svgs = source.at_css('body div'))
    mathjax_svgs.remove
  else
    # There's not actually any math, so return nil.
    return nil
  end
  # Remove all the unneeded raw TeX displays.
  source.css('script').each(&:remove)
  # Remove all the MathJax preview spans. The class selector is required:
  # a bare 'MathJax_Preview' would look for an element with that tag name
  # and never match the preview spans.
  source.css('.MathJax_Preview').each(&:remove)
  # Suck out all the SVGs
  svgs = source.css('div#book svg')
  frames = source.css('span.MathJax_SVG')
  svgs.zip(frames).each do |svg, frame|
    # Save the SVG file.
    # The parsed document exposes the attribute as 'viewbox'; restore
    # the camel-cased 'viewBox' name before serializing.
    svg['viewBox'] = svg['viewbox']
    svg.remove_attribute('viewbox')
    first_child = frame.children.first
    first_child.replace(svg) unless svg == first_child
    output = svg.to_xhtml
    svg_filename = File.join(texmath_dir, "#{digest(output)}.svg")
    File.write(svg_filename, output)
    # Convert to PNG.
    png_filename = svg_filename.sub('.svg', '.png')
    pngs << png_filename
    # An existing PNG with the same digest-based name is reused.
    unless File.exist?(png_filename)
      unless options[:silent] || options[:quiet]
        puts "Creating #{png_filename}"
      end
      svg_height = svg['style'].scan(/height: (.*?);/).flatten.first
      scale_factor = 8 # This scale factor turns out to look good.
      h = scale_factor * svg_height.to_f
      cmd = "#{inkscape} -f #{svg_filename} -e #{png_filename} -h #{h}pt"
      if options[:silent]
        silence { silence_stream(STDERR) { system cmd } }
      else
        silence_stream(STDERR) { system cmd }
      end
    end
    rm svg_filename
    png = Nokogiri::XML::Node.new('img', source)
    png['src'] = File.join('images', 'texmath',
                           File.basename(png_filename))
    png['alt'] = png_filename.sub('.png', '')
    svg.replace(png)
  end
  # Make references relative.
  source.css('a.hyperref').each do |ref_node|
    ref_node['href'] = ref_node['href'].sub('.html', '_fragment.html')
  end
  source.at_css('div#book').children.to_xhtml
end
# Returns the PhantomJS executable (if available).
# The lookup result is cached in @phantomjs once it is truthy.
def phantomjs
  return @phantomjs if @phantomjs

  @phantomjs = executable(dependency_filename(:phantomjs))
end
# Returns the Inkscape executable (if available).
# The lookup result is cached in @inkscape once it is truthy.
def inkscape
  return @inkscape if @inkscape

  @inkscape = executable(dependency_filename(:inkscape))
end
# Removes attributes that are invalid in EPUB documents from every node
# of the given document. The document is modified in place and returned.
def strip_attributes(doc)
  invalid_attributes = %w[data-tralics-id data-label data-number
                          data-chapter role aria-readonly target]
  invalid_attributes.each do |name|
    doc.xpath("//@#{name}").remove
  end
  doc
end
# Returns true if a string appears to have LaTeX math.
# Math is detected via its opening commands: \(, \[, and \begin{...} for
# equation and the amsmath environments listed in the pattern.
# This gives a false positive when math appears only inside verbatim
# environments, but it does little harm (requiring only an unnecessary
# call to page.js).
def math?(string)
  opening_math = /(?:\\\(
|\\\[
|\\begin{(?:equation
|align
|gather
|flalign
|multline
|alignat
|split))/x
  string.match?(opening_math)
end
# Populates epub/OEBPS/styles: copies pygments.css, writes softcover.css
# and custom.css with the '#book ' selector prefix removed, and copies the
# EPUB-specific epub.css from the template directory.
def create_style_files(options)
  html_styles = File.join('html', 'stylesheets')
  epub_styles = File.join('epub', 'OEBPS', 'styles')
  # Writes a stylesheet from the HTML build with its '#book ' ids removed.
  write_cleaned = lambda do |stylesheet|
    File.write(File.join(epub_styles, stylesheet),
               clean_book_id(path("#{html_styles}/#{stylesheet}")))
  end

  FileUtils.cp(File.join(html_styles, 'pygments.css'), epub_styles)
  write_cleaned.call('softcover.css')
  # Copy over the EPUB-specific CSS.
  template_dir = Softcover::Utils.template_dir(options)
  FileUtils.cp(File.join(template_dir, epub_styles, 'epub.css'), epub_styles)
  # Copy over custom CSS.
  write_cleaned.call('custom.css')
end
# Returns the contents of the given stylesheet with every '#book '
# selector prefix removed.
# For some reason, EPUB books hate the #book ids in the stylesheet
# (i.e., such books fail to validate), so they are stripped.
def clean_book_id(filename)
  stylesheet = File.read(filename)
  stylesheet.gsub('#book ', '')
end
# Copies the image files from the HTML version of the document, then
# deletes every copied file that the full HTML does not reference.
# <img> elements without a src attribute are ignored rather than raising
# NoMethodError.
def copy_image_files
  # Copy over all images to guarantee the same directory structure.
  FileUtils.cp_r(File.join('html', 'images'),
                 File.join('epub', 'OEBPS'))
  # Parse the full HTML file with Nokogiri to get images actually used.
  html = File.read(manifest.full_html_file)
  html_image_filenames = Nokogiri::HTML(html).css('img').map do |node|
    node['src']
  end.compact
  # Form the corresponding EPUB image paths.
  used_image_filenames = html_image_filenames.map do |filename|
    "epub/OEBPS/#{filename}"
  end.to_set
  # Delete unused images.
  Dir.glob("epub/OEBPS/images/**/*").each do |image|
    next if File.directory?(image)
    rm image unless used_image_filenames.include?(image)
  end
end
# Make the EPUB, which is basically just a zipped HTML file.
# The archive is assembled inside the epub directory by one chained shell
# command: the mimetype entry is added first, then META-INF and OEBPS are
# added to the same archive, and finally the .zip is renamed to .epub.
# Output is silenced under test or with the :quiet / :silent options.
# Returns the result of the system call.
def make_epub(options={})
  filename = manifest.filename
  zfname = filename + '.zip'
  base_file = "#{zip} -X0 #{zfname} mimetype"
  fullzip = "#{zip} -rDXg9"
  # The exclude patterns are wrapped in double quotes so that zip, not the
  # shell, expands them. A backslash before * inside a double-quoted Ruby
  # string is dropped by Ruby, which left the patterns unquoted.
  meta_info = %(#{fullzip} #{zfname} META-INF -x "*.DS_Store" -x mimetype)
  main_info = %(#{fullzip} #{zfname} OEBPS -x "*.DS_Store" "*.gitkeep")
  rename = "mv #{zfname} #{filename}.epub"
  commands = [base_file, meta_info, main_info, rename]
  command = commands.join(' && ')
  Dir.chdir('epub') do
    if Softcover.test? || options[:quiet] || options[:silent]
      silence { system(command) }
    else
      system(command)
    end
  end
end
# Returns the zip executable (if available).
# The lookup result is cached in @zip once it is truthy.
def zip
  return @zip if @zip

  @zip = executable(dependency_filename(:zip))
end
# Moves the completed EPUB from the epub directory to the ebooks
# directory. A preview build gets '-preview' appended to its basename.
def move_epub
  basename = manifest.filename
  suffix = preview? ? '-preview' : ''
  built_file = File.join('epub', "#{basename}.epub")
  final_file = File.join('ebooks', "#{basename}#{suffix}.epub")
  FileUtils.mv(built_file, final_file)
end
# Writes the Table of Contents (the result of toc_ncx) to
# epub/OEBPS/toc.ncx.
# This is required by the EPUB standard.
def write_toc
  toc_file = 'epub/OEBPS/toc.ncx'
  File.write(toc_file, toc_ncx)
end
# Writes the navigation file (the result of nav_html) to
# epub/OEBPS/nav.html.
# This is required by the EPUB standard.
def write_nav
  nav_file = 'epub/OEBPS/nav.html'
  File.write(nav_file, nav_html)
end
# Returns the text that write_container_xml writes to
# epub/META-INF/container.xml.
# NOTE(review): the literal below shows no XML markup in this view --
# confirm the template text is intact.
def container_xml
%(
)
end
# Returns the text that write_ibooks_xml writes to
# epub/META-INF/com.apple.ibooks.display-options.xml.
# NOTE(review): the literal below shows no XML markup in this view --
# confirm the template text is intact.
def ibooks_xml
%(
)
end
# Returns the content configuration file.
# Builds one entry per chapter for each of two lists, plus one entry per
# image file found under epub/OEBPS/images, and passes them to
# content_opf_template along with the manifest's title, copyright, author,
# and uuid and the cover id for the given options.
# NOTE(review): the %() entry literals below appear empty in this view,
# which leaves `chapter`, `ext`, `href`, and `id` unreferenced -- confirm
# the entry markup is intact.
def content_opf(options={})
# Passed to content_opf_template as manifest_chapters.
man_ch = chapters.map do |chapter|
%()
end
# Passed to content_opf_template as toc_chapters.
toc_ch = chapters.map do |chapter|
%()
end
# Only regular files are listed; directories matched by the glob are skipped.
image_files = Dir['epub/OEBPS/images/**/*'].select { |f| File.file?(f) }
images = image_files.map do |image|
ext = File.extname(image).sub('.', '') # e.g., 'png'
ext = 'jpeg' if ext == 'jpg'
# Strip off the leading 'epub/OEBPS'.
sep = File::SEPARATOR
href = image.split(sep)[2..-1].join(sep)
# Define an id based on the filename.
# Prefix with 'img-' in case the filename starts with an
# invalid character such as a number.
label = File.basename(image).gsub('.', '-')
id = "img-#{label}"
%()
end
content_opf_template(manifest.title, manifest.copyright,
manifest.author, manifest.uuid, cover_id(options),
toc_ch, man_ch, images)
end
# Returns the text that write_html writes to epub/OEBPS/cover.html when
# the book has a cover.
# NOTE(review): the literal below shows only the word "Cover" and no
# markup in this view -- confirm the template text is intact.
def cover_page
%(
Cover
)
end
# Returns the id of the cover image ('img-' followed by the cover filename
# with its first '.' replaced by '-'), or nil when no cover is produced
# for the given options.
def cover_id(options)
  return nil unless cover?(options)

  "img-#{cover_img.sub('.', '-')}"
end
# Returns the Table of Contents for the spine.
# Four strings are appended to chapter_nav for every chapter, one of which
# carries the chapter's display name; the list is then passed to
# toc_ncx_template with the manifest's title and uuid.
# NOTE(review): the entry literals below show no markup in this view and
# `chapter` is unreferenced -- confirm the entry text is intact.
def toc_ncx
chapter_nav = []
chapters.each_with_index do |chapter, n|
chapter_nav << %()
# Display name for the chapter at index n (see chapter_name).
chapter_nav << %( #{chapter_name(n)})
chapter_nav << %( )
chapter_nav << %()
end
toc_ncx_template(manifest.title, manifest.uuid, chapter_nav)
end
# Returns the display name for the chapter at index n: the "frontmatter"
# language label for index 0, otherwise the chapter label for n.
def chapter_name(n)
  return language_labels["frontmatter"] if n == 0

  chapter_label(n)
end
# Returns the nav HTML content.
# One entry is built for every chapter in the manifest (all chapters, even
# for a preview build) and the list is passed to nav_html_template with
# the manifest's title.
def nav_html
nav_list = manifest.chapters.map do |chapter|
# A preview uses the bare chapter title instead of a navigation link.
element = preview? ? chapter.title : nav_link(chapter)
%(
#{element}
)
end
nav_html_template(manifest.title, nav_list)
end
# Returns a navigation link for the chapter, built from the chapter's
# html_title.
# NOTE(review): the literal below shows no link markup in this view --
# confirm the template text is intact.
def nav_link(chapter)
%(#{chapter.html_title})
end
# Returns the HTML template for a chapter, with the given title and
# content each interpolated on their own line.
# Neither value is escaped here; content is interpolated verbatim.
# NOTE(review): the literal below shows no surrounding markup in this
# view -- confirm the template text is intact.
def chapter_template(title, content)
%(
#{title}
#{content}
)
end
end
end
end