Sha256: 7509f0bf51365cd13210dd3d50a9dfad0f1372ddef0e6f6dea4617809e3fcc95

Contents?: true

Size: 983 Bytes

Versions: 3

Compression:

Stored size: 983 Bytes

Contents

require "optparse"
require "epub/parser"

# Activate the XMLDocument refinements for the remainder of this file.
# NOTE(review): presumably these supply the each_element_by_xpath / content
# helpers that main relies on — confirm against the library.
using EPUB::Parser::XMLDocument::Refinements

# Prints the text content of every XHTML page on the spine of an EPUB file.
#
# Options are parsed out of +argv+ first (no custom switches are declared, so
# only OptionParser's built-in ones apply); the first remaining argument is
# taken as the path of the EPUB file. Pages that are not XHTML, or that have
# no body element, are reported on standard error and skipped.
#
# Any StandardError raised along the way is reported on standard error,
# followed by the usage text, and the process is aborted.
#
# @param argv [Array<String>] command-line arguments; modified in place
# @return [void]
def main(argv)
  option_parser = OptionParser.new {|opt|
    opt.version = EPUB::Parser::VERSION
    opt.banner = <<EOB
Extracts text from EPUB and output

Usage: #{opt.program_name} EPUBFILE
EOB
  }
  # Called only to consume option switches from argv; the hash it returns
  # is not needed because no options are declared.
  option_parser.getopts(argv)
  path = argv.shift
  raise "Specify EPUBFILE" unless path
  EPUB::Parser.parse(path).each_page_on_spine do |page|
    unless page.xhtml?
      $stderr.puts "Cannot parse non-XHTML document(#{page.media_type}): #{page.entry_name}"
      next
    end
    doc = EPUB::Parser::XMLDocument.new(page.read)
    body = doc.each_element_by_xpath("//xhtml:body", EPUB::NAMESPACES).first
    unless body
      $stderr.puts "body element doesn't exist in #{page.entry_name}"
      next
    end
    puts body.content
    # TODO: handle spaces
    # TODO: handle img@alt
  end
rescue => err
  # Every failure, including a missing EPUBFILE, ends with the usage text.
  $stderr.puts "Error: #{err}"
  $stderr.puts
  abort option_parser.help
end

# Script entry point: run main with the command-line arguments.
main(ARGV)

Version data entries

3 entries across 3 versions & 1 rubygem

Version Path
epub-parser-0.4.8 bin/epubtotext
epub-parser-0.4.7 bin/epubtotext
epub-parser-0.4.6 bin/epubtotext