Sha256: efb5271399fba2247146f38f22cd73410813bfa8a9e13c9182602f6b81279801

Contents?: true

Size: 1.08 KB

Versions: 4

Compression:

Stored size: 1.08 KB

Contents

module Bookshelf
  module Parser
    # Builds the PDF edition of a book from its already-generated HTML file.
    #
    # NOTE(review): +name+ and +spawn_command+ are not defined in this file;
    # presumably they come from Base -- confirm against that class.
    class PDF < Base
      # Writes the footnote-adjusted HTML copy, then passes it to the
      # "prince" command with the PDF path as the "-o" argument.
      #
      # Returns whatever +spawn_command+ returns.
      def parse
        apply_footnotes!

        source = with_footnotes_file.to_s
        target = pdf_file.to_s
        spawn_command(["prince", source, "-o", target])
      end

      # Reads +html_file+, rewrites its footnote paragraphs as inline
      # <span class="fn"> elements placed right after the links that point
      # to them, and saves the result to +with_footnotes_file+.
      def apply_footnotes!
        document = Nokogiri::HTML(html_file.read)

        # Drop the namespace attributes from the root element, see
        # https://github.com/sparklemotion/nokogiri/issues/339
        root = document.css("html").first
        if root
          root.delete("xmlns")
          root.delete("xml:lang")
        end

        document.css("p.footnote[id^='_fn']").each do |footnote|
          footnote.node_name = "span"
          footnote.set_attribute("class", "fn")

          # Every element whose href targets this footnote's id gets the
          # footnote node inserted as its next sibling.
          # NOTE(review): the same node is reused for each matching link, so
          # with several links it presumably ends up after the last one only
          # -- confirm this is intended.
          selector = "[href='##{footnote["id"]}']"
          document.css(selector).each { |anchor| anchor.add_next_sibling(footnote) }
        end

        File.open(with_footnotes_file, "w") do |file|
          file << document.to_xhtml
        end
      end

      # Path of the intermediate HTML file that carries the inlined footnotes.
      def with_footnotes_file
        relative_path = "output/#{name}.pdf.html"
        Bookshelf.root_dir.join(relative_path)
      end

      # Path of the source HTML file read by +apply_footnotes!+.
      def html_file
        relative_path = "output/#{name}.html"
        Bookshelf.root_dir.join(relative_path)
      end

      # Path handed to prince as the "-o" argument.
      def pdf_file
        relative_path = "output/#{name}.pdf"
        Bookshelf.root_dir.join(relative_path)
      end
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
bookshelf-1.2.1 lib/bookshelf/parser/pdf.rb
bookshelf-1.2.0 lib/bookshelf/parser/pdf.rb
bookshelf-1.1.0 lib/bookshelf/parser/pdf.rb
bookshelf-1.0.0 lib/bookshelf/parser/pdf.rb