Sha256: 5257b45a1409e6aabe2fa7d2b5cfd5872d824a7010d8a5fda0d2351d4dec1ff8
Contents?: true
Size: 1.97 KB
Versions: 1
Compression:
Stored size: 1.97 KB
Contents
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

require "time"

require "poppler"

module ChupaText
  module Decomposers
    # Decomposer that extracts the text and the document-level metadata
    # of a PDF by loading it with Poppler. It registers itself in the
    # decomposer registry under the name "pdf".
    class PDF < Decomposer
      registry.register("pdf", self)

      # Document properties copied onto the extracted text data, in the
      # order they are set. Underscores in a name become hyphens in the
      # resulting attribute name (e.g. :creation_date => "creation-date").
      ATTRIBUTE_NAMES = [
        :title,
        :author,
        :subject,
        :keywords,
        :creator,
        :producer,
        :creation_date,
      ].freeze

      # Tells whether this decomposer handles the given data.
      #
      # @param data [#extension, #mime_type] the candidate input.
      # @return [Boolean] true when the extension is "pdf" or the MIME
      #   type is "application/pdf".
      def target?(data)
        data.extension == "pdf" || data.mime_type == "application/pdf"
      end

      # Builds a Poppler document from data.body, concatenates the text
      # of every page and yields the result as a single TextData that
      # carries the source URI and the available document properties.
      #
      # @param data [#body, #uri] the PDF input.
      # @yieldparam text_data [TextData] the extracted text with its
      #   attributes.
      def decompose(data)
        document = Poppler::Document.new(data.body)
        text = ""
        document.each do |page|
          text << page.get_text
        end

        text_data = TextData.new(text)
        text_data.uri = data.uri
        ATTRIBUTE_NAMES.each do |name|
          add_attribute(text_data, document, name)
        end
        yield(text_data)
      end

      private

      # Copies one property of the document onto text_data. Nothing is
      # set when the property is nil.
      #
      # @param text_data [TextData] receiver of the attribute.
      # @param document [Poppler::Document] source of the property.
      # @param name [Symbol] name of the reader method on the document.
      def add_attribute(text_data, document, name)
        value = document.send(name)
        return if value.nil?

        attribute_name = name.to_s.tr("_", "-")
        # Integer values are handed to Time.at, i.e. interpreted as
        # seconds since the Unix epoch, and stored as a UTC ISO 8601 string.
        value = Time.at(value).utc.iso8601 if value.is_a?(Integer)
        text_data[attribute_name] = value
      end
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
chupa-text-decomposer-pdf-1.0.0 | lib/chupa-text/decomposers/pdf.rb |