Sha256: 65a207f90b7701c8cc8ce776125440d12db72e0d25d8f2adf12d0281341e0fc2

Contents?: true

Size: 1.47 KB

Versions: 14

Compression:

Stored size: 1.47 KB

Contents

module Docsplit

  # Delegates to **pdfinfo** in order to extract information about a PDF file.
  #
  # Relies on ESCAPE (used to shell-quote the path) and ExtractionFailed
  # (raised on a failing pdfinfo run), neither of which is defined in this
  # class; they are resolved by constant lookup at call time.
  class InfoExtractor

    # Regex matchers for different bits of information. Every pattern is
    # anchored to the start of a line of pdfinfo's output and captures the
    # remainder of that line.
    MATCHERS = {
      :author   => /^Author:\s+([^\n]+)/,
      :date     => /^CreationDate:\s+([^\n]+)/,
      :creator  => /^Creator:\s+([^\n]+)/,
      :keywords => /^Keywords:\s+([^\n]+)/,
      :producer => /^Producer:\s+([^\n]+)/,
      :subject  => /^Subject:\s+([^\n]+)/,
      :title    => /^Title:\s+([^\n]+)/,
      :length   => /^Pages:\s+([^\n]+)/,
    }

    # Pull out a single datum from a pdf.
    #
    # key  - one of the MATCHERS keys (for example :title or :length).
    # pdfs - a path, or a (possibly nested) array of paths; only the first
    #        one is inspected.
    # opts - handed on to extract_all, which does not currently read it.
    #
    # Returns the extracted value, or nil when pdfinfo's output had no
    # matching line. Raises ExtractionFailed when pdfinfo fails.
    def extract(key, pdfs, opts)
      extract_all(pdfs, opts)[key]
    end

    # Run pdfinfo on the first of the given pdfs and collect everything that
    # MATCHERS recognises.
    #
    # pdfs - a path, or a (possibly nested) array of paths; only the first
    #        one is inspected.
    # opts - accepted for interface compatibility; not read here.
    #
    # Returns a Hash keyed by the MATCHERS symbols. Keys whose line is absent
    # from the output are omitted; :length is converted to an Integer, all
    # other values are Strings.
    # Raises ExtractionFailed (carrying pdfinfo's combined stdout/stderr) when
    # the command does not exit successfully.
    def extract_all(pdfs, opts)
      pdf = [pdfs].flatten.first
      cmd = "pdfinfo #{ESCAPE[pdf]} 2>&1"
      result = `#{cmd}`.chomp
      raise ExtractionFailed, result unless $?.success?
      parse_info(strip_invalid_bytes(result))
    end

    private

    # Return +text+ with byte sequences that are invalid in its encoding
    # removed, so that the regex matching in parse_info cannot raise on
    # malformed input.
    def strip_invalid_bytes(text)
      # ruby 1.8 (iconv) and 1.9+ (String#encode / String#scrub) :
      unless String.method_defined?(:encode)
        require 'iconv' unless defined?(Iconv)
        return Iconv.new('UTF-8//IGNORE', 'UTF-8').iconv(text)
      end
      return text if text.valid_encoding?
      if text.respond_to?(:scrub)
        # Drops only the offending bytes; valid multi-byte characters in the
        # rest of the output are kept (same effect as the iconv branch).
        text.scrub('')
      else
        # Rubies without String#scrub: fall back to treating the output as
        # binary, which discards every non-ASCII byte.
        text.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => "")
      end
    end

    # Apply every matcher to pdfinfo's output and build the result hash.
    def parse_info(text)
      info = {}
      MATCHERS.each do |key, matcher|
        match = text.match(matcher)
        next unless match
        info[key] = key == :length ? match[1].to_i : match[1]
      end
      info
    end

  end

end

Version data entries

14 entries across 14 versions & 5 rubygems

Version Path
docsplit-ng-0.8.0 lib/docsplit/info_extractor.rb
luccasmaso-docsplit-0.7.4.2 lib/docsplit/info_extractor.rb
burisu-docsplit-0.7.8 lib/docsplit/info_extractor.rb
docsplit-0.8.0.alpha1 lib/docsplit/info_extractor.rb
docsplit-0.8.0.alpha lib/docsplit/info_extractor.rb
docsplit-0.7.6 lib/docsplit/info_extractor.rb
burisu-docsplit-0.7.7 lib/docsplit/info_extractor.rb
concerto_docsplit-0.7.5 lib/docsplit/info_extractor.rb
docsplit-0.7.5 lib/docsplit/info_extractor.rb
luccasmaso-docsplit-0.7.4.1 lib/docsplit/info_extractor.rb
burisu-docsplit-0.7.6 lib/docsplit/info_extractor.rb
burisu-docsplit-0.7.5 lib/docsplit/info_extractor.rb
docsplit-0.7.4 lib/docsplit/info_extractor.rb
docsplit-0.7.3 lib/docsplit/info_extractor.rb