Sha256: 2b532d8b756fd1baa5ab2d942237165e69d89e6a453b2cb68460059d18718a26

Contents?: true

Size: 935 Bytes

Versions: 5

Compression:

Stored size: 935 Bytes

Contents

module Docsplit

  # Delegates to **pdfinfo** in order to extract information about a PDF file.
  class InfoExtractor

    # Regex matchers for different bits of information. Every pattern is
    # anchored to the start of a line of pdfinfo's output and captures the
    # remainder of that line as the value.
    MATCHERS = {
      :author   => /^Author:\s+([^\n]+)/,
      :date     => /^CreationDate:\s+([^\n]+)/,
      :creator  => /^Creator:\s+([^\n]+)/,
      :keywords => /^Keywords:\s+([^\n]+)/,
      :producer => /^Producer:\s+([^\n]+)/,
      :subject  => /^Subject:\s+([^\n]+)/,
      :title    => /^Title:\s+([^\n]+)/,
      :length   => /^Pages:\s+([^\n]+)/,
    }

    # Pull out a single datum from a pdf.
    #
    # key  - One of the MATCHERS keys (for example :title or :length).
    # pdfs - A single path or a (possibly nested) list of paths; only the
    #        first one is inspected.
    # opts - Accepted to keep the extractor signature; not used here.
    #
    # Returns the captured String, an Integer when key is :length, or nil
    # when pdfinfo's output has no line for the requested key.
    # Raises ExtractionFailed, carrying pdfinfo's combined stdout/stderr as
    # the message, when the command does not exit successfully.
    def extract(key, pdfs, opts)
      pdf = [pdfs].flatten.first
      # ESCAPE is defined elsewhere in Docsplit; presumably it shell-quotes
      # the path -- verify against its definition.
      cmd = "pdfinfo #{ESCAPE[pdf]} 2>&1"
      result = `#{cmd}`.chomp
      raise ExtractionFailed, result unless $?.success?
      # PDF metadata may contain bytes that are invalid in the encoding Ruby
      # assigns to the command output, and matching a regex against such a
      # string can raise ArgumentError. Drop the offending bytes so one bad
      # field cannot break the lookup of every other field. The guard keeps
      # Rubies without String#scrub on the previous behaviour.
      result = result.scrub('') if result.respond_to?(:scrub)
      match = result.match(MATCHERS[key])
      answer = match && match[1]
      answer = answer.to_i if answer && key == :length
      answer
    end

  end

end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
docsplit-0.6.4 lib/docsplit/info_extractor.rb
docsplit-0.6.3 lib/docsplit/info_extractor.rb
docsplit-0.6.2 lib/docsplit/info_extractor.rb
docsplit-0.6.1 lib/docsplit/info_extractor.rb
docsplit-0.6.0 lib/docsplit/info_extractor.rb