Sha256: 2b532d8b756fd1baa5ab2d942237165e69d89e6a453b2cb68460059d18718a26
Contents?: true
Size: 935 Bytes
Versions: 5
Compression:
Stored size: 935 Bytes
Contents
module Docsplit

  # Delegates to **pdfinfo** in order to extract information about a PDF file.
  class InfoExtractor

    # Regex matchers for different bits of information. Each pattern is
    # anchored at the start of a line of pdfinfo output and captures the rest
    # of that line (everything after the label and its padding whitespace).
    MATCHERS = {
      :author   => /^Author:\s+([^\n]+)/,
      :date     => /^CreationDate:\s+([^\n]+)/,
      :creator  => /^Creator:\s+([^\n]+)/,
      :keywords => /^Keywords:\s+([^\n]+)/,
      :producer => /^Producer:\s+([^\n]+)/,
      :subject  => /^Subject:\s+([^\n]+)/,
      :title    => /^Title:\s+([^\n]+)/,
      :length   => /^Pages:\s+([^\n]+)/,
    }

    # Pull out a single datum from a pdf.
    #
    # key  - One of the keys of MATCHERS (e.g. :title, :length).
    # pdfs - A path, or a (possibly nested) array of paths. Only the first
    #        path is inspected.
    # opts - Accepted for interface compatibility; not used by this method.
    #
    # Returns the captured String, an Integer when key is :length, or nil
    # when the pdfinfo output has no line for the requested key.
    # Raises ArgumentError when key is not a key of MATCHERS.
    # Raises ExtractionFailed (carrying the command output) when pdfinfo does
    # not exit successfully.
    def extract(key, pdfs, opts)
      # Fail fast with a readable message instead of a TypeError from
      # String#match(nil) after the external command has already run.
      matcher = MATCHERS[key]
      raise ArgumentError, "unknown info key: #{key.inspect}" unless matcher

      pdf    = [pdfs].flatten.first
      cmd    = "pdfinfo #{ESCAPE[pdf]} 2>&1"
      result = `#{cmd}`.chomp
      raise ExtractionFailed, result unless $?.success?

      # PDF metadata may contain bytes that are not valid in the string's
      # encoding; matching a regex against such a string raises ArgumentError.
      # Drop the offending bytes first. The respond_to? guard keeps Ruby 1.8
      # (which has no string encodings) on the original code path.
      if result.respond_to?(:valid_encoding?) && !result.valid_encoding?
        result = result.encode('UTF-8', 'binary',
                               :invalid => :replace, :undef => :replace, :replace => '')
      end

      match  = result.match(matcher)
      answer = match && match[1]
      answer = answer.to_i if answer && key == :length
      answer
    end

  end

end
Version data entries
5 entries across 5 versions & 1 rubygems