Sha256: 14bed31d801450202f95abe9c705ed444613cf23349dc8225219992ca5f19352
Contents?: true
Size: 927 Bytes
Versions: 9
Compression:
Stored size: 927 Bytes
Contents
require 'shellwords'

module Docsplit

  # Delegates to **pdfinfo** in order to extract information about a PDF file.
  class InfoExtractor

    # Regex matchers for different bits of information. Each pattern captures
    # the remainder of the corresponding line of pdfinfo's output.
    MATCHERS = {
      :author   => /^Author:\s+([^\n]+)/,
      :date     => /^CreationDate:\s+([^\n]+)/,
      :creator  => /^Creator:\s+([^\n]+)/,
      :keywords => /^Keywords:\s+([^\n]+)/,
      :producer => /^Producer:\s+([^\n]+)/,
      :subject  => /^Subject:\s+([^\n]+)/,
      :title    => /^Title:\s+([^\n]+)/,
      :length   => /^Pages:\s+([^\n]+)/,
    }

    # Pull out a single datum from a pdf.
    #
    # key  - Symbol naming the datum; one of the keys of MATCHERS.
    # pdfs - A path, or a (possibly nested) list of paths. Only the first
    #        path is inspected.
    # opts - Accepted for interface compatibility; not used by this method.
    #
    # Returns the captured text as a String, an Integer when key is :length,
    # or nil when pdfinfo's output has no line for the requested key.
    # Raises ExtractionFailed, carrying pdfinfo's combined stdout/stderr as the
    # message, when the command exits unsuccessfully.
    def extract(key, pdfs, opts)
      pdf = [pdfs].flatten.first
      # The path is interpolated into a shell command line, so it must be
      # escaped: otherwise spaces split it into several arguments and shell
      # metacharacters (; | $ ` etc.) in a file name would be executed.
      cmd = "pdfinfo #{Shellwords.escape(pdf.to_s)} 2>&1"
      result = `#{cmd}`.chomp
      raise ExtractionFailed, result unless $?.success?
      match = result.match(MATCHERS[key])
      answer = match && match[1]
      # The page count is the only numeric field; everything else stays text.
      answer = answer.to_i if answer && key == :length
      answer
    end

  end

end
Version data entries
9 entries across 9 versions & 1 rubygems