lib/docsplit/info_extractor.rb in burisu-docsplit-0.7.8 vs lib/docsplit/info_extractor.rb in burisu-docsplit-0.7.9
- old
+ new
@@ -1,38 +1,36 @@
module Docsplit
-
# Delegates to **pdfinfo** in order to extract information about a PDF file.
class InfoExtractor
-
# Regex matchers for different bits of information.
# Each pattern anchors on a field label at the start of a line and exposes the
# text that follows the label (up to the next newline) as capture group 1.
# Two keys do not share the name of their label: :date is read from the
# "CreationDate:" label and :length from the "Pages:" label.
# NOTE(review): `\s+` also matches newlines, so a label followed only by
# whitespace would capture the *next* line instead -- confirm whether the
# matched output can ever contain such an empty field.
MATCHERS = {
- :author => /^Author:\s+([^\n]+)/,
- :date => /^CreationDate:\s+([^\n]+)/,
- :creator => /^Creator:\s+([^\n]+)/,
- :keywords => /^Keywords:\s+([^\n]+)/,
- :producer => /^Producer:\s+([^\n]+)/,
- :subject => /^Subject:\s+([^\n]+)/,
- :title => /^Title:\s+([^\n]+)/,
- :length => /^Pages:\s+([^\n]+)/,
- }
+ author: /^Author:\s+([^\n]+)/,
+ date: /^CreationDate:\s+([^\n]+)/,
+ creator: /^Creator:\s+([^\n]+)/,
+ keywords: /^Keywords:\s+([^\n]+)/,
+ producer: /^Producer:\s+([^\n]+)/,
+ subject: /^Subject:\s+([^\n]+)/,
+ title: /^Title:\s+([^\n]+)/,
+ length: /^Pages:\s+([^\n]+)/
+ }.freeze
# Look up one piece of metadata for a PDF.
#
# key  - key into the hash returned by extract_all (the MATCHERS keys).
# pdfs - handed to extract_all unchanged.
# opts - handed to extract_all unchanged.
#
# Returns whatever extract_all stored under key, or nil when nothing was
# stored for it.
def extract(key, pdfs, opts)
  info = extract_all(pdfs, opts)
  info[key]
end
-
# Runs `pdfinfo` on the first entry of pdfs and returns a hash (`info`) that
# is filled in from the MATCHERS patterns applied to the command output.
# Raises ExtractionFailed, carrying the command output as its message, when
# the command does not finish with status 0.
# The second parameter (opts / _opts) is not read by any line visible here.
- def extract_all(pdfs, opts)
+
+ def extract_all(pdfs, _opts)
# Accept a single path or a (possibly nested) array; only the first entry is used.
pdf = [pdfs].flatten.first
# ESCAPE is defined outside this view -- presumably it shell-escapes the path; confirm.
# 2>&1 folds stderr into the captured output, which is reused as the error message below.
cmd = "pdfinfo #{ESCAPE[pdf]} 2>&1"
result = `#{cmd}`.chomp
# NOTE(review): Process::Status#exitstatus is nil when the child did not exit
# normally (e.g. it was killed by a signal), in which case `.nonzero?` would raise
# NoMethodError instead of ExtractionFailed. `unless $?.success?` would cover
# both cases -- confirm whether that situation is reachable through the shell.
- raise ExtractionFailed, result if $? != 0
+ raise ExtractionFailed, result if $?.exitstatus.nonzero?
# ruby 1.8 (iconv) and 1.9 (String#encode) :
# When String#encode exists, output that is not valid in its current encoding
# is reinterpreted as binary and transcoded to UTF-8, with invalid or undefined
# bytes replaced by the empty string (i.e. dropped).
if String.method_defined?(:encode)
- result.encode!('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => "") unless result.valid_encoding?
+ result.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') unless result.valid_encoding?
else
# Fallback when String#encode is unavailable: load Iconv on demand and convert
# UTF-8 to UTF-8 with //IGNORE so that invalid sequences are discarded.
require 'iconv' unless defined?(Iconv)
- ic = Iconv.new('UTF-8//IGNORE','UTF-8')
+ ic = Iconv.new('UTF-8//IGNORE', 'UTF-8')
result = ic.iconv(result)
end
info = {}
# Try every matcher against the cleaned-up command output.
MATCHERS.each do |key, matcher|
match = result.match(matcher)
# NOTE(review): the lines between these two diff hunks are not shown, so how
# `answer` is derived from `match` cannot be described from here.
@@ -42,9 +40,7 @@
info[key] = answer
end
end
info
end
-
end
-
end