lib/libis/format/fido.rb in libis-format-0.9.32 vs lib/libis/format/fido.rb in libis-format-0.9.33

- old
+ new

@@ -1,102 +1,87 @@ -require 'csv' - -require 'singleton' require 'libis/tools/extend/string' -require 'libis/tools/logger' require 'libis/tools/command' +require 'csv' require 'libis/format/config' -require 'libis/format/type_database' +require_relative 'identification_tool' + module Libis module Format - class Fido - include Singleton - include ::Libis::Tools::Logger + class Fido < Libis::Format::IdentificationTool - BAD_MIMETYPES = [nil, '', 'None', 'application/octet-stream'] + def self.add_formats(formats_file) + self.instance.formats << formats_file unless self.instance.formats.include?(formats_file) + end - def self.run(file, formats = nil) - self.instance.run file, formats + def self.del_formats(formats_file) + self.instance.formats.delete(formats_file) end - def run(file, xtra_formats = nil) + attr_reader :formats - fido_results = [] - - fmt_list = formats.dup - case xtra_formats - when Array - fmt_list += xtra_formats - when String - fmt_list << xtra_formats - else - # do nothing + def run_list(filelist) + create_list_file(filelist) do |list_file| + output = runner(nil, '-input', list_file.escape_for_string) + process_output(output) end + end + def run_dir(dir, recursive = true) args = [] - args << '-loadformats' << "#{fmt_list.join(',')}" unless fmt_list.empty? - args << "#{file.escape_for_string}" - fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args) - warn "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty? + args << '-recurse' if recursive + output = runner(dir, *args) + process_output(output) + end - keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype] - fido_output = CSV.parse(fido[:out].join("\n")).map { |a| Hash[keys.zip(a)] } + def run(file) + output = runner(file) + process_output(output) + end - fido_output.each do |x| - if x[:status] == 'OK' - x[:mimetype] = get_mimetype(x[:puid]) if x[:mimetype] == 'None' - next if BAD_MIMETYPES.include? x[:mimetype] - x[:score] = 5 - case x[:matchtype] - when 'signature' - x[:score] += 5 - when 'container' - typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(x[:puid]) - ext = File.extname(file) - x[:score] += 2 if typeinfo and typeinfo[:EXTENSIONS].include?(ext) - else - # do nothing - end - fido_results << x - end - end + protected - fido_results = fido_results.inject({}) do |result, value| - result[value[:score]] ||= [] - result[value[:score]] << value - result - end + def initialize + super + @formats = Libis::Format::Config[:fido_formats].dup + bad_mimetype('application/vnd.oasis.opendocument.text') + bad_mimetype('application/vnd.oasis.opendocument.spreadsheet') + end - max_score = fido_results.keys.max + attr_writer :formats - # Only if we find a single hit of type 'signature' or 'container', we are confident enough to return a result - return {} unless max_score and max_score >= 5 && fido_results[max_score].size == 1 + def runner(filename, *args) + # Load custome format definitions if present + args << '-loadformats' << "#{formats.join(',')}" unless formats.empty? - fido_results[max_score].first - end + # Workaround for Fido performance bug + args << '-bufsize' << '1000' - def self.add_format(f) - instance.formats << f - end + # Add filename to argument list (optional) + args << "#{filename.escape_for_string}" if filename - def self.formats - instance.formats - end + # No header output + args << '-q' - protected + # Run command and capture results + fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args) - attr_reader :formats + # Log warning if needed + raise RuntimeError, "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty? - def initialize - data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data')) - @formats = [(File.join(data_dir, 'lias_formats.xml'))] - end - - def get_mimetype(puid) - ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil + # Parse output (CSV) text into array and return result + keys = [:status, :time, :puid, :format_name, :format_version, :filesize, :filepath, :mimetype, :matchtype] + result = CSV.parse(fido[:out].join("\n")) + .map {|a| Hash[keys.zip(a)]} + .select {|a| a[:status] == 'OK'} + result.each do |r| + r.delete(:time) + r.delete(:status) + r.delete(:filesize) + r[:source] = :fido + end end end end