gem 'mime-types', '>= 1.16'
require 'mime/types'

# Front end for extracting text from files through the Java classes
# Java::SupportedMimeTypes and Java::TextExtractionFactory, which must already
# be loadable when these methods are called (they are not required here).
module ChocolateDisco
  class << self
    # Memoized result of supported_mime_types; populated on first use.
    @@supported_mime_types = nil

    # Returns an array holding the value of every constant defined on
    # Java::SupportedMimeTypes. The list is computed once and cached.
    #
    # NOTE(review): the values are presumably simplified MIME type strings,
    # since they are compared against MIME::Type#simplified results below.
    def supported_mime_types
      @@supported_mime_types ||= begin
        registry = Java::SupportedMimeTypes
        # const_get performs the same lookup the string eval used to do,
        # without evaluating generated code.
        registry.constants.map { |const| registry.const_get(const) }
      end
    end

    # Returns whether at least one candidate MIME type is supported.
    #
    # Candidates are collected from any combination of these options:
    #   :mime  - a single MIME type (String)
    #   :mimes - an Array of MIME types
    #   :path  - path (String) of an existing regular file whose MIME types
    #            are inferred with MIME::Types.of
    # Options of any other type, and paths that are not regular files, are
    # ignored. With no usable candidates the result is false.
    #
    # Raises NotImplementedError when MIME inference for :path fails with a
    # NoMethodError.
    def supported?(opts = {})
      candidates = []
      candidates << opts[:mime] if opts[:mime].is_a?(String)
      candidates.concat(opts[:mimes]) if opts[:mimes].is_a?(Array)

      path = opts[:path]
      # File.file? is true only for an existing regular file, which covers
      # the former exists?/!directory?/file? triple. File.exists? itself is
      # gone from current Ruby versions.
      if path.is_a?(String) && File.file?(path)
        begin
          candidates.concat(MIME::Types.of(path).map { |mime| mime.simplified })
        rescue NoMethodError
          raise NotImplementedError, "MIME types of a file '#{path}' could not be inferred."
        end
      end

      # :mimes may contain nested arrays, nils or duplicates.
      candidates = candidates.flatten.compact.uniq
      candidates.any? { |mime| supported_mime_types.include?(mime) }
    end

    # Extracts text from the file at +path+ and returns whatever the Java
    # extractor's extractText returns (documented by the original author as
    # a string, an empty string or nil).
    #
    # Options:
    #   :force_mime - MIME type (String) to use instead of inferring the
    #                 candidates from +path+.
    #
    # Raises NotImplementedError when none of the candidate MIME types is
    # supported, and RuntimeError when the factory returns no extractor.
    def extract(path, opts = {})
      mimes =
        if opts[:force_mime].is_a?(String)
          [opts[:force_mime]]
        else
          MIME::Types.of(path).map { |mime| mime.simplified }
        end

      # Use the first candidate that is actually supported. Taking the first
      # candidate unconditionally could select an unsupported type even
      # though a later candidate would have worked.
      chosen = mimes.find { |mime| supported_mime_types.include?(mime) }
      if chosen.nil?
        raise NotImplementedError,
              "MIME (#{mimes.join(', ')}) for '#{path}' is not supported by chocolate_disco."
      end

      extractor = Java::TextExtractionFactory.getExtractor(chosen)
      if extractor.nil?
        raise 'TextExtractionFactory failed to get an instance of TextExtractor, which is impossible.'
      end

      extractor.extractText(java.io.File.new(path))
    end

    alias :lets_party! :extract
  end
end