lib/chronicle/etl/extractors/file_extractor.rb in chronicle-etl-0.2.4 vs lib/chronicle/etl/extractors/file_extractor.rb in chronicle-etl-0.3.0
- old
+ new
@@ -1,52 +1,34 @@
require 'pathname'
module Chronicle
module ETL
class FileExtractor < Chronicle::ETL::Extractor
- def extract
- if file?
- extract_file do |data, metadata|
- yield(data, metadata)
- end
- elsif directory?
- extract_from_directory do |data, metadata|
- yield(data, metadata)
- end
- end
- end
+ include Extractors::Helpers::FilesystemReader
- def results_count
- if file?
- return 1
- else
- search_pattern = File.join(@options[:filename], '**/*.eml')
- Dir.glob(search_pattern).count
- end
+ register_connector do |r|
+ r.description = 'file or directory of files'
end
- private
-
- def extract_from_directory
- search_pattern = File.join(@options[:filename], '**/*.eml')
- filenames = Dir.glob(search_pattern)
+ def extract
filenames.each do |filename|
- file = File.open(filename)
- yield(file.read, {filename: file})
+ yield Chronicle::ETL::Extraction.new(data: filename)
end
end
- def extract_file
- file = File.open(@options[:filename])
- yield(file.read, {filename: @options[:filename]})
+ def results_count
+ filenames.count
end
- def directory?
- Pathname.new(@options[:filename]).directory?
- end
+ private
- def file?
- Pathname.new(@options[:filename]).file?
+ def filenames
+ @filenames ||= filenames_in_directory(
+ path: @options[:filename],
+ dir_glob_pattern: @options[:dir_glob_pattern],
+ load_since: @options[:load_since],
+ load_until: @options[:load_until]
+ )
end
end
end
end