Sha256: 3f75c6103eb8c2115f07b90a3b2f040dc34dec8b0dac11924c2cf5e6684416df
Contents?: true
Size: 1.64 KB
Versions: 1
Compression:
Stored size: 1.64 KB
Contents
# frozen_string_literal: true

require 'pathname'

module Chronicle
  module ETL
    # Return filenames that match a pattern in a directory
    #
    # Each input path that is a directory is expanded with +dir_glob_pattern+;
    # each input path that is not a directory is used as-is. The resulting
    # paths are de-duplicated, optionally filtered by modification time and
    # size, and ordered by modification time.
    class FileExtractor < Chronicle::ETL::Extractor
      register_connector do |r|
        r.identifier = :file
        r.description = 'file or directory of files'
      end

      # One path or a list of paths (files and/or directories).
      setting :input, default: ['.']
      # Glob pattern applied inside every directory given in +input+.
      setting :dir_glob_pattern, default: '**/*'
      # Size bounds, in bytes; both comparisons are strict.
      setting :larger_than
      setting :smaller_than

      # Resolve the configured input into the list of paths to extract.
      def prepare
        @pathnames = gather_files
      end

      # Yield one Extraction per gathered path; the extraction's data is the
      # path as a String. Relies on #prepare having populated @pathnames.
      def extract
        @pathnames.each do |pathname|
          yield Chronicle::ETL::Extraction.new(data: pathname.to_path)
        end
      end

      # Number of paths gathered by #prepare.
      def results_count
        @pathnames.count
      end

      private

      # Build the filtered list of Pathnames for the configured input,
      # ordered by modification time.
      #
      # @return [Array<Pathname>]
      # @raise [ExtractionError] if any input path does not exist
      def gather_files
        roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
        raise(ExtractionError, 'Input must exist') unless roots.all?(&:exist?)

        directories, files = roots.partition(&:directory?)
        files += directories.flat_map { |directory| glob_directory(directory) }

        # TODO: incorporate sort argument
        filter_files(files.uniq).sort_by(&:mtime)
      end

      # Expand +dir_glob_pattern+ inside +directory+.
      #
      # The directory is handed to Dir.glob through +base:+ instead of being
      # spliced into the pattern, so glob metacharacters in the directory name
      # itself (e.g. "photos [2020]") are taken literally.
      #
      # @param directory [Pathname]
      # @return [Array<Pathname>] matches, each prefixed with +directory+
      def glob_directory(directory)
        # A leading "/" would make the pattern absolute and bypass +base:+;
        # File.join used to fold such a pattern under the directory, so keep
        # it relative here as well.
        pattern = @config.dir_glob_pattern.sub(%r{\A/+}, '')

        Dir.glob(pattern, base: directory.to_path).map do |relative|
          Pathname.new(File.join(directory.to_path, relative))
        end
      end

      # Apply the optional mtime and size filters. A filter whose setting is
      # unset (nil/false) is skipped.
      #
      # NOTE(review): +since+ and +until+ are not declared in this class;
      # presumably they are settings inherited from the parent extractor.
      #
      # @param files [Array<Pathname>]
      # @return [Array<Pathname>]
      def filter_files(files)
        files = files.select { |f| f.mtime > @config.since } if @config.since
        files = files.select { |f| f.mtime < @config.until } if @config.until

        # pass in file sizes in bytes
        files = files.select { |f| f.size < @config.smaller_than } if @config.smaller_than
        files = files.select { |f| f.size > @config.larger_than } if @config.larger_than

        files
      end
    end
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
chronicle-etl-0.6.1 | lib/chronicle/etl/extractors/file_extractor.rb |