module Eco
  module Data
    # File-system helpers: tolerant file reading, path splitting, timestamped
    # file names, BOM detection and folder listings.
    #
    # Including this module in a class adds `InstanceMethods` as instance methods
    # and `ClassMethods` as class methods (see `included`). The `ClassMethods`
    # helpers are also callable directly on the module itself
    # (e.g. `Eco::Data::Files.file_exists?(path)`).
    module Files
      # `strftime` pattern used when no explicit timestamp pattern is given.
      DEFAULT_TIMESTAMP_PATTERN = '%Y-%m-%dT%H%M%S'

      class << self
        # Hook run when the module is included: wires both method sets into `base`.
        # @param base [Class, Module] the including class or module.
        def included(base)
          base.send(:include, InstanceMethods)
          base.extend(ClassMethods)
        end
      end

      module InstanceMethods
        # NOTE(review): `logger` used below is presumably provided by this mixin — confirm.
        include Eco::Language::AuxiliarLogger

        # It offers a resilient way to read content from a file.
        # @note terminates the process with `exit(1)` when the file does not exist.
        # @param file [String] path of the file to read.
        # @param encoding [String, nil] encoding of the file; when `nil` it is
        #   detected through the class-level `encoding` helper ("bom" or "utf-8").
        # @param tolerance [Integer] the number of allowed encoding errors.
        # @return [String] the content of the file.
        def get_file_content(file, encoding, tolerance: 5)
          unless self.class.file_exists?(file)
            logger.error("File does not exist: #{file}")
            exit(1)
          end

          encoding ||= self.class.encoding(file)
          # "bom|utf-8" makes Ruby strip a leading byte order mark while reading.
          encoding = "#{encoding}|utf-8" if encoding == 'bom'

          if encoding && encoding != 'utf-8'
            msg = "File encoding: '#{encoding}'"
            logger.debug(msg)
            puts msg
          end

          read_with_tolerance(file, encoding: encoding, tolerance: tolerance)
        end

        # Reads `file` and replaces each invalid byte sequence with its hex dump
        # wrapped in angle brackets (e.g. `<ff>`), logging every replacement.
        # @param file [String] path of the file to read.
        # @param encoding [String, Encoding, nil] encoding passed on to `File.read`.
        # @param tolerance [Integer] the number of invalid sequences that get
        #   replaced before giving up.
        # @return [String] the scrubbed content; when more than `tolerance` invalid
        #   sequences are found, the content is returned as read, **without** any
        #   replacement applied.
        def read_with_tolerance(file, encoding:, tolerance: 5)
          allowed = tolerance
          content = File.read(file, encoding: encoding)
          # `to_s` keeps this check safe for a `nil` or `Encoding` argument.
          content = content.encode('utf-8') unless encoding.to_s.include?('utf-8')

          content.scrub do |bytes|
            if tolerance <= 0
              logger.error("There were more than #{allowed} encoding errors in the file '#{file}'.")
              # Non-local return: leaves the method with the unscrubbed content.
              return content
            end

            tolerance -= 1
            replacement = "<#{bytes.unpack('H*').first}>"
            logger.error("Encoding problem in file '#{file}': '#{replacement}'.")
            replacement
          end
        end
      end

      module ClassMethods
        # Delegates the creation of `path` to `Directory.create`.
        # @param path [String] the target path.
        # @param includes_file [Boolean] forwarded as is to `Directory.create`.
        def create_directory(path, includes_file: false)
          Directory.create(path, includes_file: includes_file)
        end

        # Splits a path into its directory components followed by the file name.
        # @param path [String] the path to split.
        # @return [Array<String>] the directory segments plus the last component.
        def split(path)
          separator = File::ALT_SEPARATOR || File::SEPARATOR
          dir_path, file = File.split(path)
          # Normalize to one single separator before splitting the directory part.
          dir_path.gsub(File::SEPARATOR, separator).split(separator).push(file)
        end

        # @param fullname [String] a file path.
        # @return [String] the last component of the path (extension included).
        def file_name(fullname)
          File.basename(fullname)
        end

        # @param fullname [String] a file path.
        # @return [String] the last component of the path without its extension.
        def file_basename(fullname)
          File.basename(fullname, File.extname(fullname))
        end

        # @param fullname [String] a file path.
        # @return [String] the directory part of the path, as given.
        def file_path(fullname)
          File.dirname(fullname)
        end

        # @param fullname [String] a file path.
        # @return [String] the directory part of the expanded (absolute) path.
        def file_fullpath(fullname)
          file_path(File.expand_path(fullname))
        end

        # @param file [String, nil] path to check.
        # @return [Boolean] `true` if `file` exists, either as given or once
        #   expanded to an absolute path; `false` for a `nil`/`false` argument.
        def file_exists?(file)
          return false unless file

          # `File.exists?` was removed in Ruby 3.2; `File.exist?` is the supported name.
          File.exist?(file) || File.exist?(File.expand_path(file))
        end

        # @param path [String, nil] directory path to check.
        # @return [Boolean] `true` if `path` is an existing directory, either as
        #   given or once expanded; `false` for a `nil`/`false` argument.
        def dir_exists?(path)
          return false unless path

          Dir.exist?(path) || Dir.exist?(File.expand_path(path))
        end

        # @param timestamp_pattern [String] a `strftime` pattern.
        # @return [String] the current time formatted with `timestamp_pattern`.
        def timestamp(timestamp_pattern = DEFAULT_TIMESTAMP_PATTERN)
          Time.now.strftime(timestamp_pattern)
        end

        # Resolves `filename` through `FilePattern`, passing the current timestamp
        # followed by `_` as the `start:` value.
        # @param filename [String] the file name (or pattern) to stamp.
        # @param timestamp_pattern [String] a `strftime` pattern.
        # @return the result of `FilePattern#resolve`
        #   (presumably the timestamped file name — confirm against `FilePattern`).
        def timestamp_file(filename, timestamp_pattern = DEFAULT_TIMESTAMP_PATTERN)
          file_pattern = Eco::Data::Files::FilePattern.new(filename)
          file_pattern.resolve(start: "#{timestamp(timestamp_pattern)}_")
        end

        # Copies the content of `source_file` into `dest_file`.
        # @param source_file [String] path of the file to copy.
        # @param dest_file [String] path of the destination file.
        # @param time_stamp [Boolean] when `true`, `dest_file` is first passed
        #   through `timestamp_file`.
        # @return [Integer] the number of bytes written.
        def copy_file(source_file, dest_file, time_stamp: false)
          dest_file = timestamp_file(dest_file) if time_stamp
          # Binary mode keeps the copy byte-exact (no newline/encoding conversion).
          File.binwrite(dest_file, File.binread(source_file))
        end

        # @param path [String] a file path.
        # @return [Boolean] `true` if `path` is not a regular file or has zero size.
        def file_empty?(path)
          !File.file?(path) || File.zero?(path)
        end

        # @param path [String, nil] a file path.
        # @return [Boolean] `true` if the file starts with the UTF-8 byte order
        #   mark (bytes `EF BB BF`); `false` for `nil`, missing or empty files.
        def has_bom?(path)
          return false if !path || file_empty?(path)

          head = File.open(path, 'rb') { |f| f.read(3) }
          head.unpack('C*') == [0xEF, 0xBB, 0xBF]
        end

        # @param path [String, nil] a file path.
        # @return [String] `"bom"` if the file starts with a UTF-8 BOM,
        #   `"utf-8"` otherwise.
        def encoding(path)
          has_bom?(path) ? "bom" : "utf-8"
        end

        # @return [String] the path of the running script (`$0`) without its
        #   extension, i.e. a folder named after the script and next to it.
        def script_subfolder
          basename = File.basename($0, File.extname($0))
          File.join(File.dirname($0), basename)
        end

        # Lists the regular files of `folder` whose name matches the glob `pattern`.
        # @param folder [String] the folder to look into.
        # @param pattern [String] glob pattern appended to the expanded folder path.
        # @param regexp [Regexp, nil] when a `Regexp`, only files whose base name
        #   matches it are kept; any other value is ignored.
        # @param older_than [Numeric, nil] when given, only files last modified
        #   more than this number of days ago are kept.
        # @return [Array<String>] the sorted absolute paths of the matching files.
        def folder_files(folder = ".", pattern = "*", regexp: nil, older_than: nil)
          target = File.join(File.expand_path(folder), pattern)
          files  = Dir[target].select { |f| File.file?(f) }

          if older_than
            # Computed once, so every file is compared against the same instant.
            cutoff = Time.now - (60 * 60 * 24 * older_than)
            files.select! { |f| File.mtime(f) < cutoff }
          end

          files.select! { |f| File.basename(f).match(regexp) } if regexp.is_a?(Regexp)
          files.sort
        end

        # Same as `folder_files`, restricted to the `*.csv` glob pattern.
        # @param folder [String] the folder to look into.
        # @param regexp [Regexp, nil] see `folder_files`.
        # @param older_than [Numeric, nil] see `folder_files`.
        # @return [Array<String>] the sorted absolute paths of the matching files.
        def csv_files(folder = ".", regexp: nil, older_than: nil)
          # `folder_files` already returns a sorted list.
          folder_files(folder, "*.csv", regexp: regexp, older_than: older_than)
        end
      end

      # Expose the class-level helpers on the module itself.
      class << self
        include Files::ClassMethods
      end
    end
  end
end