module Eco
  module Data
    module Files
      # Helpers to detect a UTF-8 byte order mark (BOM) at the start of a file
      # and to read a file's content as UTF-8 text.
      #
      # NOTE(review): `logger` is not defined in this module; it is presumably
      # provided by `Eco::Language::AuxiliarLogger` -- confirm.
      module Encoding
        include Eco::Language::AuxiliarLogger

        # Byte values of the UTF-8 byte order mark (0xEF 0xBB 0xBF).
        BOM_BYTES = [239, 187, 191].freeze

        # Tells whether the file starts with the UTF-8 BOM.
        #
        # @param path [String, nil] path of the file to inspect.
        # @return [Boolean] `true` only when `path` is a non-empty regular file
        #   whose first three bytes equal `BOM_BYTES`; `false` otherwise
        #   (including when `path` is `nil`).
        def has_bom?(path)
          return false if !path || file_empty?(path)

          # Binary read so the raw bytes are compared, regardless of any
          # default external encoding. `to_s` covers a `nil` read result.
          File.binread(path, 3).to_s.bytes == BOM_BYTES
        end

        # Reads the whole file and returns its content as UTF-8.
        #
        # When the file starts with a BOM, or `encoding` begins with `bom|`,
        # the file is read as UTF-8 and the leading BOM (if any) is stripped.
        # Otherwise it is read with `encoding` and transcoded to UTF-8 unless
        # the encoding name already contains `utf-8`.
        #
        # Any encoding other than `'utf-8'` is reported through both
        # `logger.debug` and `puts`.
        #
        # @param file [String] path of the file to read.
        # @param encoding [String, nil] encoding name; when omitted it is
        #   resolved via `scoped_encoding(file)`.
        # @return [String, nil] the file content, or `nil` if nothing was read.
        def get_file_content_with_encoding(file, encoding: nil)
          encoding ||= scoped_encoding(file)

          if encoding && encoding != 'utf-8'
            msg = "File encoding: '#{encoding}'"
            logger.debug(msg)
            puts msg
          end

          bom_enc = encoding && encoding.split('|').first == 'bom'
          if has_bom?(file) || bom_enc
            content  = remove_bom(File.read(file, encoding: 'utf-8'))
            encoding = 'utf-8'
          else
            content = File.read(file, encoding: encoding)
          end

          return nil unless content
          return content if encoding.include?('utf-8')

          content.encode('utf-8')
        end

        # Strips a leading UTF-8 BOM from `content`, if present.
        # https://stackoverflow.com/a/24916365/4352306
        #
        # Works at the byte level, so only the first three bytes are inspected
        # and the string's current encoding tag does not matter.
        #
        # @param content [String] text that may start with the BOM bytes.
        # @return [String] `content` itself when it has no leading BOM;
        #   otherwise a new UTF-8 tagged string without the first three bytes.
        def remove_bom(content)
          return content unless content.byteslice(0, 3).bytes == BOM_BYTES

          content.byteslice(3, content.bytesize - 3).force_encoding('utf-8')
        end

        # Names the encoding detected for the file.
        #
        # @param path [String, nil] path of the file to inspect.
        # @return [String] `"bom"` when the file starts with a UTF-8 BOM,
        #   `"utf-8"` otherwise.
        def encoding(path)
          has_bom?(path) ? 'bom' : 'utf-8'
        end

        # Resolves the encoding string that `get_file_content_with_encoding`
        # uses when none is given.
        #
        # @param path [String] path of the file to inspect.
        # @return [String, nil] `"bom|utf-8"` when `encoding(path)` is `"bom"`,
        #   otherwise the value of `encoding(path)`; `nil` (after logging an
        #   error) when the file does not exist.
        def scoped_encoding(path)
          unless file_exists?(path)
            logger.error("File does not exist: #{path}")
            return nil
          end

          # A distinct local name avoids shadowing the `encoding` method.
          detected = encoding(path)
          detected == 'bom' ? "#{detected}|utf-8" : detected
        end

        # Tells whether `file` exists, either as given or once expanded to an
        # absolute path.
        #
        # @param file [String, nil] path to check.
        # @return [Boolean] `false` when `file` is `nil`/`false`.
        def file_exists?(file)
          return false unless file

          # `File.exists?` was removed in Ruby 3.2; `File.exist?` is the
          # supported spelling.
          File.exist?(file) || File.exist?(File.expand_path(file))
        end

        # Tells whether `path` has no content to read.
        #
        # @param path [String] path to check.
        # @return [Boolean] `true` when `path` is not a regular file or is a
        #   zero-length file.
        def file_empty?(path)
          !File.file?(path) || File.zero?(path)
        end
      end
    end
  end
end