lib/remote_table/file.rb in remote_table-0.2.22 vs lib/remote_table/file.rb in remote_table-0.2.23
- old
+ new
@@ -61,11 +61,19 @@
return unless skip
RemoteTable.backtick_with_reporting "cat #{path} | tail -n +#{skip + 1} > #{path}.tmp"
FileUtils.mv "#{path}.tmp", path
end
+ USELESS_CHARACTERS = [ # byte sequences deleted by remove_useless_characters!; single-quoted so Ruby keeps the \x escapes literal and perl interprets them inside its s/// pattern
+ '\xef\xbb\xbf', # UTF-8 byte order mark
+ '\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
+ ]
+ def remove_useless_characters! # Rewrites the file at `path` with every USELESS_CHARACTERS sequence deleted.
+ RemoteTable.backtick_with_reporting "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g' #{path} > #{path}.tmp" # the join yields one `s/<seq>//g` substitution per entry; filtered output is written to a temp file
+ FileUtils.mv "#{path}.tmp", path # move the filtered temp file over the original
+ end
+
def convert_file_to_utf8! # Re-encodes the file at `path` from `encoding` to UTF-8 by shelling out to iconv.
- return if encoding == 'UTF-8' or encoding == 'UTF8' # early return present only in the old version (this line is removed by the diff)
RemoteTable.backtick_with_reporting "iconv -c -f #{encoding} -t UTF-8 #{path} > #{path}.tmp" # NOTE(review): with the guard removed this now runs for UTF-8 input too — presumably so `iconv -c` can drop invalid sequences; confirm
FileUtils.mv "#{path}.tmp", path # move the converted temp file over the original
end
def restore_file!