Sha256: 05fa1ab17fefc43b55a662d288f258a9a2f0bf4e93f4e7847c7bad33361d120f
Contents?: true
Size: 720 Bytes
Versions: 6
Compression:
Stored size: 720 Bytes
Contents
# Helpers for coercing arbitrary strings into valid UTF-8.
module UTF8Util
  # Use '?' instead of the Unicode replacement character (U+FFFD), since that
  # is 3 bytes in UTF-8 and can increase the string size if it's done a lot.
  REPLACEMENT_CHAR = "?".freeze

  # Replace invalid UTF-8 character sequences with a replacement character.
  #
  # str - the String to clean; it is left untouched.
  #
  # Returns a copy of the given String as valid UTF-8.
  def self.clean(str)
    clean!(str.dup)
  end

  # Replace invalid UTF-8 character sequences with a replacement character,
  # modifying the given String in place.
  #
  # str - the String to clean (must not be frozen if it needs cleaning).
  #
  # Returns str itself, tagged as and containing only valid UTF-8.
  def self.clean!(str)
    if str.encoding == Encoding::UTF_8
      # Already clean: nothing to do.
      return str if str.valid_encoding?

      # Tagged UTF-8 but containing malformed bytes. scrub! replaces only the
      # broken sequences and keeps well-formed multibyte characters intact.
      # Rubies without String#scrub! fall through to the byte-wise path below.
      return str.scrub!(REPLACEMENT_CHAR) if str.respond_to?(:scrub!)
    end

    # Treat the content as raw bytes and transcode to UTF-8: ASCII bytes pass
    # through, every other byte becomes REPLACEMENT_CHAR. encode! (rather than
    # encode) keeps the promise of returning the same String object.
    str.force_encoding(Encoding::BINARY)
    str.encode!(Encoding::UTF_8,
                :invalid => :replace,
                :undef => :replace,
                :replace => REPLACEMENT_CHAR)
  end
end
Version data entries
6 entries across 6 versions & 1 rubygems