Sha256: 19f8b6f6f4b98ded6094f583ab5bd9a8c012034993ea1faaa253e4257c31371c

Contents?: true

Size: 1.12 KB

Versions: 10

Compression:

Stored size: 1.12 KB

Contents

# frozen_string_literal: true

require 'active_support'

module DHC
  # Concern that adds a class-level `fix_invalid_encoding` helper for turning
  # arbitrary strings into valid UTF-8.
  module FixInvalidEncodingConcern
    extend ActiveSupport::Concern

    module ClassMethods
      # Transcoding options shared by every repair attempt: bytes that are
      # invalid in the source encoding, or have no UTF-8 equivalent, are dropped.
      ENCODING_REPAIR_OPTIONS = { invalid: :replace, undef: :replace, replace: '' }.freeze

      # Source encodings tried, in order, when the declared encoding of the
      # string cannot be trusted.
      FALLBACK_SOURCE_ENCODINGS = %w[ISO-8859-1 BINARY].freeze

      # Forcefully converts +string+ into a valid UTF-8 string.
      #
      # Repair strategy, first match wins:
      # 1. already valid UTF-8: returned as an unfrozen copy
      # 2. binary-tagged bytes that are well-formed UTF-8: re-tagged as UTF-8
      # 3. valid in its declared (non-UTF-8, non-binary) encoding: transcoded
      #    from that encoding, so e.g. Windows-1252 or UTF-16 text is preserved
      # 4. otherwise the bytes are assumed to be ISO-8859-1, then BINARY
      # 5. if nothing produced valid UTF-8, an empty string is returned
      #
      # The argument is never mutated. Non-String arguments (including nil)
      # are returned untouched.
      def fix_invalid_encoding(string)
        return string unless string.is_a?(String)

        result = string.dup
        return result if valid_utf8?(result)

        repaired = reinterpret_binary_as_utf8(result) || transcode_from_declared_encoding(result)
        return repaired if repaired

        # The declared encoding is wrong or unusable: guess the source encoding.
        FALLBACK_SOURCE_ENCODINGS.each do |source_encoding|
          result.encode!('UTF-8', source_encoding, **ENCODING_REPAIR_OPTIONS)
          return result if valid_utf8?(result)
        end

        # Nothing worked, give up and return an empty string :(
        +''
      end

      private

      # True when +string+ is tagged as UTF-8 (says nothing about validity).
      def utf8?(string)
        string.encoding == Encoding::UTF_8
      end

      # True when +string+ is tagged as UTF-8 and its bytes are well-formed.
      def valid_utf8?(string)
        utf8?(string) && string.valid_encoding?
      end

      # Returns a UTF-8 tagged copy of a binary string whose bytes already are
      # well-formed UTF-8, or nil when that does not apply. Transcoding such
      # bytes from ISO-8859-1 would turn every multi-byte character into mojibake.
      def reinterpret_binary_as_utf8(string)
        return unless string.encoding == Encoding::BINARY

        candidate = string.dup.force_encoding(Encoding::UTF_8)
        candidate if candidate.valid_encoding?
      end

      # Returns +string+ transcoded to UTF-8 from its own declared encoding when
      # the string is valid in that encoding, or nil when the declared encoding
      # cannot be used (UTF-8, binary, invalid bytes, or no converter available).
      def transcode_from_declared_encoding(string)
        declared = string.encoding
        return if declared == Encoding::UTF_8 || declared == Encoding::BINARY
        return unless string.valid_encoding?

        converted = string.encode('UTF-8', **ENCODING_REPAIR_OPTIONS)
        converted if valid_utf8?(converted)
      rescue EncodingError
        # e.g. Encoding::ConverterNotFoundError for encodings without a
        # converter; let the caller fall back to guessing the source encoding.
        nil
      end
    end
  end
end

Version data entries

10 entries across 10 versions & 1 rubygems

Version Path
dhc-3.0.0 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb
dhc-2.4.0 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb
dhc-2.3.0 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb
dhc-2.2.1 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb
dhc-2.2.0 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb
dhc-2.1.1 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb
dhc-2.1.0 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb
dhc-2.0.1 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb
dhc-2.0.0 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb
dhc-1.0.0 lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb