Sha256: f21709c8368758848c42eddbf4eb99547b032a7432e2bdac46d2997de5d9b0d2

Contents?: true

Size: 1.12 KB

Versions: 22

Compression:

Stored size: 1.12 KB

Contents

# frozen_string_literal: true

require 'active_support'

module LHC
  # Concern that adds a class-level helper for coercing strings into
  # valid UTF-8.
  module FixInvalidEncodingConcern
    extend ActiveSupport::Concern

    module ClassMethods
      # Forcefully converts +string+ into a valid UTF-8 string.
      #
      # Anything that is not a String is handed back unchanged. Otherwise the
      # work is done on a copy, so the argument itself is never modified.
      # While the copy is not valid UTF-8 it is re-encoded, interpreting its
      # bytes first as ISO-8859-1 and then as BINARY; bytes that cannot be
      # converted are dropped. Should the copy still not be valid UTF-8 after
      # both attempts, an empty string is returned instead.
      def fix_invalid_encoding(string)
        return string unless string.is_a?(String)

        candidate = string.dup

        # Source encodings to assume, in order of preference.
        %w[ISO-8859-1 BINARY].each do |assumed_encoding|
          break if candidate.valid_encoding? && utf8?(candidate)

          candidate.encode!('UTF-8', assumed_encoding, invalid: :replace, undef: :replace, replace: '')
        end

        # Nothing worked: give up and return an empty string.
        candidate.valid_encoding? && utf8?(candidate) ? candidate : ""
      end

      private

      # True when +string+ is tagged with the UTF-8 encoding. This looks at
      # the encoding label only, not at whether the bytes are valid.
      def utf8?(string)
        Encoding::UTF_8 == string.encoding
      end
    end
  end
end

Version data entries

22 entries across 22 versions & 1 rubygems

Version Path
lhc-10.5.4 lib/lhc/concerns/lhc/fix_invalid_encoding_concern.rb
lhc-10.5.3 lib/lhc/concerns/lhc/fix_invalid_encoding_concern.rb