Sha256: 61aeca98d37c1fdfcfa4fa196a2537947ffc475be57cff007bb87a08222d1853

Contents?: true

Size: 1.97 KB

Versions: 17

Compression:

Stored size: 1.97 KB

Contents

# typed: true
# frozen_string_literal: true

require 'charlock_holmes'

module Workato
  module Utilities
    module Encoding
      class << self
        # this function finds first possible encoding that allows to perform correct encoding operations
        # if no encoding is found it preserves initial and replaces bad symbols with ?
        def force_best_encoding!(string)
          original_encoding = string.encoding

          possible_encodings(string).each do |encoding|
            return string.force_encoding(::Encoding::ASCII_8BIT) if encoding.nil? # for binary
            next unless string.force_encoding(encoding).valid_encoding?

            begin
              # try encode to utf
              string.encode(::Encoding::UTF_8)
              return string
            rescue ::Encoding::UndefinedConversionError
              next
            end
          end
          if original_encoding == ::Encoding::BINARY
            string.force_encoding(::Encoding::BINARY)
          else
            string
              .encode!(::Encoding::UTF_8, invalid: :replace, undef: :replace, replace: '?')
              .encode!(original_encoding, invalid: :replace, undef: :replace, replace: '?')
          end
        end

        private

        def possible_encodings(string)
          encoding_candidates = CharlockHolmes::EncodingDetector.detect_all(string).sort! do |a, b|
            confidence_a, encoding_a = a.values_at(:confidence, :ruby_encoding)
            confidence_b, encoding_b = b.values_at(:confidence, :ruby_encoding)
            # If equal and one binary, prefer non-binary.
            if confidence_a == confidence_b
              if encoding_a == 'binary'
                confidence_b += 100
              elsif encoding_b == 'binary'
                confidence_a += 100
              end
            end
            confidence_b <=> confidence_a
          end
          encoding_candidates.map { |candidate| candidate[:ruby_encoding] }
        end
      end
    end
  end
end

Version data entries

17 entries across 17 versions & 1 rubygems

Version Path
workato-connector-sdk-1.3.14 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.13 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.12 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.11 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.10 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.9 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.8 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.7 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.6 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.5 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.4 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.3 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.2 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.1 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.3.0 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.2.0 lib/workato/utilities/encoding.rb
workato-connector-sdk-1.1.0 lib/workato/utilities/encoding.rb