lib/active_support/multibyte/unicode.rb in activesupport-6.0.6.1 vs lib/active_support/multibyte/unicode.rb in activesupport-6.1.0.rc1

- old
+ new

@@ -3,57 +3,23 @@
 module ActiveSupport
   module Multibyte
     module Unicode
       extend self

-      # A list of all available normalization forms.
-      # See https://www.unicode.org/reports/tr15/tr15-29.html for more
-      # information about normalization.
-      NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
-
-      NORMALIZATION_FORM_ALIASES = { # :nodoc:
-        c: :nfc,
-        d: :nfd,
-        kc: :nfkc,
-        kd: :nfkd
-      }
-
       # The Unicode version that is supported by the implementation
       UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]

-      # The default normalization used for operations that require
-      # normalization. It can be set to any of the normalizations
-      # in NORMALIZATION_FORMS.
-      #
-      #   ActiveSupport::Multibyte::Unicode.default_normalization_form = :c
-      attr_accessor :default_normalization_form
-      @default_normalization_form = :kc
-
-      # Unpack the string at grapheme boundaries. Returns a list of character
-      # lists.
-      #
-      #   Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
-      #   Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
-      def unpack_graphemes(string)
-        ActiveSupport::Deprecation.warn(<<-MSG.squish)
-          ActiveSupport::Multibyte::Unicode#unpack_graphemes is deprecated and will be
-          removed from Rails 6.1. Use string.scan(/\X/).map(&:codepoints) instead.
-        MSG
-
-        string.scan(/\X/).map(&:codepoints)
+      def default_normalization_form
+        ActiveSupport::Deprecation.warn(
+          "ActiveSupport::Multibyte::Unicode.default_normalization_form is deprecated and will be removed in Rails 6.2."
+        )
       end

-      # Reverse operation of unpack_graphemes.
-      #
-      #   Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
-      def pack_graphemes(unpacked)
-        ActiveSupport::Deprecation.warn(<<-MSG.squish)
-          ActiveSupport::Multibyte::Unicode#pack_graphemes is deprecated and will be
-          removed from Rails 6.1. Use array.flatten.pack("U*") instead.
-        MSG
-
-        unpacked.flatten.pack("U*")
+      def default_normalization_form=(_)
+        ActiveSupport::Deprecation.warn(
+          "ActiveSupport::Multibyte::Unicode.default_normalization_form= is deprecated and will be removed in Rails 6.2."
+        )
       end

       # Decompose composed characters to the decomposed form.
       def decompose(type, codepoints)
         if type == :compatibility
@@ -74,11 +40,11 @@
         # resulting in a valid UTF-8 string.
         #
         # Passing +true+ will forcibly tidy all bytes, assuming that the string's
         # encoding is entirely CP1252 or ISO-8859-1.
         def tidy_bytes(string, force = false)
-          return string if string.empty?
+          return string if string.empty? || string.ascii_only?
           return recode_windows1252_chars(string) if force
           string.scrub { |bad| recode_windows1252_chars(bad) }
         end
       else
         def tidy_bytes(string, force = false)
@@ -102,49 +68,9 @@
           end

           reader.finish

           out.encode!(Encoding::UTF_8)
-        end
-      end
-
-      # Returns the KC normalization of the string by default. NFKC is
-      # considered the best normalization form for passing strings to databases
-      # and validations.
-      #
-      # * <tt>string</tt> - The string to perform normalization on.
-      # * <tt>form</tt> - The form you want to normalize in. Should be one of
-      #   the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
-      #   Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
-      def normalize(string, form = nil)
-        form ||= @default_normalization_form
-
-        # See https://www.unicode.org/reports/tr15, Table 1
-        if alias_form = NORMALIZATION_FORM_ALIASES[form]
-          ActiveSupport::Deprecation.warn(<<-MSG.squish)
-            ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
-            removed from Rails 6.1. Use String#unicode_normalize(:#{alias_form}) instead.
-          MSG
-
-          string.unicode_normalize(alias_form)
-        else
-          ActiveSupport::Deprecation.warn(<<-MSG.squish)
-            ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
-            removed from Rails 6.1. Use String#unicode_normalize instead.
-          MSG
-
-          raise ArgumentError, "#{form} is not a valid normalization variant", caller
-        end
-      end
-
-      %w(downcase upcase swapcase).each do |method|
-        define_method(method) do |string|
-          ActiveSupport::Deprecation.warn(<<-MSG.squish)
-            ActiveSupport::Multibyte::Unicode##{method} is deprecated and
-            will be removed from Rails 6.1. Use String methods directly.
-          MSG
-
-          string.send(method)
         end
       end

       private
         def recode_windows1252_chars(string)