lib/active_support/multibyte/unicode.rb in activesupport-6.0.6.1 vs lib/active_support/multibyte/unicode.rb in activesupport-6.1.0.rc1
- old
+ new
@@ -3,57 +3,23 @@
module ActiveSupport
module Multibyte
module Unicode
extend self
- # A list of all available normalization forms.
- # See https://www.unicode.org/reports/tr15/tr15-29.html for more
- # information about normalization.
- NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
-
- NORMALIZATION_FORM_ALIASES = { # :nodoc:
- c: :nfc,
- d: :nfd,
- kc: :nfkc,
- kd: :nfkd
- }
-
# The Unicode version that is supported by the implementation
UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]
- # The default normalization used for operations that require
- # normalization. It can be set to any of the normalizations
- # in NORMALIZATION_FORMS.
- #
- # ActiveSupport::Multibyte::Unicode.default_normalization_form = :c
- attr_accessor :default_normalization_form
- @default_normalization_form = :kc
-
- # Unpack the string at grapheme boundaries. Returns a list of character
- # lists.
- #
- # Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
- # Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
- def unpack_graphemes(string)
- ActiveSupport::Deprecation.warn(<<-MSG.squish)
- ActiveSupport::Multibyte::Unicode#unpack_graphemes is deprecated and will be
- removed from Rails 6.1. Use string.scan(/\X/).map(&:codepoints) instead.
- MSG
-
- string.scan(/\X/).map(&:codepoints)
+ def default_normalization_form
+ ActiveSupport::Deprecation.warn(
+ "ActiveSupport::Multibyte::Unicode.default_normalization_form is deprecated and will be removed in Rails 6.2."
+ )
end
- # Reverse operation of unpack_graphemes.
- #
- # Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
- def pack_graphemes(unpacked)
- ActiveSupport::Deprecation.warn(<<-MSG.squish)
- ActiveSupport::Multibyte::Unicode#pack_graphemes is deprecated and will be
- removed from Rails 6.1. Use array.flatten.pack("U*") instead.
- MSG
-
- unpacked.flatten.pack("U*")
+ def default_normalization_form=(_)
+ ActiveSupport::Deprecation.warn(
+ "ActiveSupport::Multibyte::Unicode.default_normalization_form= is deprecated and will be removed in Rails 6.2."
+ )
end
# Decompose composed characters to the decomposed form.
def decompose(type, codepoints)
if type == :compatibility
@@ -74,11 +40,11 @@
# resulting in a valid UTF-8 string.
#
# Passing +true+ will forcibly tidy all bytes, assuming that the string's
# encoding is entirely CP1252 or ISO-8859-1.
def tidy_bytes(string, force = false)
- return string if string.empty?
+ return string if string.empty? || string.ascii_only?
return recode_windows1252_chars(string) if force
string.scrub { |bad| recode_windows1252_chars(bad) }
end
else
def tidy_bytes(string, force = false)
@@ -102,49 +68,9 @@
end
reader.finish
out.encode!(Encoding::UTF_8)
- end
- end
-
- # Returns the KC normalization of the string by default. NFKC is
- # considered the best normalization form for passing strings to databases
- # and validations.
- #
- # * <tt>string</tt> - The string to perform normalization on.
- # * <tt>form</tt> - The form you want to normalize in. Should be one of
- # the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
- # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
- def normalize(string, form = nil)
- form ||= @default_normalization_form
-
- # See https://www.unicode.org/reports/tr15, Table 1
- if alias_form = NORMALIZATION_FORM_ALIASES[form]
- ActiveSupport::Deprecation.warn(<<-MSG.squish)
- ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
- removed from Rails 6.1. Use String#unicode_normalize(:#{alias_form}) instead.
- MSG
-
- string.unicode_normalize(alias_form)
- else
- ActiveSupport::Deprecation.warn(<<-MSG.squish)
- ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
- removed from Rails 6.1. Use String#unicode_normalize instead.
- MSG
-
- raise ArgumentError, "#{form} is not a valid normalization variant", caller
- end
- end
-
- %w(downcase upcase swapcase).each do |method|
- define_method(method) do |string|
- ActiveSupport::Deprecation.warn(<<-MSG.squish)
- ActiveSupport::Multibyte::Unicode##{method} is deprecated and
- will be removed from Rails 6.1. Use String methods directly.
- MSG
-
- string.send(method)
end
end
private
def recode_windows1252_chars(string)