lib/active_support/multibyte/chars.rb in activesupport-6.1.7.10 vs lib/active_support/multibyte/chars.rb in activesupport-7.0.0.alpha1

- old
+ new

@@ -1,15 +1,14 @@ # frozen_string_literal: true require "active_support/json" require "active_support/core_ext/string/access" require "active_support/core_ext/string/behavior" -require "active_support/core_ext/symbol/starts_ends_with" require "active_support/core_ext/module/delegation" -module ActiveSupport #:nodoc: - module Multibyte #:nodoc: +module ActiveSupport # :nodoc: + module Multibyte # :nodoc: # Chars enables you to work transparently with UTF-8 encoding in the Ruby # String class without having extensive knowledge about the encoding. A # Chars object accepts a string upon initialization and proxies String # methods in an encoding safe manner. All the normal String methods are also # implemented on the proxy. @@ -101,11 +100,11 @@ # Reverses all characters in the string. # # 'Café'.mb_chars.reverse.to_s # => 'éfaC' def reverse - chars(@wrapped_string.scan(/\X/).reverse.join) + chars(@wrapped_string.grapheme_clusters.reverse.join) end # Limits the byte size of the string to a number of bytes without breaking # characters. Usable when the storage for a string is limited for some # reason. @@ -124,30 +123,30 @@ end alias_method :titlecase, :titleize # Performs canonical decomposition on all the characters. # - # 'é'.length # => 2 - # 'é'.mb_chars.decompose.to_s.length # => 3 + # 'é'.length # => 1 + # 'é'.mb_chars.decompose.to_s.length # => 2 def decompose chars(Unicode.decompose(:canonical, @wrapped_string.codepoints.to_a).pack("U*")) end # Performs composition on all the characters. # - # 'é'.length # => 3 - # 'é'.mb_chars.compose.to_s.length # => 2 + # 'é'.length # => 1 + # 'é'.mb_chars.compose.to_s.length # => 1 def compose chars(Unicode.compose(@wrapped_string.codepoints.to_a).pack("U*")) end # Returns the number of grapheme clusters in the string. # # 'क्षि'.mb_chars.length # => 4 - # 'क्षि'.mb_chars.grapheme_length # => 3 + # 'क्षि'.mb_chars.grapheme_length # => 2 def grapheme_length - @wrapped_string.scan(/\X/).length + @wrapped_string.grapheme_clusters.length end # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent # resulting in a valid UTF-8 string. # @@ -155,10 +154,10 @@ # encoding is entirely CP1252 or ISO-8859-1. def tidy_bytes(force = false) chars(Unicode.tidy_bytes(@wrapped_string, force)) end - def as_json(options = nil) #:nodoc: + def as_json(options = nil) # :nodoc: to_s.as_json(options) end %w(reverse tidy_bytes).each do |method| define_method("#{method}!") do |*args|