lib/active_support/multibyte/chars.rb in activesupport-6.1.7.10 vs lib/active_support/multibyte/chars.rb in activesupport-7.0.0.alpha1
- old
+ new
@@ -1,15 +1,14 @@
# frozen_string_literal: true
require "active_support/json"
require "active_support/core_ext/string/access"
require "active_support/core_ext/string/behavior"
-require "active_support/core_ext/symbol/starts_ends_with"
require "active_support/core_ext/module/delegation"
-module ActiveSupport #:nodoc:
- module Multibyte #:nodoc:
+module ActiveSupport # :nodoc:
+ module Multibyte # :nodoc:
# Chars enables you to work transparently with UTF-8 encoding in the Ruby
# String class without having extensive knowledge about the encoding. A
# Chars object accepts a string upon initialization and proxies String
# methods in an encoding-safe manner. All the normal String methods are also
# implemented on the proxy.
@@ -101,11 +100,11 @@
# Reverses all characters in the string. The string is split into grapheme
# clusters before reversing, so a character made of several codepoints is
# moved as a unit instead of being reversed codepoint by codepoint. The
# reversed string is passed to +chars+.
#
# 'Café'.mb_chars.reverse.to_s # => 'éfaC'
def reverse
- chars(@wrapped_string.scan(/\X/).reverse.join)
+ chars(@wrapped_string.grapheme_clusters.reverse.join)
end
# Limits the byte size of the string to a number of bytes without breaking
# characters. Usable when the storage for a string is limited for some
# reason.
@@ -124,30 +123,30 @@
end
alias_method :titlecase, :titleize
# Performs canonical decomposition on all the characters.
#
- # 'é'.length # => 2
- # 'é'.mb_chars.decompose.to_s.length # => 3
+ # 'é'.length # => 1
+ # 'é'.mb_chars.decompose.to_s.length # => 2
def decompose
  # String#codepoints already returns an Array, so it is handed to
  # Unicode.decompose as-is; :canonical requests canonical decomposition.
  decomposed = Unicode.decompose(:canonical, @wrapped_string.codepoints)
  # Pack the resulting codepoints back into a UTF-8 string ("U*") and pass
  # it to +chars+.
  chars(decomposed.pack("U*"))
end
# Performs composition on all the characters.
#
- # 'é'.length # => 3
- # 'é'.mb_chars.compose.to_s.length # => 2
+ # 'é'.length # => 1
+ # 'é'.mb_chars.compose.to_s.length # => 1
def compose
  # String#codepoints already returns an Array, so it is handed to
  # Unicode.compose as-is.
  composed = Unicode.compose(@wrapped_string.codepoints)
  # Pack the resulting codepoints back into a UTF-8 string ("U*") and pass
  # it to +chars+.
  chars(composed.pack("U*"))
end
# Returns the number of grapheme clusters in the string. A grapheme cluster
# may span several codepoints, so this count can be smaller than +length+,
# as the example below shows.
#
# 'क्षि'.mb_chars.length # => 4
- # 'क्षि'.mb_chars.grapheme_length # => 3
+ # 'क्षि'.mb_chars.grapheme_length # => 2
def grapheme_length
- @wrapped_string.scan(/\X/).length
+ @wrapped_string.grapheme_clusters.length
end
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
# resulting in a valid UTF-8 string.
#
@@ -155,10 +154,10 @@
# encoding is entirely CP1252 or ISO-8859-1.
def tidy_bytes(force = false)
  # Hand the wrapped string and the +force+ flag (passed through unchanged)
  # to Unicode.tidy_bytes, then pass the cleaned result to +chars+.
  tidied = Unicode.tidy_bytes(@wrapped_string, force)
  chars(tidied)
end
# Builds the JSON representation by converting to a plain string with +to_s+
# and delegating to that string's +as_json+, forwarding +options+ unchanged.
- def as_json(options = nil) #:nodoc:
+ def as_json(options = nil) # :nodoc:
to_s.as_json(options)
end
%w(reverse tidy_bytes).each do |method|
define_method("#{method}!") do |*args|