lib/active_support/multibyte/unicode.rb in activesupport-4.2.11.3 vs lib/active_support/multibyte/unicode.rb in activesupport-5.0.0.beta1
- old
+ new
@@ -1,6 +1,5 @@
-# encoding: utf-8
module ActiveSupport
module Multibyte
module Unicode
extend self
@@ -9,11 +8,11 @@
# See http://www.unicode.org/reports/tr15/tr15-29.html for more
# information about normalization.
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
# The Unicode version that is supported by the implementation
- UNICODE_VERSION = '7.0.0'
+ UNICODE_VERSION = '8.0.0'
# The default normalization used for operations that require
# normalization. It can be set to any of the normalizations
# in NORMALIZATION_FORMS.
#
@@ -56,11 +55,11 @@
LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
# Returns a regular expression pattern that matches the passed Unicode
# codepoints.
def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
- array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|')
+ array_of_codepoints.collect{ |e| [e].pack 'U*'.freeze }.join('|'.freeze)
end
TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
# Detect whether the codepoint is in a certain character class. Returns
@@ -209,13 +208,12 @@
end
end
codepoints
end
- # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1.
# Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
- if '<3'.respond_to?(:scrub) && !defined?(Rubinius)
+ if !defined?(Rubinius)
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
# resulting in a valid UTF-8 string.
#
# Passing +true+ will forcibly tidy all bytes, assuming that the string's
# encoding is entirely CP1252 or ISO-8859-1.
@@ -256,11 +254,11 @@
# and validations.
#
# * <tt>string</tt> - The string to perform normalization on.
# * <tt>form</tt> - The form you want to normalize in. Should be one of
# the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
- # Default is ActiveSupport::Multibyte.default_normalization_form.
+ # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
def normalize(string, form=nil)
form ||= @default_normalization_form
# See http://www.unicode.org/reports/tr15, Table 1
codepoints = string.codepoints.to_a
case form
@@ -272,11 +270,11 @@
reorder_characters(decompose(:compatibility, codepoints))
when :kc
compose(reorder_characters(decompose(:compatibility, codepoints)))
else
raise ArgumentError, "#{form} is not a valid normalization variant", caller
- end.pack('U*')
+ end.pack('U*'.freeze)
end
def downcase(string)
apply_mapping string, :lowercase_mapping
end
@@ -337,10 +335,10 @@
rescue => e
raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
end
# Redefine the === method so we can write shorter rules for grapheme cluster breaks
- @boundary.each do |k,_|
+ @boundary.each_key do |k|
@boundary[k].instance_eval do
def ===(other)
detect { |i| i === other } ? true : false
end
end if @boundary[k].kind_of?(Array)