unicode.rb in activesupport-5.0.0.beta1

- old
+ new

@@ -1,6 +1,5 @@
-# encoding: utf-8
 module ActiveSupport
   module Multibyte
     module Unicode
 
       extend self
@@ -9,11 +8,11 @@
       # See http://www.unicode.org/reports/tr15/tr15-29.html for more
       # information about normalization.
       NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
 
       # The Unicode version that is supported by the implementation
-      UNICODE_VERSION = '7.0.0'
+      UNICODE_VERSION = '8.0.0'
 
       # The default normalization used for operations that require
       # normalization. It can be set to any of the normalizations
       # in NORMALIZATION_FORMS.
       #
@@ -56,11 +55,11 @@
       LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
 
       # Returns a regular expression pattern that matches the passed Unicode
       # codepoints.
       #
       # The return value is a String, not a Regexp: each codepoint is packed
       # into its UTF-8 character and the characters are joined with '|' to form
       # an alternation. It is meant to be interpolated into a Regexp literal,
       # as TRAILERS_PAT and LEADERS_PAT do below.
       #
       # * <tt>array_of_codepoints</tt> - An array of integer codepoints.
       #
       # NOTE(review): the characters are not regexp-escaped, so this presumably
       # expects only codepoints with no special meaning in a pattern - confirm
       # before passing arbitrary input.
       def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
-        array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|')
+        array_of_codepoints.collect{ |e| [e].pack 'U*'.freeze }.join('|'.freeze)
       end
       TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
       LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
 
       # Detect whether the codepoint is in a certain character class. Returns
@@ -209,13 +208,12 @@
           end
         end
         codepoints
       end
 
-      # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1.
       # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
-      if '<3'.respond_to?(:scrub) && !defined?(Rubinius)
+      if !defined?(Rubinius)
         # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
         # resulting in a valid UTF-8 string.
         #
         # Passing +true+ will forcibly tidy all bytes, assuming that the string's
         # encoding is entirely CP1252 or ISO-8859-1.
@@ -256,11 +254,11 @@
       # and validations.
       #
       # * <tt>string</tt> - The string to perform normalization on.
       # * <tt>form</tt> - The form you want to normalize in. Should be one of
       #   the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
-      #   Default is ActiveSupport::Multibyte.default_normalization_form.
+      #   Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
       def normalize(string, form=nil)
         form ||= @default_normalization_form
         # See http://www.unicode.org/reports/tr15, Table 1
         codepoints = string.codepoints.to_a
         case form
@@ -272,11 +270,11 @@
             reorder_characters(decompose(:compatibility, codepoints))
           when :kc
             compose(reorder_characters(decompose(:compatibility, codepoints)))
           else
             raise ArgumentError, "#{form} is not a valid normalization variant", caller
-        end.pack('U*')
+        end.pack('U*'.freeze)
       end
 
       # Passes +string+ to apply_mapping with the <tt>:lowercase_mapping</tt>
       # key and returns whatever apply_mapping returns.
       #
       # NOTE(review): apply_mapping is defined outside this excerpt; presumably
       # the result is the lowercased string - confirm against its definition.
       def downcase(string)
         apply_mapping string, :lowercase_mapping
       end
@@ -337,10 +335,10 @@
           rescue => e
             raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
           end
 
           # Redefine the === method so we can write shorter rules for grapheme cluster breaks
-          @boundary.each do |k,_|
+          @boundary.each_key do |k|
             @boundary[k].instance_eval do
               def ===(other)
                 detect { |i| i === other } ? true : false
               end
             end if @boundary[k].kind_of?(Array)