lib/active_support/multibyte/unicode.rb in activesupport-4.2.11.3 vs lib/active_support/multibyte/unicode.rb in activesupport-5.0.0.beta1

- old
+ new

@@ -1,6 +1,5 @@ -# encoding: utf-8 module ActiveSupport module Multibyte module Unicode extend self @@ -9,11 +8,11 @@ # See http://www.unicode.org/reports/tr15/tr15-29.html for more # information about normalization. NORMALIZATION_FORMS = [:c, :kc, :d, :kd] # The Unicode version that is supported by the implementation - UNICODE_VERSION = '7.0.0' + UNICODE_VERSION = '8.0.0' # The default normalization used for operations that require # normalization. It can be set to any of the normalizations # in NORMALIZATION_FORMS. # @@ -56,11 +55,11 @@ LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM # Returns a regular expression pattern that matches the passed Unicode # codepoints. def self.codepoints_to_pattern(array_of_codepoints) #:nodoc: - array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|') + array_of_codepoints.collect{ |e| [e].pack 'U*'.freeze }.join('|'.freeze) end TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u # Detect whether the codepoint is in a certain character class. Returns @@ -209,13 +208,12 @@ end end codepoints end - # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1. # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars. - if '<3'.respond_to?(:scrub) && !defined?(Rubinius) + if !defined?(Rubinius) # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent # resulting in a valid UTF-8 string. # # Passing +true+ will forcibly tidy all bytes, assuming that the string's # encoding is entirely CP1252 or ISO-8859-1. @@ -256,11 +254,11 @@ # and validations. # # * <tt>string</tt> - The string to perform normalization on. # * <tt>form</tt> - The form you want to normalize in. Should be one of # the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. - # Default is ActiveSupport::Multibyte.default_normalization_form. + # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form. def normalize(string, form=nil) form ||= @default_normalization_form # See http://www.unicode.org/reports/tr15, Table 1 codepoints = string.codepoints.to_a case form @@ -272,11 +270,11 @@ reorder_characters(decompose(:compatibility, codepoints)) when :kc compose(reorder_characters(decompose(:compatibility, codepoints))) else raise ArgumentError, "#{form} is not a valid normalization variant", caller - end.pack('U*') + end.pack('U*'.freeze) end def downcase(string) apply_mapping string, :lowercase_mapping end @@ -337,10 +335,10 @@ rescue => e raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable") end # Redefine the === method so we can write shorter rules for grapheme cluster breaks - @boundary.each do |k,_| + @boundary.each_key do |k| @boundary[k].instance_eval do def ===(other) detect { |i| i === other } ? true : false end end if @boundary[k].kind_of?(Array)