lib/active_support/multibyte/chars.rb in activesupport-2.3.18 vs lib/active_support/multibyte/chars.rb in activesupport-3.0.0.beta

- old
+ new

@@ -1,6 +1,8 @@ # encoding: utf-8 +require 'active_support/core_ext/string/access' +require 'active_support/core_ext/string/behavior' module ActiveSupport #:nodoc: module Multibyte #:nodoc: # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an @@ -194,11 +196,11 @@ # # Example: # 'Café périferôl'.mb_chars.index('ô') #=> 12 # 'Café périferôl'.mb_chars.index(/\w/u) #=> 0 def index(needle, offset=0) - wrapped_offset = self.first(offset).wrapped_string.length + wrapped_offset = first(offset).wrapped_string.length index = @wrapped_string.index(needle, wrapped_offset) index ? (self.class.u_unpack(@wrapped_string.slice(0...index)).size) : nil end # Returns the position _needle_ in the string, counting in @@ -208,11 +210,11 @@ # Example: # 'Café périferôl'.mb_chars.rindex('é') #=> 6 # 'Café périferôl'.mb_chars.rindex(/\w/u) #=> 13 def rindex(needle, offset=nil) offset ||= length - wrapped_offset = self.first(offset).wrapped_string.length + wrapped_offset = first(offset).wrapped_string.length index = @wrapped_string.rindex(needle, wrapped_offset) index ? (self.class.u_unpack(@wrapped_string.slice(0...index)).size) : nil end # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets. @@ -360,10 +362,20 @@ slice = self[*args] self[*args] = '' slice end + # Limit the byte size of the string to a number of bytes without breaking characters. Usable + # when the storage for a string is limited for some reason. + # + # Example: + # s = 'こんにちは' + # s.mb_chars.limit(7) #=> "こに" + def limit(limit) + slice(0...translate_offset(limit)) + end + # Returns the codepoint of the first character in the string. # # Example: # 'こんにちは'.mb_chars.ord #=> 12371 def ord @@ -371,11 +383,11 @@ end # Convert characters in the string to uppercase. # # Example: - # 'Laurent, òu sont les tests?'.mb_chars.upcase.to_s #=> "LAURENT, ÒU SONT LES TESTS?" + # 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s #=> "LAURENT, OÙ SONT LES TESTS ?" def upcase apply_mapping :uppercase_mapping end # Convert characters in the string to lowercase. @@ -648,27 +660,23 @@ end.join end end protected - + def translate_offset(byte_offset) #:nodoc: return nil if byte_offset.nil? return 0 if @wrapped_string == '' - chunk = @wrapped_string[0..byte_offset] + + if @wrapped_string.respond_to?(:force_encoding) + @wrapped_string = @wrapped_string.dup.force_encoding(Encoding::ASCII_8BIT) + end + begin - begin - chunk.unpack('U*').length - 1 - rescue ArgumentError => e - chunk = @wrapped_string[0..(byte_offset+=1)] - # Stop retrying at the end of the string - raise e unless byte_offset < chunk.length - # We damaged a character, retry - retry - end - # Catch the ArgumentError so we can throw our own - rescue ArgumentError - raise EncodingError, 'malformed UTF-8 character' + @wrapped_string[0...byte_offset].unpack('U*').length + rescue ArgumentError => e + byte_offset -= 1 + retry end end def justify(integer, way, padstr=' ') #:nodoc: raise ArgumentError, "zero width padding" if padstr.length == 0