lib/active_support/multibyte/chars.rb in activesupport-2.3.18 vs lib/active_support/multibyte/chars.rb in activesupport-3.0.0.beta
- old
+ new
@@ -1,6 +1,8 @@
# encoding: utf-8
+require 'active_support/core_ext/string/access'
+require 'active_support/core_ext/string/behavior'
module ActiveSupport #:nodoc:
module Multibyte #:nodoc:
# Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
# knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
@@ -194,11 +196,11 @@
#
# Example:
# 'Café périferôl'.mb_chars.index('ô') #=> 12
# 'Café périferôl'.mb_chars.index(/\w/u) #=> 0
def index(needle, offset=0)
- wrapped_offset = self.first(offset).wrapped_string.length
+ wrapped_offset = first(offset).wrapped_string.length
index = @wrapped_string.index(needle, wrapped_offset)
index ? (self.class.u_unpack(@wrapped_string.slice(0...index)).size) : nil
end
# Returns the position of _needle_ in the string, counting in
@@ -208,11 +210,11 @@
# Example:
# 'Café périferôl'.mb_chars.rindex('é') #=> 6
# 'Café périferôl'.mb_chars.rindex(/\w/u) #=> 13
def rindex(needle, offset=nil)
offset ||= length
- wrapped_offset = self.first(offset).wrapped_string.length
+ wrapped_offset = first(offset).wrapped_string.length
index = @wrapped_string.rindex(needle, wrapped_offset)
index ? (self.class.u_unpack(@wrapped_string.slice(0...index)).size) : nil
end
# Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
@@ -360,10 +362,20 @@
slice = self[*args]
self[*args] = ''
slice
end
+ # Limit the byte size of the string to a number of bytes without breaking characters. Usable
+ # when the storage for a string is limited for some reason.
+ #
+ # Example:
+ # s = 'こんにちは'
+ # s.mb_chars.limit(7) #=> "こん"
+ def limit(limit)
+ slice(0...translate_offset(limit))
+ end
+
# Returns the codepoint of the first character in the string.
#
# Example:
# 'こんにちは'.mb_chars.ord #=> 12371
def ord
@@ -371,11 +383,11 @@
end
# Convert characters in the string to uppercase.
#
# Example:
- # 'Laurent, òu sont les tests?'.mb_chars.upcase.to_s #=> "LAURENT, ÒU SONT LES TESTS?"
+ # 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s #=> "LAURENT, OÙ SONT LES TESTS ?"
def upcase
apply_mapping :uppercase_mapping
end
# Convert characters in the string to lowercase.
@@ -648,27 +660,23 @@
end.join
end
end
protected
-
+
def translate_offset(byte_offset) #:nodoc:
return nil if byte_offset.nil?
return 0 if @wrapped_string == ''
- chunk = @wrapped_string[0..byte_offset]
+
+ if @wrapped_string.respond_to?(:force_encoding)
+ @wrapped_string = @wrapped_string.dup.force_encoding(Encoding::ASCII_8BIT)
+ end
+
begin
- begin
- chunk.unpack('U*').length - 1
- rescue ArgumentError => e
- chunk = @wrapped_string[0..(byte_offset+=1)]
- # Stop retrying at the end of the string
- raise e unless byte_offset < chunk.length
- # We damaged a character, retry
- retry
- end
- # Catch the ArgumentError so we can throw our own
- rescue ArgumentError
- raise EncodingError, 'malformed UTF-8 character'
+ @wrapped_string[0...byte_offset].unpack('U*').length
+ rescue ArgumentError => e
+ byte_offset -= 1
+ retry
end
end
def justify(integer, way, padstr=' ') #:nodoc:
raise ArgumentError, "zero width padding" if padstr.length == 0