benchmark/active_support.rb in utf8-0.1.6 vs benchmark/active_support.rb in utf8-0.1.7

- old
+ new

@@ -3,10 +3,11 @@ require 'benchmark' require 'rubygems' require 'active_support' +$KCODE = 'UTF8' raw = File.read(File.expand_path('../test.txt', __FILE__)) utf8 = raw.as_utf8 as_mb = ActiveSupport::Multibyte::Chars.new(raw) @@ -27,13 +28,19 @@ times.times {utf8[1024, 1024]} } x.report("#[-start, len]") { times.times {utf8[-1024, 1024]} } + x.report("#clean") { + times.times {utf8.clean} + } + x.report("#valid?") { + times.times {utf8.valid?} + } } -puts "\n\nActiveSupport::Multibyte::Chars" +puts "\n\nActiveSupport::Multibyte" Benchmark.bmbm { |x| x.report("#length") { times.times {as_mb.length} } x.report("#[index]") { @@ -45,7 +52,38 @@ x.report("#[start, len]") { times.times {as_mb[1024, 1024]} } x.report("#[-start, len]") { times.times {as_mb[-1024, 1024]} + } + x.report("ActiveSupport::Multibyte.clean") { + times.times {ActiveSupport::Multibyte.clean(raw)} + } + x.report("ActiveSupport::Multibyte.verify") { + times.times {ActiveSupport::Multibyte.verify(raw)} + } +} + +require 'iconv' +module ActiveSupport::Multibyte + class << self + OUTSIDE_ASCII = /[^\x00-\x7f]/n + ICONV_CLEANER = Iconv.new('UTF-8//IGNORE', 'UTF-8') + + def clean_with_iconv(string) + if string =~ OUTSIDE_ASCII + ICONV_CLEANER.iconv(string + ' ')[0..-2] + else + string + end + end + + alias_method :clean_without_iconv, :clean + alias_method :clean, :clean_with_iconv + end +end +puts "\n\nActiveSupport::Multibyte (patched with Iconv)" +Benchmark.bmbm { |x| + x.report("ActiveSupport::Multibyte.clean") { + times.times {ActiveSupport::Multibyte.clean(raw)} } } \ No newline at end of file