lib/docsplit/text_cleaner.rb in docsplit-0.7.2 vs lib/docsplit/text_cleaner.rb in docsplit-0.7.3

- old
+ new

@@ -33,11 +33,16 @@
 SINGLETONS = /^[AaIi]$/
 # For the time being, `clean` uses the regular StringScanner, and not the
 # multibyte-aware version, coercing to ASCII first.
 def clean(text)
-  require 'iconv' unless defined?(Iconv)
-  text = Iconv.iconv('ascii//translit//ignore', 'utf-8', text).first
+  if String.method_defined?(:encode)
+    text.encode!('ascii', :invalid => :replace, :undef => :replace, :replace => '?')
+  else
+    require 'iconv' unless defined?(Iconv)
+    text = Iconv.iconv('ascii//translit//ignore', 'utf-8', text).first
+  end
+
   scanner = StringScanner.new(text)
   cleaned = []
   spaced = false
   loop do
     if space = scanner.scan(SPACE)