# encoding: utf-8 # # THINK Does it also remove diaritics, like べ to へ? # module Picky module CharacterSubstituters # Substitutes Umlauts like # ä, ö, ü => ae, oe, ue. # (and more, see specs) # class WestEuropean def initialize # :nodoc: @chars = ActiveSupport::Multibyte.proxy_class end # Substitutes occurrences of certain characters # (like Umlauts) with ASCII representations of them. # # Examples: # ä -> ae # Ö -> Oe # ß -> ss # ç -> c # # (See the associated spec for all examples) # def substitute text trans = @chars.new(text).normalize(:kd) # Substitute special cases. # trans.gsub!('ß', 'ss') # Substitute umlauts (of A,O,U,a,o,u). # trans.gsub!(/([AOUaou])\314\210/u, '\1e') # Get rid of ecutes, graves etc. # trans.unpack('U*').select { |cp| cp < 0x0300 || cp > 0x035F }.pack('U*') end def to_s # :nodoc: self.class.name end end end end