Sha256: 6374979a4ab1b6c62d4e1aabfdd9c6cbc08cdc08b7731c33742ddad42e425be6

Contents?: true

Size: 1.59 KB

Versions: 4

Compression:

Stored size: 1.59 KB

Contents

# encoding: UTF-8

# Transliterates accented Latin characters to plain ASCII and produces
# upper-cased forms of strings and single words.
module Normalizer
  # ASCII replacement => every character that is rewritten to it.
  #
  # Letter case is preserved by the table itself (upper-case characters map
  # to upper-case ASCII, lower-case characters to lower-case ASCII), so each
  # accented letter is listed in both cases.
  #
  # NOTE(review): "ß" maps to "B" rather than "ss". This is kept as-is
  # because previously normalized data may rely on it; confirm whether the
  # mapping is intentional.
  TRANSLITERATIONS = {
    "A"  => "ÀÂÅÃÄÁẤẠĂẮẢẬẦẰ",
    "E"  => "ÉÈÊËĔĚẾỆỂỄỀẺ",
    "I"  => "ÍÌÎÏǏĬĪĨỈ",
    "O"  => "ÓÒÔØÕÖỚỔŎỎỖỘƠỌỠỐỒỜ",
    "U"  => "ÚÙÛÜŮƯỪỰỦỨỤ",
    "Y"  => "ÝŸỸ",
    "AE" => "Æ",
    "C"  => "ČÇĆ",
    "S"  => "ŠŞŚ",
    "D"  => "Đ",
    "Z"  => "ŽŹ",
    "N"  => "ÑŃŇ",
    "OE" => "Œ",
    "B"  => "ß",
    "K"  => "Ķ",
    "R"  => "Ř",
    "G"  => "Ğ",
    "a"  => "áàâåãäăắảạậầằấ",
    "e"  => "éèêëĕěếệểễềẻ",
    "i"  => "íìîïǐĭīĩỉ",
    "o"  => "óòôøõöŏỏỗộơọỡốồờớổ",
    "u"  => "úùûüůưừựủứụ",
    "y"  => "ýÿỹ",
    "ae" => "æ",
    "c"  => "čćç",
    "s"  => "śšş",
    "d"  => "đ",
    "z"  => "žź",
    "n"  => "ñńň",
    "oe" => "œ",
    "k"  => "ķ",
    "r"  => "ř",
    "g"  => "ğ"
  }.freeze

  # Flat lookup: single accented character => its ASCII replacement.
  CHAR_TO_ASCII = TRANSLITERATIONS.each_with_object({}) { |(ascii, chars), table|
    chars.each_char { |char| table[char] = ascii }
  }.freeze

  # Matches any one character that has an entry in CHAR_TO_ASCII.
  TRANSLITERABLE = /[#{Regexp.escape(CHAR_TO_ASCII.keys.join)}]/

  # Transliterates +string+ to ASCII where a mapping exists, then upper-cases it.
  #
  # @param string [String] text to normalize
  # @return [String] a new, upper-cased string
  def self.normalize(string)
    utf8_to_ascii(string).upcase
  end

  # Normalizes +word+ and then removes every character other than
  # A-Z, 0-9 and the hyphen.
  #
  # @param word [String] a single word
  # @return [String] a new string made only of A-Z, 0-9 and "-"
  def self.normalize_word(word)
    normalize(word).gsub(/[^A-Z0-9\-]/, '')
  end

  # Replaces each character listed in TRANSLITERATIONS with its ASCII
  # equivalent in a single pass; all other characters are left untouched.
  #
  # This method is public. A bare `protected` keyword does not apply to
  # methods defined with `def self.`, so none is used here.
  #
  # @param string [String] text that may contain accented characters
  # @return [String] a new string; the argument is not modified
  def self.utf8_to_ascii(string)
    # gsub with a Hash substitutes each match with the value stored under it.
    string.gsub(TRANSLITERABLE, CHAR_TO_ASCII)
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
dimus-taxamatch_rb-0.1.4 lib/taxamatch_rb/normalizer.rb
dimus-taxamatch_rb-0.1.5 lib/taxamatch_rb/normalizer.rb
dimus-taxamatch_rb-0.1.6 lib/taxamatch_rb/normalizer.rb
dimus-taxamatch_rb-0.1.7 lib/taxamatch_rb/normalizer.rb