Sha256: 62bbfff55fa7db52eb5e5529a6ce116350874f0de4a044a8efb936d1250fad46

Contents?: true

Size: 1.63 KB

Versions: 7

Compression:

Stored size: 1.63 KB

Contents

# encoding: UTF-8

module Taxamatch

  # Stateless helpers that reduce name strings, author strings and years to
  # a plain upper-case ASCII form.
  module Normalizer
    # Ordered [pattern, replacement] pairs used by utf8_to_ascii.
    #
    # Each pattern matches every listed character plus its upper-case
    # counterpart (when that counterpart is a single non-ASCII character).
    # The counterparts matter because `normalize` upcases its input *before*
    # transliterating: where String#upcase handles non-ASCII letters, "ă"
    # arrives here as "Ă", which must be recognised as well. Where upcase
    # only touches ASCII, `char.upcase` returns the character unchanged and
    # the table is simply the listed characters.
    TRANSLITERATIONS = [
      ['A',  'ÀÂÅÃÄÁẤẠáàâåãäăắảạậầằ'],
      ['E',  'ÉÈÊËéèêëĕěếệểễềẻ'],
      ['I',  'ÍÌÎÏíìîïǐĭīĩỉ'],
      ['O',  'ÓÒÔØÕÖỚỔóòôøõöŏỏỗộơọỡốồờớổő'],
      ['U',  'ÚÙÛÜúùûüůưừựủứụű'],
      ['Y',  'Ýýÿỹ'],
      ['AE', 'Ææ'],
      ['C',  'ČÇčćç'],
      ['S',  'ŠŞśšşſ'],
      ['D',  'Đđð'],
      ['Z',  'Žžź'],
      ['N',  'Ññńň'],
      ['OE', 'Œœ'],
      ['B',  'ß'],
      ['K',  'Ķ'],
      ['G',  'ğ'],
      ['R',  'Řř']
    ].map do |ascii, accented|
      variants = accented.each_char.flat_map { |char| [char, char.upcase] }
      # Drop multi-character upcasings (e.g. "ß" => "SS") and plain ASCII
      # results (e.g. "ſ" => "S"); neither belongs in a per-character table.
      variants = variants.select { |char| char.length == 1 && !char.ascii_only? }.uniq
      [Regexp.union(variants), ascii]
    end.freeze

    # Strips and upcases +string+, transliterates known accented characters
    # to ASCII, and replaces every remaining non-ASCII character with "?".
    #
    # The hybrid sign "×" is turned into a lower-case "x" after upcasing, so
    # it stays lower-case in the result.
    #
    # @param string [String]
    # @return [String]
    def self.normalize(string)
      utf8_to_ascii(string.strip.upcase).gsub(/[^\x00-\x7F]/, '?')
    end

    # Normalizes +word+ and removes everything except A-Z, 0-9 and "-".
    #
    # @param word [String]
    # @return [String]
    def self.normalize_word(word)
      normalize(word).gsub(/[^A-Z0-9\-]/, '')
    end

    # Normalizes +string+, turns every run of characters other than A-Z into
    # a single space, and trims the result.
    #
    # @param string [String]
    # @return [String]
    def self.normalize_author(string)
      normalize(string).gsub(/[^A-Z]+/, ' ').strip
    end

    # Extracts a year from +year_string+ by concatenating all of its digits.
    #
    # @param year_string [String]
    # @return [Integer, nil] the year, or nil when it is not between 1757
    #   and next calendar year (inclusive)
    def self.normalize_year(year_string)
      year = year_string.gsub(/[^\d]/, '').to_i
      year.between?(1757, Time.now.year + 1) ? year : nil
    end

    # Internal helper: collapses whitespace runs to one space, maps "×" to
    # "x", and applies TRANSLITERATIONS in order.
    #
    # NOTE: this method has always been publicly callable (a bare `private`
    # does not apply to `def self.` methods); it is left callable so existing
    # callers keep working.
    #
    # @param string [String]
    # @return [String]
    def self.utf8_to_ascii(string)
      cleaned = string.gsub(/\s{2,}/, ' ').gsub('×', 'x')
      TRANSLITERATIONS.inject(cleaned) do |text, (pattern, ascii)|
        text.gsub(pattern, ascii)
      end
    end

  end

end

Version data entries

7 entries across 7 versions & 1 rubygems

Version Path
taxamatch_rb-0.9.10 lib/taxamatch_rb/normalizer.rb
taxamatch_rb-0.9.9 lib/taxamatch_rb/normalizer.rb
taxamatch_rb-0.9.8 lib/taxamatch_rb/normalizer.rb
taxamatch_rb-0.9.7 lib/taxamatch_rb/normalizer.rb
taxamatch_rb-0.9.6 lib/taxamatch_rb/normalizer.rb
taxamatch_rb-0.9.5 lib/taxamatch_rb/normalizer.rb
taxamatch_rb-0.9.4 lib/taxamatch_rb/normalizer.rb