Sha256: b940ce9974dea4902086eefe3c1e486738250e3e595f8835ee6fd1312a5624cc

Contents?: true

Size: 1.92 KB

Versions: 1

Compression:

Stored size: 1.92 KB

Contents

# encoding: utf-8

module SportDb
  module Import



class Variant    ## (spelling) variant finder / builder for names


def self.frequency_table( name )   ## todo/check: use/rename to char_frequency_table
  ## calculate the frequency table of letters, digits, etc.
  freq = Hash.new(0)
  name.each_char do |ch|
     freq[ch] += 1
  end
  freq
end


##  "simple" translation
ALPHA_SPECIALS = {
  'Ä'=>'A',  'ä'=>'a',
             'á'=>'a',
             'à'=>'a',
  'É'=>'E',  'é'=>'e',
             'í'=>'i',
             'ñ'=>'n',
  'Ö'=>'O',  'ö'=>'o',
             'ó'=>'o',
  'Ü'=>'U',  'ü'=>'u',
             'ú'=>'u',
             'ß'=>'ss',
}

##  de,at,ch translation for umlauts
ALPHA_SPECIALS_DE = {
  'Ä'=>'Ae',  'ä'=>'ae',
  'Ö'=>'Oe',  'ö'=>'oe',
  'Ü'=>'Ue',  'ü'=>'ue',
              'ß'=>'ss',
}

## add ALPHA_SPECIALS_ES - why? why not?  is Espanyol catalan spelling or spanish (castillian)?
# 'ñ'=>'ny',    ## e.g. Español => Espanyol



def self.alpha_specials_count( freq, mapping )
  mapping.keys.reduce(0) do |count,ch|
    count += freq[ch]
    count
  end
end

def self.tr( name, mapping )
  buf = String.new
  name.each_char do |ch|
    buf << if mapping[ch]
              mapping[ch]
            else
              ch
            end
  end
  buf
end



def self.find( name )
  alt_names = []

  freq = frequency_table( name )

  if alpha_specials_count( freq, ALPHA_SPECIALS  ) > 0    # check if includes äöü etc.
    alt_names <<  tr( name, ALPHA_SPECIALS )
  end

  if alpha_specials_count( freq, ALPHA_SPECIALS_DE  ) > 0   ## todo/fix: add / pass-in language/country code and check - why? why not?
    alt_names <<  tr( name, ALPHA_SPECIALS_DE )
  end

  ## todo - make uniq  e.g. Preußen is Preussen, Preussen 2x
  alt_names = alt_names.uniq
  alt_names
end
end   # Variant


end ## module Import
end ## module SportDb

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
sportdb-config-0.4.1 lib/sportdb/config/variants.rb