Sha256: 8ed96a0ec78b2552c65eed16e8709ff1fd39915a7858520693615d9b724e8589
Contents?: true
Size: 1.43 KB
Versions: 1
Compression:
Stored size: 1.43 KB
Contents
# encoding: utf-8 module SportDb module Import class Variant ## (spelling) variant finder / builder for names def self.frequency_table( name ) ## todo/check: use/rename to char_frequency_table ## calculate the frequency table of letters, digits, etc. freq = Hash.new(0) name.each_char do |ch| freq[ch] += 1 end freq end ALPHA_SPECIALS = %w[ Ä Ö Ü ä ö ü ß ] ## "simple" translation SUB_ALPHA_SPECIALS = { 'Ä'=>'A', 'ä'=>'a', 'Ö'=>'O', 'ö'=>'o', 'Ü'=>'U', 'ü'=>'u', 'ß'=>'ss', } ## de,at,ch translation for umlauts SUB_ALPHA_SPECIALS_DE = { 'Ä'=>'Ae', 'ä'=>'ae', 'Ö'=>'Oe', 'ö'=>'oe', 'Ü'=>'Ue', 'ü'=>'ue', 'ß'=>'ss', } def self.alpha_specials_count( freq ) ALPHA_SPECIALS.reduce(0) do |count,ch| count += freq[ch] count end end def self.tr( name, mapping ) buf = String.new name.each_char do |ch| buf << if mapping[ch] mapping[ch] else ch end end buf end def self.find( name ) alt_names = [] freq = frequency_table( name ) if alpha_specials_count( freq ) > 0 # check if includes äöü etc. alt_names << tr( name, SUB_ALPHA_SPECIALS ) alt_names << tr( name, SUB_ALPHA_SPECIALS_DE ) end ## todo - make uniq e.g. Preußen is Preussen, Preussen 2x alt_names = alt_names.uniq alt_names end end # Variant end ## module Import end ## module SportDb
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
sportdb-config-0.4.0 | lib/sportdb/config/variants.rb |