# coding: utf-8

require 'csv'
require 'yaml'

module Ting
  # Converts between transcription strings and Syllable objects.
  #
  # The per-transcription spellings of every initial and final are read from
  # data/initial.csv and data/final.csv when this file is loaded, and the
  # substitution rules from data/rules.yaml.
  module Conversions
    # Names of the representations found in the CSV files (the first field
    # of each row), excluding the rows whose name contains "standalone".
    All = []

    DATA_DIR = File.dirname(__FILE__) + '/data/'

    # Load various representations for initials and finals.
    #
    # Every CSV row has the shape "name, value, value, ...". For each row an
    # accessor called +name+ is defined on the class, and the i-th value is
    # assigned to the i-th entry of that class's All list.
    %w(Initial Final).each do |c|
      klazz = Ting.const_get c
      begin
        # Block form, so the file handle is closed once the rows are read.
        CSV.open(DATA_DIR + c.downcase + '.csv', 'r:utf-8') do |csv|
          csv.each do |name, *values|
            next if name == "name" # header row
            All << name.to_s unless All.include?(name) || name =~ /standalone/i
            klazz.class_eval { attr_accessor name.to_sym }
            values.each_with_index do |v, i|
              klazz::All[i].send(name + '=', v && v.force_encoding('UTF-8'))
            end
          end
        end
      rescue
        STDERR << "Bad data in #{c.downcase}.csv : #{$!}"
        raise
      end
    end

    # Substitution rules
    @@rules = YAML::load(IO.read(DATA_DIR + 'rules.yaml'))

    # Parses one syllable written in the given transcription.
    #
    # @param type   name of the transcription; used as the accessor name on
    #               initials/finals and as the key into the rules table
    # @param string the syllable text (converted with #to_s)
    # @return [Syllable] built with a nil tone and the detected capitalization
    # @raise [RuntimeError] when no legal initial/final combination matches
    def self.parse(type, string)
      # Coerce first, so the capitalization check works on the same value
      # that is matched below.
      string = string.to_s
      lowered = string.downcase
      capitalized = (lowered != string && lowered.capitalize == string)
      string = lowered

      # A final that has a dedicated standalone spelling wins outright.
      final = Final::All.find { |f| standalone_form(type, f) == string }
      return Syllable.new(Initial::Empty, final, nil, capitalized) if final

      finals = Final::All.dup
      finals.unshift(finals.delete(Final::Uo)) #hack : move Uo to the front
      #otherwise wadegiles parses 'lo' as Le+O rather than Le+Uo
      #probably better to add a hardcoded 'overrule' table for these cases

      Initial.each do |ini|
        finals.each do |fin|
          next if Syllable.illegal?(ini, fin)
          candidate = apply_rules(type, (ini.send(type) || '') + (fin.send(type) || ''))
          return Syllable.new(ini, fin, nil, capitalized) if string == candidate
        end
      end

      raise "Can't parse `#{string.inspect}'"
    end

    # Renders a syllable in the given transcription.
    #
    # The initial's spelling, when present, is combined with the final's and
    # run through the substitution rules. Without one, the final's standalone
    # spelling is used as-is if it has one; otherwise the final's regular
    # spelling goes through the rules.
    #
    # @param type  name of the transcription
    # @param tsyll object responding to #initial, #final and #capitalized?
    # @return [String] UTF-8 text, capitalized when tsyll.capitalized? is true
    def self.unparse(type, tsyll)
      initial = tsyll.initial.send(type)
      str =
        if initial
          apply_rules(type, initial + (tsyll.final.send(type) || ''))
        elsif (standalone = standalone_form(type, tsyll.final))
          standalone
        else
          apply_rules(type, tsyll.final.send(type))
        end

      (tsyll.capitalized? ? str.capitalize : str).force_encoding('UTF-8')
    end

    # Splits a string into tokens separated by runs of apostrophes and spaces.
    #
    # NOTE: after a separator only the remainder of the current line is kept,
    # because `.` in the pattern does not match a newline.
    #
    # @param str [String] text to split
    # @return [Array<Array(String, Integer)>] [token, pos] pairs, where pos is
    #   the offset at which the token starts in the scanned text, separators
    #   included
    def self.tokenize(str)
      [].tap do |tokens|
        rest, pos = str.dup, 0
        while rest && (token = rest[/[^' ]*/])
          tokens << [token.strip, pos]
          # The separator run, if any, starts right after the token; count it
          # too so that pos keeps pointing into the original text.
          tail = rest.match(/([' ]+)(.*)/)
          pos += token.length + (tail ? tail[1].length : 0)
          rest = tail && tail[2]
        end
      end
    end

    # Applies the substitution rules registered for +type+ to a copy of
    # +string+. Each rule supplies a 'match' pattern source and a 'subst'
    # replacement; rules are applied in order.
    #
    # Intended for internal use. The original source put a bare `private` in
    # front of this definition, which has no effect on `def self.` methods, so
    # the method has always been callable from outside and is left that way.
    #
    # @param type   key into the rules table
    # @param string text to transform (not modified)
    # @return the transformed copy
    def self.apply_rules(type, string)
      string.dup.tap do |s|
        rules = @@rules[type]
        rules && rules.each do |rule|
          s.gsub!(Regexp.new(rule['match']), rule['subst'])
        end
      end
    end

    # Returns the standalone spelling of +final+ in the given transcription,
    # or nil when the final has no "<type>_standalone" accessor.
    def self.standalone_form(type, final)
      reader = "#{type}_standalone"
      final.send(reader) if final.respond_to?(reader)
    end
    private_class_method :standalone_form
  end
end