# -*- coding: utf-8 -*- # Classes and constants used throughout the module # * Initial # * Final # * Syllable # * ILLEGAL_COMBINATIONS require 'yaml' module Ting # # A Chinese initial (start of a syllable) # class Initial attr :name def initialize(n) ; @name=n ; end All = %w( Empty Bo Po Mo Fo De Te Ne Le Ge Ke He Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si ).map{|c| const_set c, Initial.new(c)} class << self private :new include Enumerable def each(&blk) ; All.each(&blk) ; end end Groups=[ Group_0=[ Empty ], Group_1=[ Bo,Po,Mo,Fo], #Bilabial and Labio-dental Group_2=[ De,Te,Ne,Le ], #Plosive, nasal and lateral approximant alveolar Group_3=[ Ge,Ke,He ], #Velar Group_4=[ Ji,Qi,Xi ], #Alveolo-palatal Group_5=[ Zhi,Chi,Shi,Ri ], #Retroflex Group_6=[ Zi,Ci,Si ], #Fricative and affricate alveolar ] def +(f) Syllable.new(self,f) end def inspect() ; "<#{self.class.name}::#{@name}>" ; end end # # A Chinese final (end of a syllable) # class Final attr :name def initialize(n) ; @name=n ; end All=%w( Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er I Ia Io Ie Iai Iao Iu Ian In Iang Ing U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong ).map{|c| const_set c, Final.new(c)} class << self private :new include Enumerable def each(&blk) ; All.each(&blk) ; end end Groups=[ Group_0=[ Empty ], Group_A=[ A,O,E,Ee,Ai,Ei,Ao,Ou,An,En,Ang,Eng,Ong,Er ], Group_I=[ I,Ia,Io,Ie,Iai,Iao,Iu,Ian,In,Iang,Ing ], Group_U=[ U,Ua,Uo,Uai,Ui,Uan,Un,Uang,Ueng ], Group_V=[ V,Ue,Van,Vn,Iong] ] def inspect() ; "<#{self.class.name}::#{name}>" ; end end # # Combination of an initial and a final, a tone, and possible capitalization # A tone of 'nil' means the tone is not specified class Syllable attr_accessor :initial, :final, :tone, :capitalized def initialize(initial, final, tone = nil, capitalized = false) self.initial = initial self.final = final self.tone = tone self.capitalized = capitalized end def +(tone) self.class.new(self.initial, self.final, tone, self.capitalized) end def inspect "<#{self.class.name} >" end alias :capitalized? :capitalized def self.illegal?(i,f) ILLEGAL_COMBINATIONS.any? {|in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f)} end alias :to_s :inspect def ==( other ) [ other.initial, other.final, other.tone, other.capitalized ] == [ self.initial, self.final, self.tone, self.capitalized ] end end # # Some groups of initials and finals may not be combined # This list is not exhaustive but is sufficient to resolve ambiguity # ILLEGAL_COMBINATIONS= [ [Initial::Group_0, Final::Group_0], [Initial::Group_1, Final::Group_0], [Initial::Group_2, Final::Group_0], [Initial::Group_3, Final::Group_0], [Initial::Group_4, Final::Group_0], [Initial::Group_4, Final::Group_U], [Initial::Group_4, Final::Group_A], [Initial::Group_3, Final::Group_I], [Initial::Group_5, Final::Group_I], [Initial::Group_6, Final::Group_I], [Initial::Group_1, Final::Group_V], [Initial::Group_3, Final::Group_V], # For "咯 / lo5" to parse correctly we need to list "Le + O" as valid, [Initial::Group_2 - [Initial::Le], [Final::O]], #Only bo, po, mo and fo are valid -o combinations [Initial::Group_3, [Final::O]], [Initial::Group_4, [Final::O]], [Initial::Group_5, [Final::O]], [Initial::Group_6, [Final::O]], [[Initial::Empty], [Final::Ong]] # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng ] class << self # # Yields a block for any valid initial/final pair # def valid_combinations( &blk ) return to_enum(__message__) unless block_given? inp = YAML::load(IO.read(File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml'))) inp.each do |final, initials| final = Final.const_get(final) initials.each do |initial, pinyin| initial = Initial.const_get(initial) yield [initial, final] end end end def all_syllables( &blk ) return to_enum(__message__) unless block_given? valid_combinations.map do |i,f| 1.upto(5) do |t| yield Syllable.new(i,f,t,false) yield Syllable.new(i,f,t,true) end end end end end