lib/ting/groundwork.rb in ting-0.3.0 vs lib/ting/groundwork.rb in ting-0.9.0
- old
+ new
@@ -1,177 +1,181 @@
-# Classes and constants used throughout the module
-# * Initial
-# * Final
-# * TonelessSyllable
-# * Syllable
-# * ILLEGAL_COMBINATIONS
-
-module Ting
-
- #
- # A Chinese initial (start of a syllable)
- #
-
class Initial
  # The start of a syllable. One instance exists per entry of All and each
  # is also reachable as a constant of this class (Initial::Bo, ...).

  attr_reader :name

  def initialize(label)
    @name = label
  end

  # Every initial in declaration order; building the list registers each
  # instance as a constant named after it.
  All = %w(
    Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
    Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
  ).map { |label| const_set(label, Initial.new(label)) }

  # Has to follow All, which is populated through Initial.new.
  private_class_method :new

  Group_0 = [Empty]
  Group_1 = [Bo, Po, Mo, Fo]     # Bilabial and Labio-dental
  Group_2 = [De, Te, Ne, Le]     # Plosive, nasal and lateral approximant alveolar
  Group_3 = [Ge, Ke, He]         # Velar
  Group_4 = [Ji, Qi, Xi]         # Alveolo-palatal
  Group_5 = [Zhi, Chi, Shi, Ri]  # Retroflex
  Group_6 = [Zi, Ci, Si]         # Fricative and affricate alveolar

  Groups = [Group_0, Group_1, Group_2, Group_3, Group_4, Group_5, Group_6]

  # Joins this initial with the final +f+ into a TonelessSyllable.
  def +(f)
    TonelessSyllable.new(self, f)
  end

  def inspect
    "<#{self.class.name}::#{@name}>"
  end
end
-
-
- #
- # A Chinese final (end of a syllable)
- #
-
class Final
  # The end of a syllable. One instance exists per entry of All and each
  # is also reachable as a constant of this class (Final::A, ...).

  attr_reader :name

  def initialize(label)
    @name = label
  end

  # Every final in declaration order; building the list registers each
  # instance as a constant named after it.
  All = %w(
    Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
    I Ia Io Ie Iai Iao Iu Ian In Iang Ing
    U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
  ).map { |label| const_set(label, Final.new(label)) }

  # Has to follow All, which is populated through Final.new.
  private_class_method :new

  Group_0 = [Empty]
  Group_A = [A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er]
  Group_I = [I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing]
  Group_U = [U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng]
  Group_V = [V, Ue, Van, Vn, Iong]

  Groups = [Group_0, Group_A, Group_I, Group_U, Group_V]

  def inspect
    "<#{self.class.name}::#{name}>"
  end
end
-
-
- #
- # Combination of an initial and a final
- # Not to be confused with a syllable that has the neutral tone
- #
-
class TonelessSyllable
  # An initial paired with a final, with no tone attached yet.

  attr_accessor :initial, :final

  class << self
    # True when initial +i+ and final +f+ both fall inside one of the
    # group pairs listed in ILLEGAL_COMBINATIONS.
    def illegal?(i, f)
      ILLEGAL_COMBINATIONS.any? do |initial_group, final_group|
        initial_group.include?(i) && final_group.include?(f)
      end
    end
  end

  def initialize(initial, final)
    self.initial = initial
    self.final = final
  end

  # Attaches +tone+, producing a Syllable with the same initial and final.
  def +(tone)
    Syllable.new(initial, final, tone)
  end

  def inspect
    "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}>>"
  end

  alias_method :to_s, :inspect
end
-
-
- #
- # Syllable : initial, final and tone
- #
-
class Syllable < TonelessSyllable
  # A TonelessSyllable that additionally carries a tone.

  attr_accessor :tone

  def initialize(initial, final, tone)
    super(initial, final)
    self.tone = tone
  end

  def inspect
    "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}>>"
  end

  # Re-aliased here so to_s follows this class's inspect rather than the
  # one captured by the superclass alias.
  alias_method :to_s, :inspect
end
-
-
- #
- # Some groups of initials and finals may not be combined
- # This list is not exhaustive but is sufficient to resolve ambiguity
- #
-
ILLEGAL_COMBINATIONS =
  # Nothing in initial groups 0-4 combines with the empty final.
  [
    Initial::Group_0, Initial::Group_1, Initial::Group_2,
    Initial::Group_3, Initial::Group_4
  ].map { |initials| [initials, Final::Group_0] } +
  # Initial group 4 takes neither the U finals nor the A finals.
  [Final::Group_U, Final::Group_A].map { |finals| [Initial::Group_4, finals] } +
  # Initial groups 3, 5 and 6 do not take the I finals.
  [Initial::Group_3, Initial::Group_5, Initial::Group_6].map { |initials| [initials, Final::Group_I] } +
  # Initial groups 1 and 3 do not take the V finals.
  [Initial::Group_1, Initial::Group_3].map { |initials| [initials, Final::Group_V] } +
  # Only bo, po, mo and fo are valid -o combinations, except that
  # "Le + O" stays valid so that "咯 / lo5" parses correctly.
  [
    Initial::Group_2 - [Initial::Le],
    Initial::Group_3, Initial::Group_4, Initial::Group_5, Initial::Group_6
  ].map { |initials| [initials, [Final::O]] } +
  # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
  [[[Initial::Empty], [Final::Ong]]]
-
class << self

  #
  # Reads data/valid_pinyin.yaml (located next to this file) and calls the
  # given block with (initial, final) for every pair listed there. The YAML
  # maps a Final constant name to entries keyed by Initial constant name.
  #

  def valid_combinations
    require 'yaml'
    data_file = File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')
    table = YAML::load(IO.read(data_file))
    table.each do |final_name, entries|
      final_const = Final.const_get(final_name)
      entries.each do |initial_name, _pinyin|
        yield(Initial.const_get(initial_name), final_const)
      end
    end
  end

end
-end
+# -*- coding: utf-8 -*-
+# Classes and constants used throughout the module
+# * Initial
+# * Final
+# * Syllable
+# * ILLEGAL_COMBINATIONS
+
+require 'yaml'
+
+module Ting
+
+ #
+ # A Chinese initial (start of a syllable)
+ #
+
class Initial
  # The start of a syllable. One instance exists per entry of All and each
  # is also reachable as a constant of this class (Initial::Bo, ...). The
  # class itself is enumerable over All.

  attr_reader :name

  def initialize(label)
    @name = label
  end

  # Every initial in declaration order; building the list registers each
  # instance as a constant named after it.
  All = %w(
    Empty Bo Po Mo Fo De Te Ne Le Ge Ke He
    Ji Qi Xi Zhi Chi Shi Ri Zi Ci Si
  ).map { |label| const_set(label, Initial.new(label)) }

  # Has to follow All, which is populated through Initial.new.
  private_class_method :new

  # Class-level iteration: Initial.each / Initial.map / ... walk All.
  extend Enumerable

  def self.each(&blk)
    All.each(&blk)
  end

  Group_0 = [Empty]
  Group_1 = [Bo, Po, Mo, Fo]     # Bilabial and Labio-dental
  Group_2 = [De, Te, Ne, Le]     # Plosive, nasal and lateral approximant alveolar
  Group_3 = [Ge, Ke, He]         # Velar
  Group_4 = [Ji, Qi, Xi]         # Alveolo-palatal
  Group_5 = [Zhi, Chi, Shi, Ri]  # Retroflex
  Group_6 = [Zi, Ci, Si]         # Fricative and affricate alveolar

  Groups = [Group_0, Group_1, Group_2, Group_3, Group_4, Group_5, Group_6]

  # Joins this initial with the final +f+ into a Syllable (tone unset).
  def +(f)
    Syllable.new(self, f)
  end

  def inspect
    "<#{self.class.name}::#{@name}>"
  end
end
+
+
+ #
+ # A Chinese final (end of a syllable)
+ #
+
class Final
  # The end of a syllable. One instance exists per entry of All and each
  # is also reachable as a constant of this class (Final::A, ...). The
  # class itself is enumerable over All.

  attr_reader :name

  def initialize(label)
    @name = label
  end

  # Every final in declaration order; building the list registers each
  # instance as a constant named after it.
  All = %w(
    Empty A O E Ee Ai Ei Ao Ou An En Ang Eng Ong Er
    I Ia Io Ie Iai Iao Iu Ian In Iang Ing
    U Ua Uo Uai Ui Uan Un Uang Ueng V Ue Van Vn Iong
  ).map { |label| const_set(label, Final.new(label)) }

  # Has to follow All, which is populated through Final.new.
  private_class_method :new

  # Class-level iteration: Final.each / Final.map / ... walk All.
  extend Enumerable

  def self.each(&blk)
    All.each(&blk)
  end

  Group_0 = [Empty]
  Group_A = [A, O, E, Ee, Ai, Ei, Ao, Ou, An, En, Ang, Eng, Ong, Er]
  Group_I = [I, Ia, Io, Ie, Iai, Iao, Iu, Ian, In, Iang, Ing]
  Group_U = [U, Ua, Uo, Uai, Ui, Uan, Un, Uang, Ueng]
  Group_V = [V, Ue, Van, Vn, Iong]

  Groups = [Group_0, Group_A, Group_I, Group_U, Group_V]

  def inspect
    "<#{self.class.name}::#{name}>"
  end
end
+
+
+ #
+ # Combination of an initial and a final, a tone, and possible capitalization
+ # A tone of 'nil' means the tone is not specified
+
class Syllable
  # An initial and a final, plus an optional tone and a capitalization
  # flag. A tone of nil means the tone is not specified.

  attr_accessor :initial, :final, :tone, :capitalized

  # initial, final :: the two halves of the syllable
  # tone           :: the tone, or nil when unspecified (default)
  # capitalized    :: capitalization flag, false by default
  def initialize(initial, final, tone = nil, capitalized = false)
    self.initial = initial
    self.final = final
    self.tone = tone
    self.capitalized = capitalized
  end

  # Returns a new object of the receiver's class with the same initial,
  # final and capitalization, carrying +tone+. The receiver is unchanged.
  def +(tone)
    self.class.new(self.initial, self.final, tone, self.capitalized)
  end

  def inspect
    "<#{self.class.name} <initial=#{initial.name}, final=#{final.name}, tone=#{tone}#{', capitalized' if capitalized}>>"
  end

  alias :capitalized? :capitalized

  # True when initial +i+ and final +f+ both fall inside one of the
  # group pairs listed in ILLEGAL_COMBINATIONS.
  def self.illegal?(i,f)
    ILLEGAL_COMBINATIONS.any? {|in_gr, fin_gr| in_gr.include?(i) && fin_gr.include?(f)}
  end

  alias :to_s :inspect

  # Two syllables are equal when initial, final, tone and capitalization
  # all match. Anything that does not expose those four readers (nil, a
  # String, ...) is simply unequal instead of raising NoMethodError.
  def ==( other )
    readers = [ :initial, :final, :tone, :capitalized ]
    return false unless readers.all? { |reader| other.respond_to?(reader) }

    [ other.initial, other.final, other.tone, other.capitalized ] ==
      [ self.initial, self.final, self.tone, self.capitalized ]
  end
end
+
+ #
+ # Some groups of initials and finals may not be combined
+ # This list is not exhaustive but is sufficient to resolve ambiguity
+ #
+
ILLEGAL_COMBINATIONS =
  # Nothing in initial groups 0-4 combines with the empty final.
  [
    Initial::Group_0, Initial::Group_1, Initial::Group_2,
    Initial::Group_3, Initial::Group_4
  ].map { |initials| [initials, Final::Group_0] } +
  # Initial group 4 takes neither the U finals nor the A finals.
  [Final::Group_U, Final::Group_A].map { |finals| [Initial::Group_4, finals] } +
  # Initial groups 3, 5 and 6 do not take the I finals.
  [Initial::Group_3, Initial::Group_5, Initial::Group_6].map { |initials| [initials, Final::Group_I] } +
  # Initial groups 1 and 3 do not take the V finals.
  [Initial::Group_1, Initial::Group_3].map { |initials| [initials, Final::Group_V] } +
  # Only bo, po, mo and fo are valid -o combinations, except that
  # "Le + O" stays valid so that "咯 / lo5" parses correctly.
  [
    Initial::Group_2 - [Initial::Le],
    Initial::Group_3, Initial::Group_4, Initial::Group_5, Initial::Group_6
  ].map { |initials| [initials, [Final::O]] } +
  # TODO: Ong is actually the same as Ueng, in Hanyu Pinyin : -ong or weng
  [[[Initial::Empty], [Final::Ong]]]
+
class << self

  #
  # Yields [initial, final] for every pair listed in
  # data/valid_pinyin.yaml (located next to this file). The YAML maps a
  # Final constant name to entries keyed by Initial constant name.
  #
  # Without a block an Enumerator over the same pairs is returned.
  #

  def valid_combinations( &blk )
    # __method__ is the current method's name; the previous __message__
    # is not defined by Ruby and raised NameError on the block-less path.
    return to_enum(__method__) unless block_given?
    inp = YAML::load(IO.read(File.join(File.dirname(__FILE__), 'data', 'valid_pinyin.yaml')))
    inp.each do |final_name, initials|
      final = Final.const_get(final_name)
      initials.each do |initial_name, _pinyin|
        yield [Initial.const_get(initial_name), final]
      end
    end
  end

  #
  # Yields a Syllable for every valid initial/final pair in each of the
  # tones 1 through 5, first uncapitalized and then capitalized.
  #
  # Without a block an Enumerator over the same syllables is returned.
  #

  def all_syllables( &blk )
    return to_enum(__method__) unless block_given?
    # The block goes straight to valid_combinations; chaining through the
    # block-less Enumerator plus map only built a throwaway array.
    valid_combinations do |initial, final|
      1.upto(5) do |tone|
        yield Syllable.new(initial, final, tone, false)
        yield Syllable.new(initial, final, tone, true)
      end
    end
  end
end
+end