lib/grc.rb in grc-0.1.1 vs lib/grc.rb in grc-0.1.3

- old
+ new

@@ -4,50 +4,79 @@ # Methods for working with ancient greek in ruby module Grc class Error < StandardError; end + @std_error = 'ERROR: String does not contain any greek. Summon the muse and try again.' + def grc? !scan(/(\p{Greek})/).empty? end def no_downcase_diacritics + return @std_error unless grc? + tr('ἀἄᾄἂᾂἆᾆᾀἁἅᾅἃᾃἇᾇᾁάάᾴὰᾲᾰᾶᾷᾱᾳἐἔἒἑἕἓέέὲἠἤᾔἢᾒἦᾖᾐἡἥᾕἣᾓἧᾗᾑήήῄὴῂῆῇῃἰἴἲἶἱἵἳἷίίὶῐῖϊϊΐῒῗῑὀὄὂὁὅὃόόὸῤῥὐὔὒὖὑὕὓὗύύὺῠῦϋΰΰΰῢῧῡὠὤᾤὢᾢὦᾦᾠὡὥᾥὣᾣὧᾧᾡώώῴὼῲῶῷῳ', 'ααααααααααααααααααααααααααεεεεεεεεεηηηηηηηηηηηηηηηηηηηηηηηηιιιιιιιιιιιιιιιιιιιοοοοοοοοορρυυυυυυυυυυυυυυυυυυυυωωωωωωωωωωωωωωωωωωωωωωωω') end def no_upcase_diacritics + return @std_error unless grc? + str = self + # Adhoc solution for odd combinations of diacritics with capital letters ars = [[/[́̀͂́́́̀͂]/, ''], [/Α͂/, 'Α'], [/Η͂/, 'Η'], [/Ί|Ὶ|Ι͂|́Ι|̀Ι|͂Ι/, 'Ι'], - [/Ρ̓/, 'Ρ'], [/ Ὺ| ́Υ|Υ̓|Ύ|Ὺ|Υ͂|́Υ|̀Υ|͂Υ/, 'Υ'], [/͂Ω/, 'Ω']] # Adhoc solution for crazy diacritics with capital letters + [/Ρ̓/, 'Ρ'], [/ Ὺ| ́Υ|Υ̓|Ύ|Ὺ|Υ͂|́Υ|̀Υ|͂Υ/, 'Υ'], [/͂Ω/, 'Ω']] ars.each do |a| str = str.gsub(/#{a[0]}/, a[1]) end - str = str.tr('ἈἌἊἎἉἍἋἏΆᾺᾸᾹἘἜἚἙἝἛΈῈἨἬἪἮἩἭἫἯΉῊἸἼἺἾἹἽἻἿΊῚῘΪῙὈὌὊὉὍὋΌῸΡῬὙὝὛὟΎῪῨΫῩὨὬὪὮὩὭὫὯΏῺ', - 'ΑΑΑΑΑΑΑΑΑΑΑΑΕΕΕΕΕΕΕΕΗΗΗΗΗΗΗΗΗΗΙΙΙΙΙΙΙΙΙΙΙΙΙΟΟΟΟΟΟΟΟΡΡΥΥΥΥΥΥΥΥΥΩΩΩΩΩΩΩΩΩΩ') + str.tr('ἈἌἊἎἉἍἋἏΆᾺᾸᾹἘἜἚἙἝἛΈῈἨἬἪἮἩἭἫἯΉῊἸἼἺἾἹἽἻἿΊῚῘΪῙὈὌὊὉὍὋΌῸΡῬὙὝὛὟΎῪῨΫῩὨὬὪὮὩὭὫὯΏῺ', + 'ΑΑΑΑΑΑΑΑΑΑΑΑΕΕΕΕΕΕΕΕΗΗΗΗΗΗΗΗΗΗΙΙΙΙΙΙΙΙΙΙΙΙΙΟΟΟΟΟΟΟΟΡΡΥΥΥΥΥΥΥΥΥΩΩΩΩΩΩΩΩΩΩ') end def no_diacritics + return @std_error unless grc? + no_downcase_diacritics.no_upcase_diacritics end def tonos_to_oxia - tr('ΆΈΉΊΎΌΏάέήίΐύΰόώ', - 'ΆΈΉΊΎΌΏάέήίΐύΰόώ') + return @std_error unless grc? + + tr('άΆέΈήΉίΊΐόΌύΎΰώΏ', + 'άΆέΈήΉίΊΐόΌύΎΰώΏ') end + def to_oxia + return @std_error unless grc? + + tonos_to_oxia + end + def oxia_to_tonos - tr('ΆΈΉΊΎΌΏάέήίΐύΰόώ', - 'ΆΈΉΊΎΌΏάέήίΐύΰόώ') + return @std_error unless grc? + + tr('άΆέΈήΉίΊΐόΌύΎΰώΏ', + 'άΆέΈήΉίΊΐόΌύΎΰώΏ') end + def to_tonos + return @std_error unless grc? + + oxia_to_tonos + end + def acute_to_grave + return @std_error unless grc? + tr('ἄᾄἅᾅάάᾴἔἕέέἤᾔἥᾕήήῄἴἵίίΐὄὅόόὔὕύύΰΰὤᾤὥᾥώῴ', 'ἂᾂἃᾃὰὰᾲἒἓὲὲἢᾒἣᾓὴὴῂἲἳὶὶῒὂὃὸὸὒὓὺὺῢῢὢᾢὣᾣὼῲ') end def grave_to_acute + return @std_error unless grc? + tr('ἂᾂἃᾃὰὰᾲἒἓὲὲἢᾒἣᾓὴὴῂἲἳὶὶῒὂὃὸὸὒὓὺὺῢῢὢᾢὣᾣὼῲ', 'ἄᾄἅᾅάάᾴἔἕέέἤᾔἥᾕήήῄἴἵίίΐὄὅόόὔὕύύΰΰὤᾤὥᾥώῴ') end def tokenize @@ -73,10 +102,151 @@ hash[character] = character.dump end hash end + def transliterate + return @std_error unless grc? + + hash = { + ῥ: 'rh', + ͱ: '', + Ͳ: '', + ͳ: '', + ʹ: '', + "\u0375": '', + Ͷ: '', + ͷ: '', + ͺ: '', + ͻ: '', + ͼ: '', + ͽ: '', + Α: 'a', + Β: 'b', + Γ: 'g', + Δ: 'd', + Ε: 'e', + Ζ: 'z', + Η: 'ē', + Θ: 'th', + Ι: 'i', + Κ: 'k', + Λ: 'l', + Μ: 'm', + Ν: 'n', + Ξ: 'x', + Ο: 'o', + Π: 'p', + Ρ: 'r', + Σ: 's', + Τ: 't', + Υ: 'y', + Φ: 'ph', + Χ: 'ch', + Ψ: 'ps', + Ω: 'ō', + α: 'a', + β: 'b', + γ: 'g', + δ: 'd', + ε: 'e', + ζ: 'z', + η: 'ē', + θ: 'th', + ι: 'i', + κ: 'k', + λ: 'l', + μ: 'm', + ν: 'n', + ξ: 'x', + ο: 'o', + π: 'p', + ρ: 'r', + ς: 's', + σ: 's', + τ: 't', + υ: 'y', + φ: 'ph', + χ: 'ch', + ψ: 'ps', + ω: 'ō', + Ϗ: '', + ϐ: '', + ϑ: '', + ϒ: '', + ϓ: '', + ϔ: '', + ϕ: '', + ϖ: '', + ϗ: '', + Ϙ: '', + ϙ: '', + Ϛ: '', + ϛ: '', + Ϝ: '', + ϝ: '', + Ϟ: '', + ϟ: '', + Ϡ: '', + ϡ: '', + Ϣ: '', + ϣ: '', + Ϥ: '', + ϥ: '', + Ϧ: '', + ϧ: '', + Ϩ: '', + ϩ: '', + Ϫ: '', + ϫ: '', + Ϭ: '', + ϭ: '', + Ϯ: '', + ϯ: '', + ϰ: '', + ϱ: '', + ϲ: '', + ϳ: '', + ϴ: '', + ϵ: '', + "\u03F6": '', + Ϸ: '', + ϸ: '', + Ϲ: '', + Ϻ: '', + ϻ: '', + ϼ: '', + Ͻ: '', + Ͼ: '', + Ͽ: '', + gg: 'ng', + gk: 'nk', + gx: 'nx', + gc: 'nc', + "\u{0314}": 'rh', + rr: 'rrh', + ay: 'au', + ey: 'eu', + ēy: 'ēu', + oy: 'ou', + yi: 'ui' + } + result = [] + str = self + str.split.each do |word| + result << if word.grc? + the_word = word.gsub(/ῥ/, 'rh') + the_word = the_word =~ /[ἁἅᾅἃᾃἇᾇᾁἑἕἓἡἥᾕἣᾓἧᾗᾑἱἵἳἷὁὅὃὑὕὓὗὡὥᾥὣᾣὧᾧᾡ]/ ? "h#{the_word.no_diacritics}" : the_word.no_diacritics + hash.each { |k, v| the_word = the_word.gsub(/#{k}/, v) } + the_word + else + word + end + end + result.join(' ') + end + def nfc unicode_normalize(:nfc) end def nfd @@ -84,6 +254,6 @@ end end class String include Grc -end \ No newline at end of file +end