lib/grc.rb in grc-0.1.1 vs lib/grc.rb in grc-0.1.3
- old
+ new
@@ -4,50 +4,79 @@
# Methods for working with ancient greek in ruby
module Grc
class Error < StandardError; end
+ @std_error = 'ERROR: String does not contain any greek. Summon the muse and try again.'
+
def grc?
!scan(/(\p{Greek})/).empty?
end
def no_downcase_diacritics
+ return @std_error unless grc?
+
tr('ἀἄᾄἂᾂἆᾆᾀἁἅᾅἃᾃἇᾇᾁάάᾴὰᾲᾰᾶᾷᾱᾳἐἔἒἑἕἓέέὲἠἤᾔἢᾒἦᾖᾐἡἥᾕἣᾓἧᾗᾑήήῄὴῂῆῇῃἰἴἲἶἱἵἳἷίίὶῐῖϊϊΐῒῗῑὀὄὂὁὅὃόόὸῤῥὐὔὒὖὑὕὓὗύύὺῠῦϋΰΰΰῢῧῡὠὤᾤὢᾢὦᾦᾠὡὥᾥὣᾣὧᾧᾡώώῴὼῲῶῷῳ',
'ααααααααααααααααααααααααααεεεεεεεεεηηηηηηηηηηηηηηηηηηηηηηηηιιιιιιιιιιιιιιιιιιιοοοοοοοοορρυυυυυυυυυυυυυυυυυυυυωωωωωωωωωωωωωωωωωωωωωωωω')
end
def no_upcase_diacritics
+ return @std_error unless grc?
+
str = self
+ # Adhoc solution for odd combinations of diacritics with capital letters
ars = [[/[́̀͂́́́̀͂]/, ''], [/Α͂/, 'Α'], [/Η͂/, 'Η'], [/Ί|Ὶ|Ι͂|́Ι|̀Ι|͂Ι/, 'Ι'],
- [/Ρ̓/, 'Ρ'], [/ Ὺ| ́Υ|Υ̓|Ύ|Ὺ|Υ͂|́Υ|̀Υ|͂Υ/, 'Υ'], [/͂Ω/, 'Ω']] # Adhoc solution for crazy diacritics with capital letters
+ [/Ρ̓/, 'Ρ'], [/ Ὺ| ́Υ|Υ̓|Ύ|Ὺ|Υ͂|́Υ|̀Υ|͂Υ/, 'Υ'], [/͂Ω/, 'Ω']]
ars.each do |a|
str = str.gsub(/#{a[0]}/, a[1])
end
- str = str.tr('ἈἌἊἎἉἍἋἏΆᾺᾸᾹἘἜἚἙἝἛΈῈἨἬἪἮἩἭἫἯΉῊἸἼἺἾἹἽἻἿΊῚῘΪῙὈὌὊὉὍὋΌῸΡῬὙὝὛὟΎῪῨΫῩὨὬὪὮὩὭὫὯΏῺ',
- 'ΑΑΑΑΑΑΑΑΑΑΑΑΕΕΕΕΕΕΕΕΗΗΗΗΗΗΗΗΗΗΙΙΙΙΙΙΙΙΙΙΙΙΙΟΟΟΟΟΟΟΟΡΡΥΥΥΥΥΥΥΥΥΩΩΩΩΩΩΩΩΩΩ')
+ str.tr('ἈἌἊἎἉἍἋἏΆᾺᾸᾹἘἜἚἙἝἛΈῈἨἬἪἮἩἭἫἯΉῊἸἼἺἾἹἽἻἿΊῚῘΪῙὈὌὊὉὍὋΌῸΡῬὙὝὛὟΎῪῨΫῩὨὬὪὮὩὭὫὯΏῺ',
+ 'ΑΑΑΑΑΑΑΑΑΑΑΑΕΕΕΕΕΕΕΕΗΗΗΗΗΗΗΗΗΗΙΙΙΙΙΙΙΙΙΙΙΙΙΟΟΟΟΟΟΟΟΡΡΥΥΥΥΥΥΥΥΥΩΩΩΩΩΩΩΩΩΩ')
end
def no_diacritics
+ return @std_error unless grc?
+
no_downcase_diacritics.no_upcase_diacritics
end
def tonos_to_oxia
- tr('ΆΈΉΊΎΌΏάέήίΐύΰόώ',
- 'ΆΈΉΊΎΌΏάέήίΐύΰόώ')
+ return @std_error unless grc?
+
+ tr('άΆέΈήΉίΊΐόΌύΎΰώΏ',
+ 'άΆέΈήΉίΊΐόΌύΎΰώΏ')
end
+ def to_oxia
+ return @std_error unless grc?
+
+ tonos_to_oxia
+ end
+
def oxia_to_tonos
- tr('ΆΈΉΊΎΌΏάέήίΐύΰόώ',
- 'ΆΈΉΊΎΌΏάέήίΐύΰόώ')
+ return @std_error unless grc?
+
+ tr('άΆέΈήΉίΊΐόΌύΎΰώΏ',
+ 'άΆέΈήΉίΊΐόΌύΎΰώΏ')
end
+ def to_tonos
+ return @std_error unless grc?
+
+ oxia_to_tonos
+ end
+
def acute_to_grave
+ return @std_error unless grc?
+
tr('ἄᾄἅᾅάάᾴἔἕέέἤᾔἥᾕήήῄἴἵίίΐὄὅόόὔὕύύΰΰὤᾤὥᾥώῴ',
'ἂᾂἃᾃὰὰᾲἒἓὲὲἢᾒἣᾓὴὴῂἲἳὶὶῒὂὃὸὸὒὓὺὺῢῢὢᾢὣᾣὼῲ')
end
def grave_to_acute
+ return @std_error unless grc?
+
tr('ἂᾂἃᾃὰὰᾲἒἓὲὲἢᾒἣᾓὴὴῂἲἳὶὶῒὂὃὸὸὒὓὺὺῢῢὢᾢὣᾣὼῲ',
'ἄᾄἅᾅάάᾴἔἕέέἤᾔἥᾕήήῄἴἵίίΐὄὅόόὔὕύύΰΰὤᾤὥᾥώῴ')
end
def tokenize
@@ -73,10 +102,151 @@
hash[character] = character.dump
end
hash
end
+ def transliterate
+ return @std_error unless grc?
+
+ hash = {
+ ῥ: 'rh',
+ ͱ: '',
+ Ͳ: '',
+ ͳ: '',
+ ʹ: '',
+ "\u0375": '',
+ Ͷ: '',
+ ͷ: '',
+ ͺ: '',
+ ͻ: '',
+ ͼ: '',
+ ͽ: '',
+ Α: 'a',
+ Β: 'b',
+ Γ: 'g',
+ Δ: 'd',
+ Ε: 'e',
+ Ζ: 'z',
+ Η: 'ē',
+ Θ: 'th',
+ Ι: 'i',
+ Κ: 'k',
+ Λ: 'l',
+ Μ: 'm',
+ Ν: 'n',
+ Ξ: 'x',
+ Ο: 'o',
+ Π: 'p',
+ Ρ: 'r',
+ Σ: 's',
+ Τ: 't',
+ Υ: 'y',
+ Φ: 'ph',
+ Χ: 'ch',
+ Ψ: 'ps',
+ Ω: 'ō',
+ α: 'a',
+ β: 'b',
+ γ: 'g',
+ δ: 'd',
+ ε: 'e',
+ ζ: 'z',
+ η: 'ē',
+ θ: 'th',
+ ι: 'i',
+ κ: 'k',
+ λ: 'l',
+ μ: 'm',
+ ν: 'n',
+ ξ: 'x',
+ ο: 'o',
+ π: 'p',
+ ρ: 'r',
+ ς: 's',
+ σ: 's',
+ τ: 't',
+ υ: 'y',
+ φ: 'ph',
+ χ: 'ch',
+ ψ: 'ps',
+ ω: 'ō',
+ Ϗ: '',
+ ϐ: '',
+ ϑ: '',
+ ϒ: '',
+ ϓ: '',
+ ϔ: '',
+ ϕ: '',
+ ϖ: '',
+ ϗ: '',
+ Ϙ: '',
+ ϙ: '',
+ Ϛ: '',
+ ϛ: '',
+ Ϝ: '',
+ ϝ: '',
+ Ϟ: '',
+ ϟ: '',
+ Ϡ: '',
+ ϡ: '',
+ Ϣ: '',
+ ϣ: '',
+ Ϥ: '',
+ ϥ: '',
+ Ϧ: '',
+ ϧ: '',
+ Ϩ: '',
+ ϩ: '',
+ Ϫ: '',
+ ϫ: '',
+ Ϭ: '',
+ ϭ: '',
+ Ϯ: '',
+ ϯ: '',
+ ϰ: '',
+ ϱ: '',
+ ϲ: '',
+ ϳ: '',
+ ϴ: '',
+ ϵ: '',
+ "\u03F6": '',
+ Ϸ: '',
+ ϸ: '',
+ Ϲ: '',
+ Ϻ: '',
+ ϻ: '',
+ ϼ: '',
+ Ͻ: '',
+ Ͼ: '',
+ Ͽ: '',
+ gg: 'ng',
+ gk: 'nk',
+ gx: 'nx',
+ gc: 'nc',
+ "\u{0314}": 'rh',
+ rr: 'rrh',
+ ay: 'au',
+ ey: 'eu',
+ ēy: 'ēu',
+ oy: 'ou',
+ yi: 'ui'
+ }
+ result = []
+ str = self
+ str.split.each do |word|
+ result << if word.grc?
+ the_word = word.gsub(/ῥ/, 'rh')
+ the_word = the_word =~ /[ἁἅᾅἃᾃἇᾇᾁἑἕἓἡἥᾕἣᾓἧᾗᾑἱἵἳἷὁὅὃὑὕὓὗὡὥᾥὣᾣὧᾧᾡ]/ ? "h#{the_word.no_diacritics}" : the_word.no_diacritics
+ hash.each { |k, v| the_word = the_word.gsub(/#{k}/, v) }
+ the_word
+ else
+ word
+ end
+ end
+ result.join(' ')
+ end
+
def nfc
unicode_normalize(:nfc)
end
def nfd
@@ -84,6 +254,6 @@
end
end
class String
include Grc
-end
\ No newline at end of file
+end