Sha256: b96537c69ec963c0b02d0455c33ab761475a9569fcf70300c0d6a54157dc2dfc
Contents?: true
Size: 1.61 KB
Versions: 3
Compression:
Stored size: 1.61 KB
Contents
module Pascoale class SyllableSeparator include Constants ONSET = "(?:ch|lh|nh|gu|qu|[pbtdcgfv][lr]|[#{CONSONANTS}])" # Still in doubt if we should add suffixes to the "i" semivowel... # it slightly improves the the matches, but some of them causes more # noise than fix things =\ #NUCLEUS = "(?:ãe|ão|õe|[#{VOWELS}](?:u|i(?!nh|r$|m$|dora?$|ção$|dade$))?)" NUCLEUS = "(?:ãe|ão|õe|[#{VOWELS}](?:u|i(?!nh|r$|m$|ção$|dora?$))?)" CODA = "[#{CONSONANTS}]" # The concept of "rhyme" does not help in this algorithm. It seems the # concept makes no sense for syllable separation in portuguese KERNEL = "#{ONSET}?#{NUCLEUS}" def initialize(word) @word = word end def separated rest = @word result = [] while rest && rest.size > 0 if rest =~ /^(#{KERNEL})(?:(#{KERNEL})|(#{CODA})(#{KERNEL})|(#{CODA}#{CODA})(#{KERNEL})|(#{CODA}#{CODA})|(#{CODA}))?(.*)$/ result << $1 + $3.to_s + $5.to_s + $7.to_s + $8.to_s rest = $2.to_s + $4.to_s + $6.to_s + $9.to_s # Special case! Hate them :( # Pneu, Gnomo, Mnemônica, Pseudônimo elsif result.size == 0 if rest =~ /^([#{CONSONANTS}]#{KERNEL})(?:(#{KERNEL})|(#{CODA})(#{KERNEL})|(#{CODA}#{CODA})(#{KERNEL})|(#{CODA}#{CODA})|(#{CODA}))?(.*)$/ result << $1 + $3.to_s + $5.to_s + $7.to_s + $8.to_s rest = $2.to_s + $4.to_s + $6.to_s + $9.to_s end else raise %(Cannot separate "#{@word}". No rule match next syllable at "#{result.join('')}|>#{rest}") end end result end end end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
pascoale-0.2.1 | lib/pascoale/syllable_separator.rb |
pascoale-0.2.0 | lib/pascoale/syllable_separator.rb |
pascoale-0.1.0 | lib/pascoale/syllable_separator.rb |