# coding: utf-8
require "fonemas/version"

module Fonemas
  require 'unicode_utils'

  def self.version
    VERSION
  end

  def self.clean(text)
    s = text.gsub(/,/,' ')
    s = s.gsub(/\s+/,' ')
    s = s.chomp.strip
    s = downcase(s)
    return s
  end
  
  def self.downcase(text)
    UnicodeUtils.downcase(text)
  end

  def self.lastVocal(word,from)
    #puts "last vocal for #{word} from #{from}"
    for i in 1..from
     # puts i
      #puts word[from-i]
      if word[from-i] =~ /[aeiou]/
        return word[from-i]
      end
    end
    return false
  end

  def self.silabar(palabra)
    #puts "silabar: #{palabra}"
    #algoritmo adaptado desde python
    #codigo original extraido desde:
    #https://github.com/xergio/silabas/blob/master/home/silabea.py
    silabas = []
    letra = 0
    salto = 0
    while silabas.join('').length  < palabra.length
      #puts "silabas antes: #{silabas}"
      #puts "letra: #{letra}"
      #puts "palabra length: #{palabra.length}"
      silaba = ''
      salto = 0
      if isConsonante(palabra[letra])
        if isInseparables(palabra[letra..letra+1])
          salto += 2
        else
          salto += 1
        end
      else
        salto += 0
      end

      #puts "salto: #{salto}"
      if isDiptongoConH(palabra,letra+salto,letra+salto+2)
        #puts "diptongo con h"
        salto += 3
      elsif isDiptongo(palabra,letra+salto,letra+salto+1)
        salto += 2
      elsif isTriptongo(palabra,letra+salto,letra+salto+2)
        salto += 3
      elsif isDieresis(palabra,letra+salto,letra+salto+1)
        salto += 2
      else
        salto += 1
      end
      #puts "acoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"

      salto += coda(palabra[letra+salto,palabra.length])

      #puts "dcoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"


      silaba = palabra[letra,salto]
      letra += salto
      silabas << silaba

      #puts "Dletra: #{letra}"
      #puts "Dsalto: #{salto}"

    end
    return silabas.join("-")
  end

  def self.isInseparables(trozo)
    #puts "isInspearable? #{trozo}"
    inseparables = %w(br bl cr cl dr fr fl gr gl kr ll pr pl tr rr ch)
    return inseparables.include? trozo
  end

  def self.coda(trozo)
    return 0 if trozo.nil?
    #puts "coda: #{trozo}"
    l = trozo.length
    if l == 0
      return 0
    elsif l == 1 and isConsonante(trozo)
      return 1
    elsif l > 1 and isInseparables(trozo[0,2])
      return 0
    elsif l > 1 and isConsonante(trozo,0) and isVocal(trozo,1)
      return 0
    elsif l > 2 and isConsonante(trozo,0) and isConsonante(trozo,1) and isVocal(trozo,2)
      return 1
    elsif l > 3 and isConsonante(trozo,0) and isInseparables(trozo[1,2]) and isVocal(trozo[3])
      return 1
    elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isVocal(trozo,3)
      return 2
    elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isConsonante(trozo,3)
      return 2
    else
      return 0
    end
  end


  def self.calcularPosicionSilabas(silabada)
    #puts "calcular posicion #{silabada}."
    output = []
    text = silabada
    while(!text.index("-").nil?)
      i = text.index("-")
      text = text.slice(0,i) + text.slice(i+1,text.length)
      output << i
    end
    return output
  end


  def self.isTonica(word,i)
    test = _isTonica(word,i)
    if test
      if _isTonica(word,i+1)
        return false
      else
        return test
      end
    else
      return false
    end
  end


  def self._isTonica(word,i)
    return false if isConsonante(word,i)
    #falta considerar las palabras que poseen acento pero no tilde
    tildes = %w(á é í ó ú ã ä ë)
    w = word.join
    #puts "isTonica? #{w}: #{i}"
    return true if w.size == 1


    if tildes.include? w[i]
      return true
    else
      g = silabar(w)
      hh = g.split("-")
      p = calcularPosicionSilabas(g)

      if hh.size == 1 and w.size > 4 and w.include? 'h' and w[0] != 'h'
        #caso johan
        p = w.index('h')
        if i < p
          return true
        else
          return false
        end
      end

      #puts hh.size
      if w =~ /[áéíóúãäë]/
        #acento ya existe en otra silaba
        return false
      else
        #puts es.visualize(w)
        if hh.size == 1
          if lastVocal(w,w.size-1) == word[i]
            return true
          else
            return false
          end
          #monosilabos
        elsif hh.size == 2
            #agudas, se acentuan en n,s o vocal
            #puts "#{word[i]} #{i}<#{p[0]} - #{lastVocal(w,p[0])}"
            if w =~ /[nsaeiou]$/
              #termina en n s y vocal y no tiene tilde
              #por lo tanto es grave
            #  puts "#{lastVocal(w,p[0])} == #{word[i]} #{word[i].class.name}"
              if i < p[0] and lastVocal(w,p[0]) == word[i]
                return true
              else
                return false
              end
            else
              if i < p[0]
                return false
              else
                return lastVocal(w,w.size) == word[i]
              end
            end
        elsif hh.size >= 3
          #puts hh.join("-")
          #puts "hhsize3 i: #{i}, p:#{p}"
          if i >= p[p.size-1]
            if w =~ /[nsaeiou]$/
              return false
            else
              return true
            end
          elsif i > p[p.size-2] and i <= p[p.size-1] and w =~ /[nsaeiou]$/
            return true
          else
            return false
          end
        end
      end


      return false
    end
  end

  def self.isFinal(word,i)
    return word.size == i-1
  end

  def self.isFricativa(word,i)
    fricativas = %w(f s c z j ll y g b w b v w s m b x d)
    return fricativas.include? word[i]
  end

  def self.entreVocales(word,i)
    if i == 0 or word.size - 1 == i
      return false
    else
      return (isVocal(word,i-1) and isVocal(word,i+1))
    end
  end

  def self.entreVocalyConsonante(word,i)
    return ((isVocal(word,i-1) and !isVocal(word,i+1)) or (isVocal(word,i+1) and !isVocal(word,i-1)))

  end

  def self.isVocal(word,i=0)
    vocales = %w(a e i o u á é í ó ú)
    return vocales.include? word[i]
  end

  def self.isConsonante(word,i=0)
    return !isVocal(word,i)
  end

  def self.isTriptongo(palabra,first,third)
    t = palabra[first,third]
    return false if t.length < 3
    triptongos = %w(iai iei uai uei uau iau uay uey)
    return triptongos.include? t
  end

  def self.isDieresis(palabra,first,second)
    t = palabra[first,second]
    return false if t.length < 2
    dieresis = %w(ue ui)
    return dieresis.include? t

  end

  def self.isDiptongo(word,first,second)
    trozo = word[first..second]
    return false if trozo.length != 2
    #puts "diptongo word #{word}, first: #{first}, second: #{second}"
    #puts "test diptongo #{word[first] + word[second]}"
    f = word[first]
    s = word[second]
    abiertas = %w(a e o á é ó)
    cerradas = %w(i u í ú)
    return ((abiertas.include? f and cerradas.include? s) or (abiertas.include? s and cerradas.include? f) or (cerradas.include? f and cerradas.include? s))

  end

  def self.isDiptongoConH(word,first,third)
    test = word[first..third]
    #puts "test diptongo con h: #{test}"
    if test[1] == 'h'
      if test[2,2] == 'ue'
        return false
      else
        test = test.gsub(/h/,'')
      end
    else
      return false
    end
    return isDiptongo(test,0,1)
  end

  def self.separar(word)
    word = downcase(word)
    output = []
    i = 0
    while(i < word.length)
      if word[i] == 'c' and word[i+1] == 'h'
        output << "ch"
        i+=1
      elsif word[i] == 'l' and word[i+1] == 'l'
        output << 'll'
        i+=1
      elsif word[i] == 'r' and word[i+1] == 'r'
        output << 'rr'
        i+=1
      else
        output << word[i]
      end
      i +=1
    end
    return output
  end

  def self.fonemaLetra(letra)
    case letra
      when 'a','á' then ['aa']
      when 'b' then ['b ee']
      when 'c' then ['s ee']
      when 'd' then ['d ee']
      when 'e','é' then ['ee']
      when 'f' then ['ee f ee']
      when 'g' then ['g ee']
      when 'h' then ['aa ch e']
      when 'i','í' then ['ii']
      when 'j' then ['j oo t a']
      when 'k' then ['k aa']
      when 'l' then ['ee l e']
      when 'm' then ['ee m e']
      when 'n' then ['ee n e']
      when 'ñ' then ['ee nh e']
      when 'o','ó' then ['oo']
      when 'p' then ['p ee']
      when 'q' then ['k uu']
      when 'r' then ['ee rr ee','ee r ee']
      when 's' then ['ee s e']
      when 't' then ['t ee']
      when 'u','ú' then ['uu']
      when 'v' then ['b ee','uu b e']
      when 'w' then ['d o b l e b ee','d o b l e uu b e']
      when 'x' then ['ee k i s']
      when 'y' then ['ll ee']
      when 'z' then ['s ee t a']
      else
        raise "error, no conozco pronunciación de #{letra}"
    end
  end


  def self.fonemas(word)
    word = word.gsub(/'/,'')
    if word.size == 1
      return fonemaLetra(word)
    end
    if word.include?('_')
      output = []
      for a in word.split('_')
        if a.size > 0
          output << Fonemas.fonemas(a)
        end
      end
      return [output.join(" ")]
    end
    word = separar(word)
    fonema = []
    for i in 0..(word.length-1)
      letra = word[i]
      case letra
        when 'a','á','ä','ã' then
          if isTonica(word,i)
            fonema << 'aa'
          else
            fonema << 'a'
          end
        when 'b','v' then
          if isVocal(word,i-1) and (word[i+1] == 'b' or word[i+1] == 'v')
            fonema << ['bb','']
          elsif i == 0 and isVocal(word,i+1)
            if word[i+1] == 'u' and isDiptongo(word,i+1,i+2)
              fonema << ['bb','b','g']
            else
              fonema << ['bb','b']
            end
          elsif word[i+1] == 'u' and isDiptongo(word,i+1,i+2)
            if entreVocales(word,i)
              fonema << ['b','g','']
            else
              fonema << ['bb','g']
            end
          elsif isFricativa(word,i-1)
            fonema << 'b'
          elsif isFinal(word,i)
            fonema << 'b'
          elsif entreVocales(word,i)
            fonema << ['b','']
          else
            fonema << 'bb'
          end
        when 'c' then
          if word[i+1] == 'e' or word[i+1] == 'i'
            fonema << 's'
          else
            fonema << 'k'
          end
        when 'ch' then
          if entreVocales(word,i)
            #fonema << ['ch','sh','tch','j']
            fonema << 'ch'
          else
            fonema << 'ch'
            #fonema << ['ch','sh','tch']
          end
        when 'd' then
          if i == 0 and isVocal(word,i+1)
            fonema << ['dd','d']
          elsif entreVocales(word,i) or i == word.size-1
            fonema << ['d','']
          elsif entreVocalyConsonante(word,i)
            fonema << ['dd','d']
          else
            fonema << 'd'
          end
        when 'e','é','ë' then
          if isTonica(word,i)
            fonema << 'ee'
          else
            fonema << 'e'
          end
        when 'f' then
          fonema << 'f'
        when 'g' then
          if word[i+1] == 'u' and i == 0 and isTonica(word,i+2)
            #nada

          elsif word[i+1] == 'e' or word[i+1] == 'i'
            fonema << 'j'
          else
            if !entreVocales(word,i) and word[i-1] != 'n'
              fonema << 'gg'
            else
              fonema << 'g'
            end
          end
        when 'h' then
          if word[i+1] == 'u' and isDiptongo(word,i+1,i+2)
            fonema << ['','g']
          elsif i > 0 and word[i-1] == 'o' and word[i+1] == 'a'
            fonema << 'j'
          end
          #nada
        when 'i','í' then
          if isTonica(word,i)
            fonema << 'ii'
          else
            fonema << 'i'
          end
        when 'j' then
          if i == 0 and word[i+1] == 'o' and (word[i+2] == 'ã' or word[i+2] == 'h')
            fonema << 'll'
          else
            fonema << 'j'
          end
        when 'k' then
          fonema << 'k'
        when 'l' then
          fonema << 'l'
        when 'll' then
          #fonema << ['ll','lli','i']
          fonema << ['ll','i']
        when 'm' then
          fonema << 'm'
        when 'n' then
          fonema << 'n'
        when 'ñ'  then
          fonema << 'nh'
        when 'o','ó' then
          if isTonica(word,i)
            fonema << 'oo'
          else
            fonema << 'o'
          end
        when 'p' then
          fonema << 'p'
        when 'q' then
          fonema << 'k'
        when 'r' then
          if i == 0
            fonema << 'rr'
          else
            fonema << 'r'
          end
        when 'rr' then
          fonema << 'rr'
        when 's' then
          if word[i-1] == 'r' or word[i-1] == 'd' or i == word.size-1
            #fonema << ['s','','h']
            fonema << ['s','']
          elsif entreVocalyConsonante(word,i)
            #fonema << ['s','h']
            fonema << 's'
          elsif word[i-1] == 'b' and word[i+1] == 't'
            #fonema << ['s','h']
            fonema << 's'
          elsif word[i-1] == 'b'
            fonema << ['s','']
          else
            fonema << 's'
          end
        when 't' then
          fonema << 't'
        when 'ü' then
          fonema << 'u'
        when 'u','ú' then
          if word[i-1] == 'q'
            #nada
          elsif word[i-1] == 'g' and i == 1 and isTonica(word,i+1)
              fonema << ['gu']
          elsif isTonica(word,i)
              fonema << 'uu'
          else
              fonema << 'u'
          end
        when 'w' then
          if i == 0
            fonema << ['b','bb']
          elsif word[i-1] == 'o'
            fonema << 'u'
          elsif word[i+1] == 'i'
            fonema << 'u'
          else #if entreVocales(word,i)
            fonema << 'gu'
#          else
#            fonema << 'Gu'
          end
        when 'x' then
          #fonema << ['ks','k','h']
          fonema << ['ks','k']
        when 'y' then
          if i == word.size - 1
            fonema << 'i'
          else
            #fonema << ['ll','lli','i']
            fonema << 'll'
          end
        when 'z' then
          if i == word.size - 1
            #fonema << ['s','h','']
            fonema << ['s','']
          else
            fonema << 's'
          end

        else
          raise "error, no conozco pronunciación de #{letra} en #{word}"
      end


    end
    #puts "pre: #{fonema}"
    t =  normalize(generateFonemas(fonema))
    #puts "out: #{t}"

    #self.checkFonemas(t)

    return t
  end

  #def self.checkFonemas(p)
  #  #un ultimo chequeo de seguridad
  #  for pronunciacion in p
  #    for fonema in pronunciacion.split(" ")
  #      raise "fonema invalido" unless lista_de_fonemas.include? fonema
  #    end
  #  end
  #
  #end

  def self.generateFonemas(fonema,i=0,current=[])
      if i == fonema.length
        return current.join(' ')
      end

      c = fonema[i]
      if c.class.name == 'Array'
        output = []
        for j in c
          if j == ''
            output << generateFonemas(fonema,i+1,current)
          else
            output << generateFonemas(fonema,i+1,current + [j])
          end
        end
        return output
      else
        if c == ''
          return generateFonemas(fonema,i+1,current)
        else
          return generateFonemas(fonema,i+1,current + [c])
        end
      end

  end


  def self.normalize(t)
    @output = []
    anormalize(t)
    return @output
  end

  def self.anormalize(t)
    #puts "pre normalize: #{t}"
    if t.class.name == 'Array'
      for i in t
        anormalize(i)
      end
    else
      #puts "found #{t}"
      @output << t
    end

  end

  def self.lista_de_fonemas
    phonelist = ['SIL']
    phonelist += %w{a e i o u aa ee ii oo uu}
    phonelist += %w{bb b d e f g i j k l m n o p rr r s t u ks k gu ch dd gg ll nh}
    phonelist.uniq
  end


end