# frozen_string_literal: true

require_relative 'grc/version'

# Methods for working with Ancient Greek text in Ruby.
module Grc
  class Error < StandardError; end

  # Message returned (not raised) by the transforming methods when the
  # receiver contains no greek characters.
  STD_ERROR = 'ERROR: String does not contain any greek. Summon the muse and try again.'.freeze

  # The module-level variable keeps its original value, but the instance
  # methods below must use STD_ERROR: inside them `@std_error` is looked up on
  # the receiving String, where it is never set and therefore always nil.
  @std_error = STD_ERROR

  # A greek character together with any combining marks that follow it, so
  # that precomposed and decomposed text are handled alike.
  GREEK_CLUSTER = /\p{Greek}\p{Mn}*/.freeze

  # Characters split off as separate tokens by `tokenize`. The extra code
  # points are written as escapes because several of them look identical:
  # U+00B7 middle dot, U+0387 greek ano teleia, U+2027 hyphenation point,
  # U+2E31 word separator middle dot, U+10101 aegean word separator dot,
  # full stop, semicolon and U+037E greek question mark.
  TOKEN_SEPARATORS = /([[:punct:]\u00B7\u0387\u2027\u2E31\u{10101}.;\u037E])/.freeze

  # Combining marks as they appear in canonically decomposed (NFD) text.
  COMBINING_GRAVE = "\u0300".freeze
  COMBINING_ACUTE = "\u0301".freeze
  COMBINING_ROUGH_BREATHING = "\u0314".freeze

  # Vowels with tonos (Greek and Coptic block) and the visually identical
  # vowels with oxia (Greek Extended block), in matching order:
  # alpha, epsilon, eta, iota (lower then upper each), iota with dialytika,
  # omicron, upsilon (lower then upper each), upsilon with dialytika, omega (lower, upper).
  # Escapes keep the two sets distinct even if this file gets normalized.
  TONOS_VOWELS =
    "\u03AC\u0386\u03AD\u0388\u03AE\u0389\u03AF\u038A\u0390\u03CC\u038C\u03CD\u038E\u03B0\u03CE\u038F".freeze
  OXIA_VOWELS =
    "\u1F71\u1FBB\u1F73\u1FC9\u1F75\u1FCB\u1F77\u1FDB\u1FD3\u1F79\u1FF9\u1F7B\u1FEB\u1FE3\u1F7D\u1FFB".freeze

  # Lowercase characters with iota subscript and the capital that keeps the
  # subscript; `String#upcase` alone would expand them to two letters.
  IOTA_SUBSCRIPT_UPCASE = {
    'ᾀ' => 'ᾈ', 'ᾁ' => 'ᾉ', 'ᾂ' => 'ᾊ', 'ᾃ' => 'ᾋ', 'ᾄ' => 'ᾌ', 'ᾅ' => 'ᾍ', 'ᾆ' => 'ᾎ', 'ᾇ' => 'ᾏ',
    'ᾐ' => 'ᾘ', 'ᾑ' => 'ᾙ', 'ᾒ' => 'ᾚ', 'ᾓ' => 'ᾛ', 'ᾔ' => 'ᾜ', 'ᾕ' => 'ᾝ', 'ᾖ' => 'ᾞ', 'ᾗ' => 'ᾟ',
    'ᾠ' => 'ᾨ', 'ᾡ' => 'ᾩ', 'ᾢ' => 'ᾪ', 'ᾣ' => 'ᾫ', 'ᾤ' => 'ᾬ', 'ᾥ' => 'ᾭ', 'ᾦ' => 'ᾮ', 'ᾧ' => 'ᾯ',
    'ᾳ' => 'ᾼ', 'ῃ' => 'ῌ', 'ῳ' => 'ῼ'
  }.freeze

  # Latin equivalents of the unaccented lowercase letters. This is a simple
  # one-letter-at-a-time scheme; eta and omega become e/o with macron
  # (U+0113, U+014D).
  LOWERCASE_TRANSLITERATION = {
    'α' => 'a', 'β' => 'b', 'γ' => 'g', 'δ' => 'd', 'ε' => 'e', 'ζ' => 'z', 'η' => "\u0113", 'θ' => 'th',
    'ι' => 'i', 'κ' => 'k', 'λ' => 'l', 'μ' => 'm', 'ν' => 'n', 'ξ' => 'x', 'ο' => 'o', 'π' => 'p',
    'ρ' => 'r', 'σ' => 's', 'ς' => 's', 'τ' => 't', 'υ' => 'u', 'φ' => 'ph', 'χ' => 'ch', 'ψ' => 'ps',
    'ω' => "\u014D"
  }.freeze

  # The lowercase table plus capitals derived from it (e.g. theta => "Th").
  TRANSLITERATION = LOWERCASE_TRANSLITERATION.each_with_object({}) do |(greek, latin), table|
    table[greek] = latin
    table[greek.upcase] ||= latin.capitalize
  end.freeze

  # General methods

  # `grc?` (str → bool)
  # Returns true if the string contains at least one greek character.
  def grc?
    match?(/\p{Greek}/)
  end

  # `tokenize` (str → array)
  # Returns an array of tokens from the string. Tokens are separated by
  # whitespace and every punctuation character becomes a token of its own.
  def tokenize
    gsub(TOKEN_SEPARATORS, ' \1').split
  end

  # `transliterate` (str → str)
  # Returns a string with greek characters replaced with their transliteration.
  # Tokens without greek are kept as they are; all tokens (punctuation
  # included, see `tokenize`) are joined with single spaces.
  # Returns STD_ERROR if the string contains no greek.
  def transliterate
    return STD_ERROR unless grc?

    tokenize.map do |token|
      next token unless token.grc?

      # Work on the decomposed form so breathings are separate characters.
      letters = token.unicode_normalize(:nfd)
      # Rho with rough breathing is written "rh" wherever it occurs.
      letters = letters.gsub(/\u03C1\u0314/, 'rh').gsub(/\u03A1\u0314/, 'Rh')
      # Any rough breathing left belongs to a vowel: the word starts with "h".
      aspirated = letters.include?(COMBINING_ROUGH_BREATHING)

      latin = letters.gsub(/\p{Mn}/, '').gsub(/\p{Greek}/) { |letter| TRANSLITERATION.fetch(letter, letter) }
      next latin unless aspirated

      # Keep the capital at the front of the word: "Hellas", not "hEllas".
      latin.match?(/\A\p{Upper}/) ? "H#{latin[0].downcase}#{latin[1..-1]}" : "h#{latin}"
    end.join(' ')
  end

  # Unicode Inspection Methods

  # `unicode_points` (str → array)
  # Returns an array with one `\uXXXX` string (uppercase hex, at least four
  # digits) per code point of the string.
  def unicode_points
    unpack('U*').map { |code_point| format('\\u%04X', code_point) }
  end

  # `hash_dump`: (str → hash)
  # Returns a hash mapping each character of the string to its `String#dump`
  # representation (Char: Unicode_points).
  def hash_dump
    each_char.each_with_object({}) { |character, dumped| dumped[character] = character.dump }
  end

  # `unicode_name` (str → array)
  # Returns an array of unicode names from the string.
  # The `unicode/name` library is loaded on first use only.
  def unicode_name
    require 'unicode/name'
    each_char.map { |character| Unicode::Name.of character }
  end

  # Unicode Normalization

  # `nfd` (str → str)
  # Returns a string with the canonical decomposition of the string.
  def nfd
    unicode_normalize(:nfd)
  end

  # `nfc` (str → str)
  # Returns a string with the canonical composition of the string.
  def nfc
    unicode_normalize(:nfc)
  end

  # Case folding

  # `grc_downcase` (str → str)
  # Returns the lowercase version of string for greek characters resolving confusable characters.
  # See https://www.w3.org/TR/charmod-norm/#PreNormalization
  def grc_downcase
    nfd.downcase.nfc
  end

  # `grc_upcase` (str → str)
  # Default `upcase` methods strips diacritical marks from greek characters.
  # This method returns the corresponding uppercase version of string for greek characters preserving diacritical marks.
  # Only greek characters are changed; the result is in NFC.
  # See pages 1-7 of http://www.tlg.uci.edu/encoding/precomposed.pdf
  # https://icu.unicode.org/design/case/greek-upper
  def grc_upcase
    nfc.gsub(/\p{Greek}/) { |char| IOTA_SUBSCRIPT_UPCASE.fetch(char) { char.upcase } }
  end

  # Diacritical marks

  # `no_downcase_diacritics` (str → str)
  # Returns a string with the diacritics removed from lowercase greek characters.
  # Returns STD_ERROR if the string contains no greek.
  def no_downcase_diacritics
    return STD_ERROR unless grc?

    grc_strip_diacritics(/\p{Lower}/)
  end

  # `no_upcase_diacritics` (str → str)
  # Returns a string with the diacritics removed from uppercase greek characters.
  # Returns STD_ERROR if the string contains no greek.
  def no_upcase_diacritics
    return STD_ERROR unless grc?

    grc_strip_diacritics(/\p{Upper}/)
  end

  # `no_diacritics` (str → str)
  # Returns a string with the diacritics removed from lowercase and uppercase
  # greek characters. Returns STD_ERROR if the string contains no greek.
  def no_diacritics
    return STD_ERROR unless grc?

    no_downcase_diacritics.no_upcase_diacritics
  end

  # Accents

  # `to_grave` (str → str)
  # Returns a string with the grave replacing the acute accent on greek
  # characters. Tonos and oxia are both treated as acute.
  # Returns STD_ERROR if the string contains no greek.
  def to_grave
    return STD_ERROR unless grc?

    grc_swap_accent(COMBINING_ACUTE, COMBINING_GRAVE)
  end

  # `to_acute` (str → str)
  # Returns a string with the acute replacing the grave accent on greek
  # characters. The acute is emitted in its NFC form (tonos code points);
  # chain `to_oxia` when the oxia code points are wanted.
  # Returns STD_ERROR if the string contains no greek.
  def to_acute
    return STD_ERROR unless grc?

    grc_swap_accent(COMBINING_GRAVE, COMBINING_ACUTE)
  end

  # `to_oxia` (str → str)
  # Returns a string with the oxia replacing the tonos.
  # Returns STD_ERROR if the string contains no greek.
  def to_oxia
    return STD_ERROR unless grc?

    tr(TONOS_VOWELS, OXIA_VOWELS)
  end

  # `to_tonos` (str → str)
  # Returns a string with the tonos replacing the oxia.
  # Returns STD_ERROR if the string contains no greek.
  # See page 9 of http://www.tlg.uci.edu/encoding/precomposed.pdf
  def to_tonos
    return STD_ERROR unless grc?

    tr(OXIA_VOWELS, TONOS_VOWELS)
  end

  # `upper?` (str → bool)
  # Returns true if the string contains an uppercase character.
  def upper?
    match?(/\p{Upper}/)
  end

  # `lower?` (str → bool)
  # Returns true if the string contains a lowercase character.
  def lower?
    match?(/\p{Lower}/)
  end

  # Same as `to_tonos` (which also performs the greek check).
  def oxia_to_tonos
    to_tonos
  end

  # Same as `to_oxia` (which also performs the greek check).
  def tonos_to_oxia
    to_oxia
  end

  # Same as `to_acute` (which also performs the greek check).
  def grave_to_acute
    to_acute
  end

  # Same as `to_grave` (which also performs the greek check).
  def acute_to_grave
    to_grave
  end

  private

  # Removes the combining marks from every greek character whose base letter
  # matches `case_pattern`; everything else is returned untouched.
  def grc_strip_diacritics(case_pattern)
    gsub(GREEK_CLUSTER) do |cluster|
      next cluster unless cluster[0].match?(case_pattern)

      # Decompose, drop the non-spacing marks (diacritics), recompose.
      cluster.unicode_normalize(:nfd).gsub(/\p{Mn}/, '').unicode_normalize(:nfc)
    end
  end

  # Replaces the combining accent `from` with `to` on greek characters.
  # Only characters that carry `from` are rewritten (and returned in NFC), so
  # the rest of the string keeps its exact code points.
  def grc_swap_accent(from, to)
    gsub(GREEK_CLUSTER) do |cluster|
      decomposed = cluster.unicode_normalize(:nfd)
      decomposed.include?(from) ? decomposed.tr(from, to).unicode_normalize(:nfc) : cluster
    end
  end
end


# Mix the Grc helpers into String so they can be called on any string.
String.include(Grc)