Sha256: 1f90dcd9d2b4a72352adb27a762c58a121e0f095b054facbb0aadae62f204161

Contents?: true

Size: 1.35 KB

Versions: 1

Compression:

Stored size: 1.35 KB

Contents

# Mixin that rewrites Arabic-script text held by the including object:
# selected Arabic letter variants are replaced by the forms listed in
# CHARACTERS_MAPPINGS, and the marks listed in DIACRITICS are removed.
#
# The including object must supply two methods that are not defined here:
# * +word+         - reader for the string being processed (results are
#                    written back to the +@word+ instance variable);
# * +filter_rules+ - called with a rule collection; its result is used as the
#                    active rule set (a Hash-like object for the mappings, an
#                    Array-like object for the diacritics).
#                    NOTE(review): presumably drops rules the caller opted out
#                    of - confirm against the including class.
module Normalizer
  # Kaf variants.
  ARABIC_KAF = "\u0643".freeze # ك  U+0643 ARABIC LETTER KAF
  FARSI_KEHEH = "\u06a9".freeze # ک  U+06A9 ARABIC LETTER KEHEH

  # Yeh variants.
  ARABIC_YEH = "\u064a".freeze # ي  U+064A ARABIC LETTER YEH
  ARABIC_ALEF_MAKSOURA = "\u0649".freeze # ى  U+0649 ARABIC LETTER ALEF MAKSURA
  FARSI_YEH = "\u06cc".freeze # ی  U+06CC ARABIC LETTER FARSI YEH

  # Alef variants.
  ALEF_MADDA = "\u0622".freeze # آ  U+0622 ALEF WITH MADDA ABOVE
  ALEF_WITH_HAMZA_BELOW = "\u0625".freeze # إ  U+0625 ALEF WITH HAMZA BELOW
  ALEF_WITH_HAMZA_ABOVE = "\u0623".freeze # أ  U+0623 ALEF WITH HAMZA ABOVE
  ALEF = "\u0627".freeze # ا  U+0627 ARABIC LETTER ALEF

  # Elongation character.
  TATWIL = "\u0640".freeze # ـ  U+0640 ARABIC TATWEEL

  # Combining marks, U+064B through U+0652.
  FATHATAN = "\u064b".freeze
  DAMMATAN = "\u064c".freeze
  KASRATAN = "\u064d".freeze
  FATHA = "\u064e".freeze
  DAMMA = "\u064f".freeze
  KASRA = "\u0650".freeze
  SHADDA = "\u0651".freeze
  SUKUN = "\u0652".freeze

  # Source character => replacement. An empty replacement deletes the
  # character.
  CHARACTERS_MAPPINGS = {
    ARABIC_KAF => FARSI_KEHEH,
    ARABIC_YEH => FARSI_YEH,
    ARABIC_ALEF_MAKSOURA => FARSI_YEH,
    ALEF_MADDA => ALEF,
    ALEF_WITH_HAMZA_BELOW => ALEF,
    ALEF_WITH_HAMZA_ABOVE => ALEF,
    TATWIL => ''
  }.freeze

  # Marks that are stripped from the word.
  DIACRITICS = [
    FATHATAN, DAMMATAN, KASRATAN,
    FATHA, DAMMA, KASRA,
    SHADDA, SUKUN
  ].freeze

  # Applies the character mappings, then strips diacritics, and returns the
  # resulting value of +word+.
  def normalize
    map_charachters
    remove_diacritics
    word
  end

  private

  # Replaces every character that is a key of the active mapping with its
  # mapped value. Leaves the word untouched when no mapping is active.
  def map_charachters
    mapping = filter_rules(CHARACTERS_MAPPINGS)
    unless mapping.empty?
      source = word
      pattern = /[#{mapping.keys.join}]/
      # gsub with a Hash looks each matched character up in the mapping.
      @word = source.gsub(pattern, mapping)
    end
  end

  # Deletes every active diacritic from the word. Leaves the word untouched
  # when no diacritic rule is active.
  def remove_diacritics
    marks = filter_rules(DIACRITICS)
    unless marks.empty?
      source = word
      pattern = /[#{marks.join}]/
      @word = source.gsub(pattern, '')
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
farsi_processor-0.2.1 lib/normalizer.rb