# encoding: UTF-8
$:.unshift(File.dirname(__FILE__)) unless
   $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
# $:.unshift('taxamatch_rb')
require 'taxamatch_rb/damerau_levenshtein_mod'
require 'taxamatch_rb/parser'
require 'taxamatch_rb/normalizer'
require 'taxamatch_rb/phonetizer'
require 'taxamatch_rb/authmatch'

# $KCODE is only honoured by Ruby 1.8. Compare major and minor version together
# so that later releases with a small minor number (2.0, 3.1, ...) are not
# mistaken for pre-1.9 interpreters.
$KCODE = 'u' if (RUBY_VERSION.split('.').map { |part| part.to_i } <=> [1, 9]) < 0

module Taxamatch

  # Fuzzy matcher for scientific names. Names are parsed with
  # Taxamatch::Parser, compared part by part with a modified
  # Damerau-Levenshtein distance and a phonetic key, and finally checked
  # against the authorship information.
  class Base

    def initialize
      @parser = Taxamatch::Parser.new
      @dlm = Taxamatch::DamerauLevenshteinMod.new
    end

    # Takes two scientific name strings, parses them and returns true if the
    # names match and false if they don't. Also returns false when the two
    # parsed names cannot be compared (see taxamatch_preparsed).
    def taxamatch(str1, str2)
      preparsed_1 = @parser.parse(str1)
      preparsed_2 = @parser.parse(str2)
      result = taxamatch_preparsed(preparsed_1, preparsed_2)
      # taxamatch_preparsed may hand back nil or false instead of a hash,
      # so guard before indexing.
      result ? result['match'] : false
    end

    # Takes two hashes of parsed scientific names and compares them. Useful
    # when the name strings have been parsed already.
    #
    # Returns the hash built by match_multinomial when both names have a
    # :genus, the result of match_uninomial (currently always false) when both
    # names have only a :uninomial, and nil when neither pairing applies.
    # A positive name match is reset to false when match_authors returns 0.
    def taxamatch_preparsed(preparsed_1, preparsed_2)
      result = nil
      result = match_uninomial(preparsed_1, preparsed_2) if preparsed_1[:uninomial] && preparsed_2[:uninomial]
      result = match_multinomial(preparsed_1, preparsed_2) if preparsed_1[:genus] && preparsed_2[:genus]
      if result && result['match']
        result['match'] = false if match_authors(preparsed_1, preparsed_2) == 0
      end
      result
    end

    # Placeholder for uninomial comparison: always reports no match.
    def match_uninomial(preparsed_1, preparsed_2)
      false
    end

    # Compares genus and species parts of two parsed names.
    #
    # Returns the combined hash from match_matches plus a 'score' entry:
    # 1 minus the total edit distance divided by the average combined length
    # of genus and species epithets of the two names.
    # Assumes both names carry a :species entry -- TODO confirm with the parser.
    def match_multinomial(preparsed_1, preparsed_2)
      gen_match = match_genera(preparsed_1[:genus], preparsed_2[:genus])
      sp_match = match_species(preparsed_1[:species], preparsed_2[:species])
      total_length = preparsed_1[:genus][:epitheton].size +
                     preparsed_2[:genus][:epitheton].size +
                     preparsed_1[:species][:epitheton].size +
                     preparsed_2[:species][:epitheton].size
      match = match_matches(gen_match, sp_match)
      # Divide as floats: integer division would truncate the ratio and
      # collapse the score to whole numbers.
      match.merge('score' => 1 - match['edit_distance'] / (total_length / 2.0))
    end

    # Compares two genus hashes (keys :normalized and :phonetized are read).
    #
    # Returns a hash with 'edit_distance', 'match' and 'phonetic_match'.
    # Equal phonetic keys are an immediate match. Otherwise the names match
    # when the distance is at most 3, the shorter name is longer than twice
    # the distance, and -- for distances of 2 or more -- both names start with
    # the same character.
    def match_genera(genus1, genus2)
      name1 = genus1[:normalized]
      name2 = genus2[:normalized]
      # The trailing 2, 3 are tuning arguments of the distance implementation
      # (presumably block size and maximum distance -- confirm against
      # DamerauLevenshteinMod).
      ed = @dlm.distance(name1, name2, 2, 3)
      if genus1[:phonetized] == genus2[:phonetized]
        return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true}
      end

      min_length = [name1.size, name2.size].min
      # Compare the first characters of the normalized strings; indexing the
      # genus hashes themselves with 0 would yield nil on both sides.
      match = ed <= 3 && min_length > ed * 2 && (ed < 2 || name1[0] == name2[0])
      {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
    end

    # Compares two species hashes (keys :normalized and :phonetized are read).
    #
    # Returns a hash with 'edit_distance', 'match' and 'phonetic_match'.
    # Phonetic keys are compared after Phonetizer.normalize_ending; the input
    # hashes are left untouched. Without a phonetic match the names match when
    # the distance is at most 4, the shorter name is at least twice the
    # distance long, names with distance >= 2 share their first character and
    # names with distance 4 share their first three characters.
    def match_species(sp1, sp2)
      name1 = sp1[:normalized]
      name2 = sp2[:normalized]
      phonetic1 = Taxamatch::Phonetizer.normalize_ending(sp1[:phonetized])
      phonetic2 = Taxamatch::Phonetizer.normalize_ending(sp2[:phonetized])
      ed = @dlm.distance(name1, name2, 4, 4)
      if phonetic1 == phonetic2
        return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true}
      end

      min_length = [name1.size, name2.size].min
      match = ed <= 4 &&
              min_length >= ed * 2 &&
              (ed < 2 || name1[0] == name2[0]) &&
              (ed < 4 || name1[0...3] == name2[0...3])
      {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
    end

    # Delegates to Authmatch.authmatch with the :all_authors and :all_years
    # entries of both parsed names and returns its result.
    def match_authors(preparsed_1, preparsed_2)
      au1 = preparsed_1[:all_authors]
      au2 = preparsed_2[:all_authors]
      yr1 = preparsed_1[:all_years]
      yr2 = preparsed_2[:all_years]
      Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
    end

    # Combines genus and species results into a new hash (the arguments are
    # not modified): edit distances are summed, and 'match' / 'phonetic_match'
    # hold only if they hold for both parts. A summed distance above 4 is
    # never a match. infraspecies_matches is accepted but currently unused.
    def match_matches(genus_match, species_match, infraspecies_matches = [])
      total_ed = species_match['edit_distance'] + genus_match['edit_distance']
      species_ok = total_ed > 4 ? false : species_match['match']
      species_match.merge(
        'edit_distance' => total_ed,
        'match' => species_ok && genus_match['match'],
        'phonetic_match' => species_match['phonetic_match'] && genus_match['phonetic_match']
      )
    end

  end

end