# -*- encoding: utf-8 -*-
require 'helper'

# Specs for FuzzyMatch, driven entirely through the calls visible below:
# FuzzyMatch.new, #find, #find_all, #explain, #last_result, #free, #read and
# FuzzyMatch.engine. Each example builds its own matcher from a small literal
# haystack, so the examples do not share any matcher instances.
class TestFuzzyMatch < MiniTest::Spec
  it %{identify the best match based on string similarity} do
    d = FuzzyMatch.new %w{ RATZ CATZ }
    d.find('RITZ').must_equal 'RATZ'
    # same needle, but with an accented character in it
    d.find('RíTZ').must_equal 'RATZ'

    d = FuzzyMatch.new [ 'X' ]
    d.find('X').must_equal 'X'
    d.find('A').must_be_nil
  end

  it %{not gather metadata about the last result by default} do
    d = FuzzyMatch.new %w{ NISSAN HONDA }
    d.find('MISSAM')
    # must_raise only understands exception classes (plus an optional String
    # failure message); a Regexp passed next to the class is never compared
    # with the error message. Assert on the message explicitly, using the
    # exception object that must_raise returns.
    error = lambda { d.last_result }.must_raise ::RuntimeError
    error.message.must_match(/gather_last_result/)
  end

  it %{optionally gather metadata about the last result} do
    d = FuzzyMatch.new %w{ NISSAN HONDA }
    d.find 'MISSAM', :gather_last_result => true
    d.last_result.score.must_equal 0.6
    d.last_result.winner.must_equal 'NISSAN'
  end

  it %{use NORMALIZERS} do
    # baseline: without normalizers the 737-900 record wins
    d = FuzzyMatch.new ['BOEING 737-100/200', 'BOEING 737-900']
    d.find('BOEING 737100 number 900').must_equal 'BOEING 737-900'

    normalizers = [
      %r{(7\d)(7|0)-?(\d{1,3})} # tighten 737-100/200 => 737100, which will cause it to win over 737-900
    ]
    d = FuzzyMatch.new ['BOEING 737-100/200', 'BOEING 737-900'], :normalizers => normalizers
    d.find('BOEING 737100 number 900').must_equal 'BOEING 737-100/200'
  end

  it %{use IDENTITIES} do
    # false positive without identity
    d = FuzzyMatch.new %w{ foo bar }
    d.find('baz').must_equal 'bar'

    # with the identity in place, 'baz' no longer matches 'bar'
    d = FuzzyMatch.new %w{ foo bar }, :identities => [ /ba(.)/ ]
    d.find('baz').must_be_nil
  end

  # TODO this is not very helpful
  it %{use BLOCKINGS} do
    d = FuzzyMatch.new [ 'X' ], :blockings => [ /X/, /Y/ ]
    d.find('X').must_equal 'X'
    d.find('A').must_be_nil
  end

  # TODO this is not very helpful
  it %{optionally only attempt matches with records that fit into a blocking} do
    # :must_match_blocking given to the constructor ...
    d = FuzzyMatch.new [ 'X' ], :blockings => [ /X/, /Y/ ], :must_match_blocking => true
    d.find('X').must_equal 'X'
    d.find('A').must_be_nil

    # ... or per call to #find
    d = FuzzyMatch.new [ 'X' ], :blockings => [ /X/, /Y/ ]
    d.find('X', :must_match_blocking => true).must_equal 'X'
    d.find('A', :must_match_blocking => true).must_be_nil
  end

  it %{receive the deprecated FuzzyMatch#free method without complaint} do
    d = FuzzyMatch.new %w{ A B }
    d.free
    # the matcher must still be usable after #free
    d.find('A').wont_be_nil
  end

  it %{return all records in sorted order} do
    d = FuzzyMatch.new [ 'X', 'X22', 'Y', 'Y4' ], :blockings => [ /X/, /Y/ ], :must_match_blocking => true
    d.find_all('X').must_equal ['X', 'X22' ]
    d.find_all('A').must_equal []
  end

  it %{optionally force the first blocking to decide} do
    # baseline: without :first_blocking_decides every Boeing record is returned
    d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :blockings => [ /(boeing \d{3})/i, /boeing/i ]
    d.find_all('Boeing 747').must_equal [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ]

    d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :blockings => [ /(boeing \d{3})/i, /boeing/i ], :first_blocking_decides => true
    d.find_all('Boeing 747').must_equal [ 'Boeing 747', 'Boeing 747SR' ]

    # first_blocking_decides refers to the needle
    d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :blockings => [ /(boeing \d{3})/i, /boeing/i ], :first_blocking_decides => true
    d.find_all('Boeing ER6').must_equal ["Boeing ER6", "Boeing 747", "Boeing 747SR"]

    d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :blockings => [ /(boeing \d{3})/i, /boeing (7|E)/i, /boeing/i ], :first_blocking_decides => true
    d.find_all('Boeing ER6').must_equal [ 'Boeing ER6' ]

    # or equivalently with an identity
    d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :blockings => [ /(boeing \d{3})/i, /boeing/i ], :first_blocking_decides => true, :identities => [ /boeing (7|E)/i ]
    d.find_all('Boeing ER6').must_equal [ 'Boeing ER6' ]
  end

  # Every example here uses a two-record haystack whose records hold 'a' and
  # 'b' in opposite positions, so the record returned shows which field
  # :read selected for comparison.
  describe "the :read option" do
    it %{interpret a Numeric as an array index} do
      ab = ['a', 'b']
      ba = ['b', 'a']
      haystack = [ab, ba]
      by_first = FuzzyMatch.new haystack, :read => 0
      by_last = FuzzyMatch.new haystack, :read => 1
      by_first.find('a').must_equal ab
      by_last.find('b').must_equal ab
      by_first.find('b').must_equal ba
      by_last.find('a').must_equal ba
    end

    it %{interpret a Symbol, etc. as hash key} do
      ab = { :one => 'a', :two => 'b' }
      ba = { :one => 'b', :two => 'a' }
      haystack = [ab, ba]
      by_first = FuzzyMatch.new haystack, :read => :one
      by_last = FuzzyMatch.new haystack, :read => :two
      by_first.find('a').must_equal ab
      by_last.find('b').must_equal ab
      by_first.find('b').must_equal ba
      by_last.find('a').must_equal ba
    end

    # Struct records expose :one and :two as reader methods.
    MyStruct = Struct.new(:one, :two)
    it %{interpret a Symbol as a method id (if the object responds to it)} do
      ab = MyStruct.new('a', 'b')
      ba = MyStruct.new('b', 'a')
      haystack = [ab, ba]
      by_first = FuzzyMatch.new haystack, :read => :one
      by_last = FuzzyMatch.new haystack, :read => :two
      # the configured reader is exposed through #read
      by_first.read.must_equal :one
      by_last.read.must_equal :two
      by_first.find('a').must_equal ab
      by_last.find('b').must_equal ab
      by_first.find('b').must_equal ba
      by_last.find('a').must_equal ba
    end

    it %{treat the deprecrated :haystack_reader option as an alias} do
      ab = ['a', 'b']
      ba = ['b', 'a']
      haystack = [ab, ba]
      by_first = FuzzyMatch.new haystack, :haystack_reader => 0
      by_first.find('a').must_equal ab
      by_first.find('b').must_equal ba
    end
  end

  it %{not return any result if the maximum score is zero} do
    FuzzyMatch.new(['a']).find('b').must_be_nil
  end

  it %{optionally require that the matching record share at least one word with the needle} do
    d = FuzzyMatch.new %w{ RATZ CATZ }, :must_match_at_least_one_word => true
    d.find('RITZ').must_be_nil

    # "Foo's" is a shared word here; neither "'s" nor "Foo" alone is
    d = FuzzyMatch.new ["Foo's Bar"], :must_match_at_least_one_word => true
    d.find("Foo's").must_equal "Foo's Bar"
    d.find("'s").must_be_nil
    d.find("Foo").must_be_nil

    # the comma after "Bolivia" in the record does not prevent the match
    d = FuzzyMatch.new ["Bolivia, Plurinational State of"], :must_match_at_least_one_word => true
    d.find("Bolivia").must_equal "Bolivia, Plurinational State of"
  end

  it %{use STOP WORDS} do
    # baseline: without stop words 'B HTL' wins, whether the option is given
    # per call ...
    d = FuzzyMatch.new [ 'A HOTEL', 'B HTL' ]
    d.find('A HTL', :must_match_at_least_one_word => true).must_equal 'B HTL'

    # ... or to the constructor
    d = FuzzyMatch.new [ 'A HOTEL', 'B HTL' ], :must_match_at_least_one_word => true
    d.find('A HTL').must_equal 'B HTL'

    # with HOTEL/HTL declared as a stop word, 'A HOTEL' wins instead
    d = FuzzyMatch.new [ 'A HOTEL', 'B HTL' ], :must_match_at_least_one_word => true, :stop_words => [ %r{HO?TE?L} ]
    d.find('A HTL').must_equal 'A HOTEL'
  end

  it %{print a basic explanation to stdout} do
    d = FuzzyMatch.new %w{ RATZ CATZ }
    # capture_io swaps $stdout (and $stderr) for StringIO objects while the
    # block runs and restores them afterwards, even if #explain raises.
    out, _err = capture_io { d.explain('RITZ') }
    out.must_include 'CATZ'
  end

  it %{not be fooled by substrings (but rather compare whole words to whole words)} do
    d = FuzzyMatch.new [ 'PENINSULA HOTELS' ], :must_match_at_least_one_word => true
    d.find('DOLCE LA HULPE BXL FI').must_be_nil
  end

  it %{not be case-sensitive when checking for sharing of words} do
    d = FuzzyMatch.new [ 'A', 'B' ]
    d.find('a', :must_match_at_least_one_word => true).must_equal 'A'
  end

  it %{defaults to a pure-ruby engine, but also has amatch} do
    # $testing_amatch is a global set outside this file; when it is truthy
    # the amatch engine is expected, otherwise the pure-ruby one.
    if defined?($testing_amatch) and $testing_amatch
      FuzzyMatch.engine.must_equal :amatch
    else
      FuzzyMatch.engine.must_equal :pure_ruby
    end
  end
end