require 'helper'

require 'shoulda'

# Global logging switch; passed as the :log option to every FuzzyMatch built in this file.
$log = false

# Exercises FuzzyMatch (through its #improver) against aircraft-name fixtures.
#
# Every record is a one-element Array wrapping a String, except where a test
# deliberately passes bare Strings. Each test mutates the fixture instance
# variables created in #setup and then calls #ltd, which lazily builds a
# FuzzyMatch from whatever those variables hold at that moment.
class TestFuzzyMatchConvoluted < MiniTest::Spec
  # Resets the memoized matcher and rebuilds all fixtures before each test.
  def setup
    clear_ltd

    # dh 8 400
    @a_needle = ['DE HAVILLAND CANADA DHC8400 Dash 8']
    @a_haystack = ['DEHAVILLAND DEHAVILLAND DHC8-400 DASH-8']
    # dh 88
    @b_needle = ['ABCDEFG DH88 HIJKLMNOP']
    # dh 89
    @c_haystack = ['ABCDEFG DH89 HIJKLMNOP']
    # dh 8 200
    @d_needle = ['DE HAVILLAND CANADA DHC8200 Dash 8']
    @d_haystack = ['BOMBARDIER DEHAVILLAND DHC8-200Q DASH-8']
    @d_lookalike = ['ABCD DHC8200 Dash 8']

    # Rule rows are [regexp-as-string, description]. @t_1 is pushed onto
    # @tightenings and @r_1 onto @identities; both use the same pattern.
    @t_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good tightening for de havilland' ]

    @r_1 = [ '/(dh)c?-?(\d{0,2})-?(\d{0,4})(?:.*?)(dash|\z)/i', 'good identity for de havilland' ]

    @needle = [
      @a_needle,
      @b_needle,
      ['DE HAVILLAND DH89 Dragon Rapide'],
      ['DE HAVILLAND CANADA DHC8100 Dash 8 (E9, CT142, CC142)'],
      @d_needle,
      ['DE HAVILLAND CANADA DHC8300 Dash 8'],
      ['DE HAVILLAND DH90 Dragonfly']
    ]
    @haystack = [
      @a_haystack,
      @c_haystack,
      @d_haystack,
      ['DEHAVILLAND DEHAVILLAND DHC8-100 DASH-8'],
      ['DEHAVILLAND DEHAVILLAND TWIN OTTER DHC-6']
    ]
    @tightenings = []
    @identities = []
    @blockings = []
    @positives = []
    @negatives = []
    # Read by #ltd; initialised explicitly (to the value it previously had
    # implicitly) so that it is never an undefined instance variable.
    @blocking_only = nil
  end

  # Drops the memoized matcher so the next #ltd call rebuilds it from the
  # current fixture instance variables.
  def clear_ltd
    @_ltd = nil
  end

  # Returns the memoized FuzzyMatch for the current fixtures, building it on
  # first use. Fixture changes made after the first call are not picked up
  # until #clear_ltd is called.
  def ltd
    @_ltd ||= FuzzyMatch.new(
      @haystack,
      :tightenings => @tightenings,
      :identities => @identities,
      :blockings => @blockings,
      :positives => @positives,
      :negatives => @negatives,
      :blocking_only => @blocking_only,
      :log => $log
    )
  end

  should "optionally only pay attention to things that match blockings" do
    assert_equal @a_haystack, ltd.improver.match(@a_needle)

    # blocking_only with no blockings defined: nothing may match
    clear_ltd
    @blocking_only = true
    assert_nil ltd.improver.match(@a_needle)

    # @blocking_only is still true; add a blocking that the needle satisfies
    clear_ltd
    @blockings.push ['/dash/i']
    assert_equal @a_haystack, ltd.improver.match(@a_needle)
  end

  # the example from the readme, considerably uglier here
  should "check a simple table" do
    @haystack = [ 'seamus', 'andy', 'ben' ]
    @positives = [ [ 'seamus', 'Mr. Seamus Abshere' ] ]
    needle = [ 'Mr. Seamus Abshere', 'Sr. Andy Rossmeissl', 'Master BenT' ]

    assert_nothing_raised do
      ltd.improver.check needle
    end
  end

  should "treat a String as a full record if passed through" do
    dash = 'DHC8-400'
    b747 = 'B747200/300'
    dc9 = 'DC-9-10'
    haystack_records = [ dash, b747, dc9 ]
    simple_ltd = FuzzyMatch.new haystack_records, :log => $log
    assert_equal dash, simple_ltd.improver.match('DeHavilland Dash-8 DHC-400')
    assert_equal b747, simple_ltd.improver.match('Boeing 747-300')
    assert_equal dc9, simple_ltd.improver.match('McDonnell Douglas MD81/DC-9')
  end

  should "call it a mismatch if you hit a blank positive" do
    @positives.push [@a_needle[0], '']
    assert_raises(FuzzyMatch::Improver::Mismatch) do
      ltd.improver.match @a_needle
    end
  end

  should "call it a false positive if you hit a blank negative" do
    @negatives.push [@a_needle[0], '']
    assert_raises(FuzzyMatch::Improver::FalsePositive) do
      ltd.improver.match @a_needle
    end
  end

  should "have a false match without blocking" do
    # @d_needle will be our victim
    @haystack.push @d_lookalike
    @tightenings.push @t_1

    assert_equal @d_lookalike, ltd.improver.match(@d_needle)
  end

  should "do blocking if the needle matches a block" do
    # @d_needle will be our victim
    @haystack.push @d_lookalike
    @tightenings.push @t_1
    @blockings.push ['/(bombardier|de ?havilland)/i']

    assert_equal @d_haystack, ltd.improver.match(@d_needle)
  end

  should "treat blocks as exclusive" do
    @haystack = [ @d_needle ]
    @tightenings.push @t_1
    @blockings.push ['/(bombardier|de ?havilland)/i']

    assert_nil ltd.improver.match(@d_lookalike)
  end

  should "only use identities if they stem from the same regexp" do
    @identities.push @r_1
    @identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
    @identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
    x_needle = [ 'CESSNA D-333 CITATION V']
    x_haystack = [ 'CESSNA D-333' ]
    @haystack.push x_haystack

    assert_equal x_haystack, ltd.improver.match(x_needle)
  end

  should "use the best score from all of the tightenings" do
    x_needle = ["BOEING 737100"]
    x_haystack = ["BOEING BOEING 737-100/200"]
    x_haystack_wrong = ["BOEING BOEING 737-900"]
    @haystack.push x_haystack
    @haystack.push x_haystack_wrong
    @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
    @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']

    assert_equal x_haystack, ltd.improver.match(x_needle)
  end

  should "compare using prefixes if tightened key is shorter than correct match" do
    x_needle = ["BOEING 720"]
    x_haystack = ["BOEING BOEING 720-000"]
    x_haystack_wrong = ["BOEING BOEING 717-200"]
    @haystack.push x_haystack
    @haystack.push x_haystack_wrong
    @tightenings.push @t_1
    @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
    @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']

    assert_equal x_haystack, ltd.improver.match(x_needle)
  end

  should "use the shortest original input" do
    x_needle = ['De Havilland DHC8-777 Dash-8 Superstar']
    x_haystack = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar']
    x_haystack_long = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar/Supernova']

    @haystack.push x_haystack_long
    @haystack.push x_haystack
    @tightenings.push @t_1

    assert_equal x_haystack, ltd.improver.match(x_needle)
  end

  should "perform lookups needle to haystack" do
    assert_equal @a_haystack, ltd.improver.match(@a_needle)
  end

  should "succeed if there are no checks" do
    assert_nothing_raised do
      ltd.improver.check @needle
    end
  end

  should "succeed if the positive checks just work" do
    @positives.push [ @a_needle[0], @a_haystack[0] ]

    assert_nothing_raised do
      ltd.improver.check @needle
    end
  end

  should "fail if positive checks don't work" do
    @positives.push [ @d_needle[0], @d_haystack[0] ]

    assert_raises(FuzzyMatch::Improver::Mismatch) do
      ltd.improver.check @needle
    end
  end

  should "succeed if proper tightening is applied" do
    @positives.push [ @d_needle[0], @d_haystack[0] ]
    @tightenings.push @t_1

    assert_nothing_raised do
      ltd.improver.check @needle
    end
  end

  # NOTE(review): this test reads a remote spreadsheet, so it presumably needs
  # network access and depends on that document's current contents.
  should "use a Google Docs spreadsheet as a source of tightenings" do
    @positives.push [ @d_needle[0], @d_haystack[0] ]
    @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false

    # sabshere 9/30/10 this shouldn't raise anything
    # but the tightenings have been changed... we should be using test-only tightenings, not production ones
    # assert_nothing_raised do
    assert_raises(FuzzyMatch::Improver::Mismatch) do
      ltd.improver.check @needle
    end
  end

  should "fail if negative checks don't work" do
    @negatives.push [ @b_needle[0], @c_haystack[0] ]

    assert_raises(FuzzyMatch::Improver::FalsePositive) do
      ltd.improver.check @needle
    end
  end

  should "do inline checking" do
    @negatives.push [ @b_needle[0], @c_haystack[0] ]

    assert_raises(FuzzyMatch::Improver::FalsePositive) do
      ltd.improver.match @b_needle
    end
  end

  should "fail if negative checks don't work, even with tightening" do
    @negatives.push [ @b_needle[0], @c_haystack[0] ]
    @tightenings.push @t_1

    assert_raises(FuzzyMatch::Improver::FalsePositive) do
      ltd.improver.check @needle
    end
  end

  should "succeed if proper identity is applied" do
    @negatives.push [ @b_needle[0], @c_haystack[0] ]
    @positives.push [ @d_needle[0], @d_haystack[0] ]
    @identities.push @r_1

    assert_nothing_raised do
      ltd.improver.check @needle
    end
  end
end