test/FuzzyStringMatcher_spec.rb in perobs-4.2.0 vs test/FuzzyStringMatcher_spec.rb in perobs-4.3.0

- old
+ new

@@ -27,17 +27,29 @@ require 'perobs/Store' require 'perobs/FuzzyStringMatcher' module PEROBS + class WordRef < PEROBS::Object + + attr_persist :word, :line + + def initialize(store, word, line) + super(store) + self.word = word + self.line = line + end + + end + describe FuzzyStringMatcher do before(:all) do @db_name = generate_db_name(__FILE__) @store = PEROBS::Store.new(@db_name) - @fsm = FuzzyStringMatcher.new(@store, 'test') - @fsm2 = FuzzyStringMatcher.new(@store, 'test', true, 2) + @store['fsm'] = @fsm = @store.new(FuzzyStringMatcher) + @store['fsm2'] = @fsm2 = @store.new(FuzzyStringMatcher, true, 2) end after(:all) do @store.delete_store end @@ -101,10 +113,48 @@ it 'should not find an unknown match' do expect(@fsm.best_matches('foobar')).to eql([]) end + it 'should find a match' do + dut = { + [ 'one' ] => [ [ 'one', 1.0 ] ], + [ 'three' ] => [ [ 'three', 1.0 ] ], + [ 'four' ]=> [ [ 'four', 1.0 ], [ 'fourteen', 0.666 ] ], + [ 'four', 1.0 ]=> [ [ 'four', 1.0 ] ], + [ 'even' ] => [ [ 'seven', 0.666 ], [ 'eleven', 0.666 ] ], + [ 'teen' ] => [ ['thirteen', 0.6666666666666666], + ['fourteen', 0.6666666666666666], + ['fifteen', 0.6666666666666666], + ['sixteen', 0.6666666666666666], + ['seventeen', 0.6666666666666666], + ['eighteen', 0.6666666666666666], + ['nineteen', 0.6666666666666666] ], + [ 'aight' ] => [ [ 'eight', 0.5 ] ], + [ 'thirdteen' ] => [ [ 'thirteen', 0.5 ] ], + [ 'shirt teen', 0.3 ] => [ [ 'thirteen', 0.333 ] ] + } + check_data_under_test(@fsm, dut) + end + + it 'should sort best to worst matches' do + @fsm.clear + %w( xbar xfoox foor bar foobar barfoo foo rab baar fool xbarx + foobarx xfoobarx foo_bar ).each do |w| + @fsm.learn(w, w) + end + dut = { + [ 'foo' ] => [["foo", 1.0], ["foor", 0.5], ["foobar", 0.5], + ["fool", 0.5], ["foobarx", 0.5], ["foo_bar", 0.5], + ["barfoo", 0.5]], + [ 'bar' ] => [["bar", 1.0], ["barfoo", 0.5], ["xbar", 0.5], + ["foobar", 0.5], ["foo_bar", 0.5]], + [ 'foobar' ] => [["foobar", 1.0], ["foobarx", 0.8], ["xfoobarx", 0.6]] + } + check_data_under_test(@fsm, dut) + end + it 'should handle a larger text' do text =<<-EOT MIT License Permission is hereby granted, free of charge, to any person obtaining @@ -129,21 +179,61 @@ text.split.each do |word| @fsm2.learn(word, word) end stats = @fsm2.stats - expect(stats['dictionary_size']).to eql(363) + expect(stats['dictionary_size']).to eql(352) expect(stats['max_list_size']).to eql(22) - expect(stats['avg_list_size']).to be_within(0.001).of(2.366) + expect(stats['avg_list_size']).to be_within(0.001).of(2.409) end it 'should find case sensitive matches' do dut = { [ 'SOFTWARE', 0.5, 20 ] => [ [ 'SOFTWARE', 1.0 ], [ 'SOFTWARE.', 0.888 ] ], [ 'three', 0.5, 20 ] => [ [ 'the', 0.5 ], [ 'free', 0.5 ] ] } check_data_under_test(@fsm2, dut) + end + + it 'should support references to PEROBS objects' do + text =<<-EOT +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: +EOT + + line_no = 1 + @store['fsm'] = fsm = @store.new(FuzzyStringMatcher) + @store['refs'] = refs = @store.new(Array) + text.each_line do |line| + line.split.each do |word| + ref = @store.new(WordRef, word, line_no) + refs << ref + fsm.learn(word, ref) + end + line_no += 1 + end + + found_lines = [] + fsm.best_matches('SOFTWARE').each do |match| + found_lines << match[0].line + end + expect(found_lines.sort).to eql([ 4, 5, 5, 7, 8 ]) + end + + it 'should with small search words' do + @fsm.clear + mats = 'Yukihiro Matsumoto' + @fsm.learn(mats) + expect(@fsm.best_matches('Yukihiro').first.first).to eql(mats) + expect(@fsm.best_matches('Mats', 0.3).first.first).to eql(mats) end def check_data_under_test(fsm, dut) dut.each do |inputs, reference| key = inputs[0]