spec/functional/automatic_segmentation_spec.rb in picky-4.31.0 vs spec/functional/automatic_segmentation_spec.rb in picky-4.31.1
- old
+ new
@@ -2,119 +2,125 @@
#
require 'spec_helper'
describe "automatic splitting" do
- let(:index) do
- index = Picky::Index.new :automatic_text_splitting do
- indexing removes_characters: /[^a-z\s]/i,
- stopwords: /\b(in|a)\b/
- category :text
- end
+ [false, true].each do |sym_keys|
+ context "symbol index? #{sym_keys}" do
+ let(:index) do
+ index = Picky::Index.new :automatic_text_splitting do
+ symbol_keys sym_keys
+
+ indexing removes_characters: /[^a-z\s]/i,
+ stopwords: /\b(in|a)\b/
+ category :text
+ end
- require 'ostruct'
- index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
- index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
- index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
- index.add OpenStruct.new(id: 4, text: 'The color purple.')
- index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
- index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
+ require 'ostruct'
+ index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
+ index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
+ index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
+ index.add OpenStruct.new(id: 4, text: 'The color purple.')
+ index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
+ index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
- index
- end
+ index
+ end
- context 'splitting the text automatically' do
- let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
+ context 'splitting the text automatically' do
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
- # It splits the text correctly.
- #
- it do
- automatic_splitter.segment('purplerainbow').should == [
- ['purple', 'rain', 'bow'],
- 2.078999999999999
- ]
- end
- end
+ # It splits the text correctly.
+ #
+ it do
+ automatic_splitter.segment('purplerainbow').should == [
+ ['purple', 'rain', 'bow'],
+ 2.078999999999999
+ ]
+ end
+ end
- context 'splitting the text automatically' do
- let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
+ context 'splitting the text automatically' do
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text] }
- # It splits the text correctly.
- #
- it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
- it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
- it { automatic_splitter.split('purple').should == ['purple'] }
+ # It splits the text correctly.
+ #
+ it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
+ it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
+ it { automatic_splitter.split('purple').should == ['purple'] }
- # When it can't, it splits it using the partial index (correctly).
- #
- it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain'] }
- it { automatic_splitter.split('purplerainb').should == ['purple', 'rain'] }
- #
- it { automatic_splitter.split('purplerai').should == ['purple'] }
- it { automatic_splitter.split('purplera').should == ['purple'] }
- it { automatic_splitter.split('purpler').should == ['purple'] }
- #
- it { automatic_splitter.split('purpl').should == [] }
- it { automatic_splitter.split('purp').should == [] }
- it { automatic_splitter.split('pur').should == [] }
- it { automatic_splitter.split('pu').should == [] }
- it { automatic_splitter.split('p').should == [] }
- end
+ # When it can't, it splits it using the partial index (correctly).
+ #
+ it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain'] }
+ it { automatic_splitter.split('purplerainb').should == ['purple', 'rain'] }
+ #
+ it { automatic_splitter.split('purplerai').should == ['purple'] }
+ it { automatic_splitter.split('purplera').should == ['purple'] }
+ it { automatic_splitter.split('purpler').should == ['purple'] }
+ #
+ it { automatic_splitter.split('purpl').should == [] }
+ it { automatic_splitter.split('purp').should == [] }
+ it { automatic_splitter.split('pur').should == [] }
+ it { automatic_splitter.split('pu').should == [] }
+ it { automatic_splitter.split('p').should == [] }
+ end
- context 'splitting text automatically (with partial)' do
- let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
+ context 'splitting text automatically (with partial)' do
+ let(:automatic_splitter) { Picky::Splitters::Automatic.new index[:text], partial: true }
- # It splits the text correctly.
- #
- it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
- it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
- it { automatic_splitter.split('purple').should == ['purple'] }
+ # It splits the text correctly.
+ #
+ it { automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow'] }
+ it { automatic_splitter.split('purplerain').should == ['purple', 'rain'] }
+ it { automatic_splitter.split('purple').should == ['purple'] }
- # Creates the right queries (see below).
- #
- it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
- it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
- it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
+ # Creates the right queries (see below).
+ #
+ it { automatic_splitter.split('colorpurple').should == ['color', 'purple'] }
+ it { automatic_splitter.split('bownew').should == ['bow', 'new'] }
+ it { automatic_splitter.split('spainisking').should == ['spain', 'is', 'king'] }
- # When it can't, it splits it using the partial index (correctly).
- #
- it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
- it { automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b'] }
- #
- it { automatic_splitter.split('purplerai').should == ['purple', 'rai'] }
- it { automatic_splitter.split('purplera').should == ['purple', 'ra'] }
- it { automatic_splitter.split('purpler').should == ['purple'] } # No 'r' in partial index.
- #
- it { automatic_splitter.split('purpl').should == ['purpl'] }
- it { automatic_splitter.split('purp').should == ['purp'] }
- it { automatic_splitter.split('pur').should == [] } # No 'pur' in partial index etc.
- it { automatic_splitter.split('pu').should == [] }
- it { automatic_splitter.split('p').should == [] }
+ # When it can't, it splits it using the partial index (correctly).
+ #
+ it { automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo'] }
+ it { automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b'] }
+ #
+ it { automatic_splitter.split('purplerai').should == ['purple', 'rai'] }
+ it { automatic_splitter.split('purplera').should == ['purple', 'ra'] }
+ it { automatic_splitter.split('purpler').should == ['purple'] } # No 'r' in partial index.
+ #
+ it { automatic_splitter.split('purpl').should == ['purpl'] }
+ it { automatic_splitter.split('purp').should == ['purp'] }
+ it { automatic_splitter.split('pur').should == [] } # No 'pur' in partial index etc.
+ it { automatic_splitter.split('pu').should == [] }
+ it { automatic_splitter.split('p').should == [] }
- let(:try) do
- splitter = automatic_splitter
- Picky::Search.new index do
- searching splits_text_on: splitter
- end
- end
+ let(:try) do
+ splitter = automatic_splitter
+ Picky::Search.new index do
+ searching splits_text_on: splitter
+ end
+ end
- # Should find the one with all parts.
- #
- it { try.search('purplerainbow').ids.should == [1] }
- it { try.search('sunandrain').ids.should == [5] }
+ # Should find the one with all parts.
+ #
+ it { try.search('purplerainbow').ids.should == [1] }
+ it { try.search('sunandrain').ids.should == [5] }
- # Common parts are found in multiple examples.
- #
- it { try.search('colorpurple').ids.should == [4,1] }
- it { try.search('bownew').ids.should == [3,1] }
- it { try.search('spainisking').ids.should == [6,1] }
- end
+ # Common parts are found in multiple examples.
+ #
+ it { try.search('colorpurple').ids.should == [4,1] }
+ it { try.search('bownew').ids.should == [3,1] }
+ it { try.search('spainisking').ids.should == [6,1] }
+ end
- it 'is fast enough' do
- automatic_splitter = Picky::Splitters::Automatic.new index[:text]
+ it 'is fast enough' do
+ automatic_splitter = Picky::Splitters::Automatic.new index[:text]
- performance_of do
- automatic_splitter.split('purplerainbow')
- end.should < 0.0002
+ performance_of do
+ automatic_splitter.split('purplerainbow')
+ end.should < 0.0002
+ end
+ end
end
end
\ No newline at end of file