spec/processor_spec.rb in pupa-0.0.11 vs spec/processor_spec.rb in pupa-0.0.12
- old
+ new
@@ -100,10 +100,18 @@
before :each do
Pupa.session = Moped::Session.new(['localhost:27017'], database: 'pupa_test')
Pupa.session.collections.each(&:drop)
end
+ let :_type do
+ if testing_python_compatibility?
+ 'organization'
+ else
+ 'pupa/organization'
+ end
+ end
+
let :graphable do
{
'1' => Pupa::Organization.new({
_id: '1',
name: 'Child',
@@ -123,11 +131,11 @@
let :ungraphable do
{
'4' => Pupa::Organization.new({
_id: '4',
name: 'Child',
- parent: {_type: 'pupa/organization', name: 'Parent'},
+ parent: {_type: _type, name: 'Parent'},
}),
'5' => Pupa::Organization.new({
_id: '5',
name: 'Parent',
}),
@@ -136,42 +144,72 @@
name: 'Parent',
}),
}
end
+ let :foreign_keys_on_foreign_objects do
+ {
+ '7' => Pupa::Organization.new({
+ _id: '7',
+ name: 'Child',
+ parent: {_type: _type, name: 'Parent'},
+ }),
+ '8' => Pupa::Organization.new({
+ _id: '8',
+ name: 'Grandchild',
+ parent: {_type: _type, foreign_keys: {parent_id: '9'}}
+ }),
+ '9' => Pupa::Organization.new({
+ _id: '9',
+ name: 'Parent',
+ }),
+ }
+ end
+
it 'should use a dependency graph if possible' do
processor.should_receive(:load_scraped_objects).and_return(graphable)
Pupa::Processor::DependencyGraph.any_instance.should_receive(:tsort).and_return(['2', '1'])
processor.import
end
+ it 'should remove duplicate objects and re-assign foreign keys' do
+ processor.should_receive(:load_scraped_objects).and_return(graphable)
+
+ processor.import
+ documents = Pupa.session[:organizations].find.entries
+ documents.size.should == 2
+ documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '2', '_type' => _type, 'name' => 'Parent'}
+ documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '1', '_type' => _type, 'name' => 'Child', 'parent_id' => '2'}
+ end
+
it 'should not use a dependency graph if not possible' do
processor.should_receive(:load_scraped_objects).and_return(ungraphable)
Pupa::Processor::DependencyGraph.any_instance.should_not_receive(:tsort)
processor.import
end
- it 'should remove duplicate objects and re-assign foreign keys' do
- processor.should_receive(:load_scraped_objects).and_return(graphable)
+ it 'should remove duplicate objects and resolve foreign objects' do
+ processor.should_receive(:load_scraped_objects).and_return(ungraphable)
processor.import
documents = Pupa.session[:organizations].find.entries
documents.size.should == 2
- documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '2', '_type' => 'pupa/organization', 'name' => 'Parent'}
- documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '1', '_type' => 'pupa/organization', 'name' => 'Child', 'parent_id' => '2'}
+ documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '5', '_type' => _type, 'name' => 'Parent'}
+ documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '4', '_type' => _type, 'name' => 'Child', 'parent_id' => '5'}
end
- it 'should resolve foreign objects' do
- processor.should_receive(:load_scraped_objects).and_return(ungraphable)
+ it 'should resolve foreign keys on foreign objects' do
+ processor.should_receive(:load_scraped_objects).and_return(foreign_keys_on_foreign_objects)
processor.import
documents = Pupa.session[:organizations].find.entries
- documents.size.should == 2
- documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '5', '_type' => 'pupa/organization', 'name' => 'Parent'}
- documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '4', '_type' => 'pupa/organization', 'name' => 'Child', 'parent_id' => '5'}
+ documents.size.should == 3
+ documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '9', '_type' => _type, 'name' => 'Parent'}
+ documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '7', '_type' => _type, 'name' => 'Child', 'parent_id' => '9'}
+ documents[2].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '8', '_type' => _type, 'name' => 'Grandchild', 'parent_id' => '7'}
end
context 'with existing documents' do
before :each do
processor.should_receive(:load_scraped_objects).and_return(graphable)
@@ -194,16 +232,17 @@
name: 'Parent',
}),
}
end
+ # Use a foreign object to not use a dependency graph.
let :unresolvable_foreign_key do
{
'a' => Pupa::Organization.new({
_id: 'a',
name: 'Child',
- parent: {_type: 'pupa/organization', name: 'Parent'},
+ parent: {_type: _type, name: 'Parent'},
}),
'b' => Pupa::Organization.new({
_id: 'b',
name: 'Parent',
}),
@@ -218,11 +257,11 @@
let :unresolvable_foreign_object do
{
'a' => Pupa::Organization.new({
_id: 'a',
name: 'Child',
- parent: {_type: 'pupa/organization', name: 'Nonexistent'},
+ parent: {_type: _type, name: 'Nonexistent'},
}),
'b' => Pupa::Organization.new({
_id: 'b',
name: 'Parent',
}),
@@ -237,11 +276,11 @@
let :duplicate_documents do
{
'a' => Pupa::Organization.new({
_id: 'a',
name: 'Child',
- parent: {_type: 'pupa/organization', name: 'Parent'},
+ parent: {_type: _type, name: 'Parent'},
}),
'b' => Pupa::Organization.new({
_id: 'b',
name: 'Parent',
}),
@@ -251,18 +290,37 @@
parent_id: 'b',
}),
}
end
+ let :resolvable_foreign_keys_on_foreign_objects do
+ {
+ 'a' => Pupa::Organization.new({
+ _id: 'a',
+ name: 'Child',
+ parent: {_type: _type, name: 'Parent'},
+ }),
+ 'b' => Pupa::Organization.new({
+ _id: 'b',
+ name: 'Grandchild',
+ parent: {_type: _type, foreign_keys: {parent_id: 'c'}}
+ }),
+ 'c' => Pupa::Organization.new({
+ _id: 'c',
+ name: 'Parent',
+ }),
+ }
+ end
+
it 'should resolve foreign keys' do
processor.should_receive(:load_scraped_objects).and_return(resolvable_foreign_key)
processor.import
documents = Pupa.session[:organizations].find.entries
documents.size.should == 2
- documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '2', '_type' => 'pupa/organization', 'name' => 'Parent'}
- documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '1', '_type' => 'pupa/organization', 'name' => 'Child', 'parent_id' => '2'}
+ documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '2', '_type' => _type, 'name' => 'Parent'}
+ documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '1', '_type' => _type, 'name' => 'Child', 'parent_id' => '2'}
end
it 'should raise an error if a foreign key cannot be resolved' do
processor.should_receive(:load_scraped_objects).and_return(unresolvable_foreign_key)
expect{processor.import}.to raise_error(Pupa::Errors::UnprocessableEntity)
@@ -274,9 +332,20 @@
end
it 'should raise an error if a duplicate was inadvertently saved' do
processor.should_receive(:load_scraped_objects).and_return(duplicate_documents)
expect{processor.import}.to raise_error(Pupa::Errors::DuplicateDocumentError)
+ end
+
+ it 'should resolve foreign keys on foreign objects' do
+ processor.should_receive(:load_scraped_objects).and_return(resolvable_foreign_keys_on_foreign_objects)
+
+ processor.import
+ documents = Pupa.session[:organizations].find.entries
+ documents.size.should == 3
+ documents[0].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '2', '_type' => _type, 'name' => 'Parent'}
+ documents[1].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => '1', '_type' => _type, 'name' => 'Child', 'parent_id' => '2'}
+ documents[2].slice('_id', '_type', 'name', 'parent_id').should == {'_id' => 'b', '_type' => _type, 'name' => 'Grandchild', 'parent_id' => '1'}
end
end
end
end