require_relative 'spec_helper'
require_relative '../lib/mida'
# Searches +md+ for +vocabulary+ and checks the found items, in order, against
# +expected_results+.
#
# md               - object responding to #search (a Mida::Document in these specs).
# vocabulary       - whatever md.search accepts (Regexp or vocabulary class here).
# expected_results - Array of expected item hashes.
#
# The expected hash at each index is paired with the found item at the same
# index and passed to test_to_h and then test_properties. Found items beyond
# expected_results.size are not examined; a missing found item is passed on
# as nil.
def test_parsing(md, vocabulary, expected_results)
  found_items = md.search(vocabulary)
  expected_results.each.with_index do |expected, position|
    actual = found_items[position]
    test_to_h(actual, expected)
    test_properties(actual, expected)
  end
end
# Expects the hash form of +item+ (item.to_h) to equal +expected_result+,
# using the `should ==` expectation.
def test_to_h(item, expected_result)
  actual_hash = item.to_h
  actual_hash.should == expected_result
end
# Walks every property of +item+ and checks its values against the entry with
# the same name under expected_result[:properties], delegating the element-wise
# comparison to match_array.
#
# Only properties present on +item+ are visited; names that appear solely in
# the expectation are not looked at here.
def test_properties(item, expected_result)
  item.properties.each do |property_name, actual_values|
    expected_values = expected_result[:properties][property_name]
    match_array(actual_values, expected_values)
  end
end
# Compares +value_array+ with +expected_results+ position by position.
#
# An element that is a Mida::Item is checked via test_properties against the
# expectation at the same index; any other element must equal that
# expectation (`should ==`). Only indexes present in +value_array+ are visited.
def match_array(value_array, expected_results)
  value_array.each.with_index do |actual, position|
    case actual
    when Mida::Item then test_properties(actual, expected_results[position])
    else actual.should == expected_results[position]
    end
  end
end
# Shared examples for groups whose document should expose exactly one
# top-level item. The including group must assign the parsed document to @md.
shared_examples_for 'one root itemscope' do
  it 'should not match itemscopes with different names' do
    unrelated = @md.search(%r{nothing})
    unrelated.size.should == 0
  end

  it 'should find the correct number of itemscopes' do
    root_items = @md.items
    root_items.size.should == 1
  end
end
# Checks the property values Mida::Document reports for a document mixing
# "textContent and non textContent itemprops" (per the group description),
# both without and with a page_url passed to Mida::Document.new.
describe Mida::Document, 'when run with a document containing textContent and non textContent itemprops' do
before do
# NOTE(review): as visible here the fixture string holds only a line break, yet
# the expectations below name many itemprops -- presumably the markup is
# missing from this view; confirm against the real fixture.
@html = '
'
end
# No page_url: a_field1 and iframe_field keep the same absolute URL they have
# in the page_url context below; every other URL-looking property is expected
# to be ''.
context 'when not given a page_url' do
before do
@md = Mida::Document.new(@html)
end
it 'should return all the properties and types with the correct values' do
# Two items are expected, in this order: one with only link_field, then one
# carrying all remaining properties.
expected_results = [
{ vocabulary: Mida::Vocabulary::Generic,
type: nil, id: nil, properties: {'link_field' => ['']} },
{ vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'span_field' => ['Some span content'],
'dtreviewed' => ['2009-01-06'],
'meta_field' => ['Some meta content'],
'a_field1' => ['http://example.com'],
'a_field2' => [''],
'a_field3' => [''],
'a_field4' => [''],
'area_right' => [''],
'audio_field' => [''],
'embed_field' => [''],
'iframe_field' => ['http://www.example.com/iframe_test'],
'img_field' => [''],
'object_field' => [''],
'source_field' => [''],
'track_field' => [''],
'video_field' => ['']
}
}
]
# %r{} is an empty pattern, used here to search without naming a type.
test_parsing(@md, %r{}, expected_results)
end
end
# page_url 'http://example.com/start/': the properties that were '' above are
# now expected as absolute URLs on example.com.
context 'when given a page_url' do
before do
@md = Mida::Document.new(@html, 'http://example.com/start/')
end
it 'should return all the properties and types with the correct values' do
expected_results = [
{ vocabulary: Mida::Vocabulary::Generic,
type: nil, id: nil, properties: {
'link_field' => ['http://example.com/start/stylesheet.css']
}
},
{ vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'span_field' => ['Some span content'],
'dtreviewed' => ['2009-01-06'],
'meta_field' => ['Some meta content'],
'a_field1' => ['http://example.com'],
'a_field2' => ['http://example.com/start/welcome/index.html'],
'a_field3' => ['http://example.com/intro'],
'a_field4' => ['http://example.com/intro/index.html'],
'area_right' => ['http://example.com/start/right.html'],
'audio_field' => ['http://example.com/start/asound.ogg'],
'embed_field' => ['http://example.com/start/helloworld.swf'],
'iframe_field' => ['http://www.example.com/iframe_test'],
'img_field' => ['http://example.com/start/animage.png'],
'object_field' => ['http://example.com/start/object.png'],
'source_field' => ['http://example.com/start/song.ogg'],
'track_field' => ['http://example.com/start/atrack.ogg'],
'video_field' => ['http://example.com/start/movie.ogg']
}
}
]
test_parsing(@md, %r{}, expected_results)
end
end
end
# Parses a document expected to contain a single item without an itemtype and
# checks the item's hash form and properties.
describe Mida::Document, 'when run against a full html document containing one itemscope with no itemtype' do
before do
# NOTE(review): only text content of the fixture is visible here; values such
# as 'a5482' and '2009-01-06' expected below do not appear in it, so the markup
# presumably was lost from this view -- confirm against the real fixture.
html = '
There is some text here
and also some here
Romeo Pizza
Reviewed by Ulysses Grant on
.
Delicious, tasty pizza in Eastlake!This is a very nice pizza place.
Rating: 4.5
'
@md = Mida::Document.new(html)
end
# Reuses the shared examples, which read @md.
it_should_behave_like 'one root itemscope'
it 'should return all the properties and types with the correct values' do
# type is nil and the vocabulary is the Generic one for this untyped item.
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'itemreviewed' => ['Romeo Pizza'],
'reviewer' => ['Ulysses Grant'],
'dtreviewed' => ['2009-01-06'],
'fielda' => ['a5482'],
'summary' => ['Delicious, tasty pizza in Eastlake!'],
'description' => ['This is a very nice pizza place.'],
'rating' => ['4.5']
}
}]
test_parsing(@md, %r{}, expected_results)
end
end
# Parses a document expected to contain one root item whose 'address' property
# is itself an item, and checks the nested structure.
describe Mida::Document, 'when run against a full html document containing one itemscope nested within another' do
before do
# NOTE(review): only the fixture's text content is visible here; the itemscope
# markup is presumably missing from this view -- confirm.
html = '
There is some text here
and also some here
Romeo Pizza
237 Italian WayUnited Kingdom
Rating: 4.5
'
@md = Mida::Document.new(html)
end
it_should_behave_like 'one root itemscope'
it 'should return all the properties and types with the correct values' do
# Only one expectation is listed, so test_parsing inspects only the first item
# returned by search; the nested item is checked through the 'address' value.
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'itemreviewed' => ['Romeo Pizza'],
'address' => [{
vocabulary: Mida::Vocabulary::Generic,
type: nil, id: nil, properties: {
'firstline' => ['237 Italian Way'],
'country' => ['United Kingdom']
}
}],
'rating' => ['4.5']
}
}]
test_parsing(@md, %r{}, expected_results)
end
end
# Parses a document expected to contain three levels of nesting: a root item,
# an 'address' item inside it, and a 'firstline' item inside the address.
describe Mida::Document, 'when run against a full html document containing one itemscope nested within another within another' do
before do
# NOTE(review): only the fixture's text content is visible here; the itemscope
# markup is presumably missing from this view -- confirm.
html = '
There is some text here
and also some here
Romeo Pizza
237Italian Way
United Kingdom
Rating: 4.5
'
@md = Mida::Document.new(html)
end
it_should_behave_like 'one root itemscope'
it 'should return all the properties and types with the correct values' do
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'itemreviewed' => ['Romeo Pizza'],
'address' => [{
vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'firstline' => [{
vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'number' => ['237'],
'road' => ['Italian Way']
},
}],
'country' => ['United Kingdom']
},
}],
'rating' => ['4.5']
}
}]
# Unlike the other untyped groups, this one searches with %r{^$} rather than
# %r{}. NOTE(review): presumably both select items that have no itemtype --
# confirm against Mida::Document#search.
test_parsing(@md, %r{^$}, expected_results)
end
end
# Parses a document expected to contain a single item typed
# 'http://data-vocabulary.org/Review' and checks searching by that type.
describe Mida::Document, 'when run against a full html document containing one itemscope with an itemtype' do
before do
# NOTE(review): only the fixture's text content is visible here; the itemtype
# and itemprop markup is presumably missing from this view -- confirm.
html = '
There is some text here
and also some here
Romeo Pizza
Reviewed by Ulysses Grant on
.
Delicious, tasty pizza in Eastlake!This is a very nice pizza place.
Rating: 4.5
'
@md = Mida::Document.new(html)
end
it_should_behave_like 'one root itemscope'
it 'should find the correct number of itemscopes if outer specified' do
@md.search(%r{http://data-vocabulary.org/Review}).size.should == 1
end
it 'should specify the correct type' do
@md.search(%r{http://data-vocabulary.org/Review}).first.type.should == 'http://data-vocabulary.org/Review'
end
it 'should return all the properties and types with the correct values' do
# The vocabulary is still expected to be Generic here even though a type is set.
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: 'http://data-vocabulary.org/Review',
id: nil,
properties: {
'itemreviewed' => ['Romeo Pizza'],
'reviewer' => ['Ulysses Grant'],
'dtreviewed' => ['2009-01-06'],
'summary' => ['Delicious, tasty pizza in Eastlake!'],
'description' => ['This is a very nice pizza place.'],
'rating' => ['4.5']
}
}]
test_parsing(@md, %r{http://data-vocabulary.org/Review}, expected_results)
end
end
# Parses a document expected to contain two sibling items, a Review and an
# Organization, and checks that each can be found and has the right values.
describe Mida::Document, 'when run against a full html document containing two non-nested itemscopes with itemtypes' do
before do
# NOTE(review): only the fixture's text content is visible here; the itemtype
# and itemprop markup is presumably missing from this view -- confirm.
html = '
There is some text here
and also some here
Romeo Pizza
Rating: 4.5
An org namehttp://example.com
'
@md = Mida::Document.new(html)
end
it 'should return all the itemscopes' do
@md.items.size.should == 2
end
it 'should give the type of each itemscope if none specified' do
# Tally how many items report each expected type. The hash has no default, so
# an item with any other type looks up nil and `+= 1` raises.
itemscope_names = {
'http://data-vocabulary.org/Review' => 0,
'http://data-vocabulary.org/Organization' => 0
}
@md.items.each do |item|
itemscope_names[item.type] += 1
end
itemscope_names.size.should eq 2
# Each expected type must have been seen exactly once.
itemscope_names.each { |name, num| num.should == 1 }
end
it 'should return all the properties and types with the correct values for 1st itemscope' do
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: 'http://data-vocabulary.org/Review',
id: nil,
properties: {
'itemreviewed' => ['Romeo Pizza'],
'rating' => ['4.5']
}
}]
test_parsing(@md, %r{http://data-vocabulary.org/Review}, expected_results)
end
it 'should return all the properties from the text for 2nd itemscope' do
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: 'http://data-vocabulary.org/Organization',
id: nil,
properties: {
'name' => ['An org name'],
'url' => ['http://example.com']
}
}]
test_parsing(@md, %r{http://data-vocabulary.org/Organization}, expected_results)
end
end
# Parses a document expected to contain a Product item whose 'review' property
# is a Review-aggregate item wrapped in a non-itemscope block (per the group
# description).
describe Mida::Document, 'when run against a full html document containing one
itemscope nested within another and the inner block is
surrounded with another non itemscope block' do
before do
# NOTE(review): only the fixture's text content is visible here; the markup is
# presumably missing from this view -- confirm.
html = '
DC07
Dyson
1 Review,
Average: 5.0
'
@md = Mida::Document.new(html)
end
it_should_behave_like 'one root itemscope'
it 'should return the correct number of itemscopes' do
# Each type, outer and inner, is expected to be found exactly once by search.
vocabularies = [
%r{http://data-vocabulary.org/Product},
%r{http://data-vocabulary.org/Review-aggregate}
]
vocabularies.each {|vocabulary| @md.search(vocabulary).size.should == 1}
end
context "when looking at the outer vocabulary" do
it 'should return all the properties from the text with the correct values' do
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: 'http://data-vocabulary.org/Product',
id: nil,
properties: {
'name' => ['DC07'],
'brand' => ['Dyson'],
'review' => [{
vocabulary: Mida::Vocabulary::Generic,
type: 'http://data-vocabulary.org/Review-aggregate',
id: nil,
properties: {
'count' => ['1'],
'rating' => ['5.0']
}
}]
}
}]
test_parsing(@md, %r{http://data-vocabulary.org/Product}, expected_results)
end
end
end
# Parses a document in which one item contains another item that is not one of
# its properties ("non-linked", per the group description); both are expected
# to be reported as separate items.
describe Mida::Document, 'when run against a document containing an itemscope
that contains another non-linked itemscope' do
before do
# NOTE(review): only the fixture's text content is visible here; the markup is
# presumably missing from this view -- confirm.
html = '
DC07
Dyson
1 Review,
Average: 5.0
'
@md = Mida::Document.new(html)
end
it 'should return the correct number of itemscopes when search used' do
# Maps each search pattern to the number of items it should find.
vocabularies = {
%r{} => 2,
%r{http://data-vocabulary.org/Product} => 1,
%r{http://data-vocabulary.org/Review-aggregate} => 1
}
vocabularies.each {|vocabulary, num| @md.search(vocabulary).size.should == num}
end
it 'should return the correct number of items' do
@md.items.size.should == 2
end
context "when no vocabulary specified or looking at the outer vocabulary" do
it 'should return all the properties from the text with the correct values' do
# Marked pending: the expectation relies on a `contains:` key that, per the
# pending message, is not working yet.
pending("get the contains: feature working")
expected_result = {
type: 'http://data-vocabulary.org/Product',
id: nil,
properties: {
'name' => 'DC07',
'brand' => 'Dyson'
},
contains: {
type: 'http://data-vocabulary.org/Review-aggregate',
id: nil,
properties: {
'count' => '1',
'rating' => '5.0'
}
}
}
# NOTE(review): search is given a String here, while every other call in this
# file passes a Regexp or a vocabulary class, and property values are bare
# strings rather than arrays -- revisit when the pending is lifted.
@md.search('http://data-vocabulary.org/Product').first.should == expected_result
end
end
end
# Parses a document that uses itemrefs (per the group description) and checks
# that the first found item carries 'name', a nested 'band' item and 'age'.
describe Mida::Document, 'when run against a document using itemrefs' do
before do
# NOTE(review): only the fixture's text content is visible here; the
# itemref/itemprop markup is presumably missing from this view -- confirm.
html = '
30
Name: Amanda
Band: Jazz Band
Size: 12 players
'
@md = Mida::Document.new(html)
end
it 'should return all the properties from the text with the correct values' do
# 'size' is expected as '12' although the visible text reads "12 players".
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'name' => ['Amanda'],
'band' => [{
vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'name' => ['Jazz Band'],
'size' => ['12']
}
}],
'age' => ['30']
}
}]
test_parsing(@md, %r{}, expected_results)
end
end
# Parses a document where the same itemprop name ('flavour') occurs several
# times, mixing plain text values with a nested typed item.
describe Mida::Document, 'when run against a document using multiple itemprops with the same name' do
before do
# NOTE(review): only the fixture's text content is visible here; the markup is
# presumably missing from this view -- confirm.
html = '
Flavours in my favourite ice cream:
Lemon sorbet
Apricot sorbet
StrawberryHomemade
'
@md = Mida::Document.new(html)
end
it_should_behave_like 'one root itemscope'
it 'should return the correct number of itemscopes' do
# Each of the two types is expected to be found exactly once by search.
vocabularies = [
%r{icecreams},
%r{icecream-type}
]
vocabularies.each {|vocabulary| @md.search(vocabulary).size.should == 1}
end
it 'should return all the properties from the text with the correct values' do
# The repeated 'flavour' values are expected in one array, in this order.
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: 'icecreams',
id: nil,
properties: {
'flavour' => [
'Lemon sorbet',
'Apricot sorbet',
{ vocabulary: Mida::Vocabulary::Generic,
type: 'icecream-type',
id: nil,
properties: {
'fruit' => ['Strawberry'],
'style' => ['Homemade']
}
}
]
}
}]
test_parsing(@md, %r{icecreams}, expected_results)
end
end
# Parses a document where a single itemprop names several properties (per the
# group description); the one value 'orange' is expected under both names.
describe Mida::Document, 'when run against a document using an itemprop with multiple properties' do
before do
# NOTE(review): only the fixture's text content is visible here; the itemprop
# markup is presumably missing from this view -- confirm.
html = '
orange
'
@md = Mida::Document.new(html)
end
it 'should return all the properties from the text with the correct values' do
expected_results = [{
vocabulary: Mida::Vocabulary::Generic,
type: nil,
id: nil,
properties: {
'favourite-colour' => ['orange'],
'favourite-fruit' => ['orange']
}
}]
test_parsing(@md, %r{}, expected_results)
end
end
# Parses a Review document after registering a matching vocabulary class and
# checks that the item is reported with that vocabulary instead of the Generic
# one, and that search accepts the class itself.
describe Mida::Document, 'when run against a full html document containing an itemtype that matches a registered vocabulary' do
before do
# NOTE(review): only the fixture's text content is visible here; the itemtype
# and itemprop markup is presumably missing from this view -- confirm.
html = '
There is some text here
and also some here
Romeo Pizza
Reviewed by Ulysses Grant on
.
Delicious, tasty pizza in Eastlake!This is a very nice pizza place.
Rating: 4.5
'
# The class body and the registration run every time this hook runs, and the
# document is created only after registration.
# NOTE(review): the Review constant and its registration presumably outlive
# this group -- confirm this cannot affect other specs.
class Review < Mida::VocabularyDesc
itemtype %r{http://data-vocabulary.org/Review}
has_one 'itemreviewed', 'reviewer', 'dtreviewed', 'summary'
has_one 'rating', 'description'
end
Mida::Vocabulary.register(Review)
@md = Mida::Document.new(html)
end
it_should_behave_like 'one root itemscope'
it '#search should match against Review' do
@md.search(Review).size.should == 1
end
it 'should specify the correct type' do
@md.search(Review).first.type.should == 'http://data-vocabulary.org/Review'
end
it 'should return all the properties and types with the correct values' do
# vocabulary is expected to be the registered Review class.
expected_results = [{
vocabulary: Review,
type: 'http://data-vocabulary.org/Review',
id: nil,
properties: {
'itemreviewed' => ['Romeo Pizza'],
'reviewer' => ['Ulysses Grant'],
'dtreviewed' => ['2009-01-06'],
'summary' => ['Delicious, tasty pizza in Eastlake!'],
'description' => ['This is a very nice pizza place.'],
'rating' => ['4.5']
}
}]
test_parsing(@md, Review, expected_results)
end
end