spec/document_spec.rb in mida-0.2.0 vs spec/document_spec.rb in mida-0.3.0

- old
+ new

@@ -1,7 +1,7 @@ -require_relative 'spec_helper' -require_relative '../lib/mida' +require 'spec_helper' +require 'mida' def test_parsing(md, vocabulary, expected_results) items = md.search(vocabulary) expected_results.each_with_index do |expected_result,i| item = items[i] @@ -38,346 +38,83 @@ it 'should find the correct number of itemscopes' do @md.items.size.should == 1 end end -describe Mida::Document, 'when run with a document containing textContent and non textContent itemprops' do +describe Mida::Document do before do - @html = ' - <html> - <head itemscope> - <link itemprop="link_field" rel="stylesheet" type="text/css" href="stylesheet.css" /> - </head> - <body> - There is some text here - <div> - and also some here - <div itemscope> - <span itemprop="span_field">Some span content</span> - <time itemprop="dtreviewed" datetime="2009-01-06">Jan 6</time>. - <meta itemprop="meta_field" content="Some meta content"> - <a itemprop="a_field1" href="http://example.com">non content</a> - <a itemprop="a_field2" href="welcome/index.html">non content</a> - <a itemprop="a_field3" href="/intro">non content</a> - <a itemprop="a_field4" href="/intro/index.html">non content</a> - <map name="somemap"> - <area shape="rect" coords="0,0,50,120" href="left.html" /> - <area itemprop="area_right" shape="rect" coords="51,0,120,120" href="right.html" /> - </map> - <audio itemprop="audio_field" src="asound.ogg" controls="controls"> - Audio tag not supported by your browser. - </audio> - - <embed itemprop="embed_field" src="helloworld.swf" /> - <iframe itemprop="iframe_field" src="http://www.example.com/iframe_test"></iframe> - <img itemprop="img_field" src="animage.png" width="120" height="120" usemap="#planetmap" /> - <object itemprop="object_field" data="object.png" type="image/png" /> - <audio controls="controls"> - <source itemprop="source_field" src="song.ogg" type="audio/ogg" /> - <track itemprop="track_field" src="atrack.ogg" /> - Audio tag not supported by your browser. - </audio> - <video itemprop="video_field" src="movie.ogg" controls="controls"> - Video tag not supported by your browser. - </video> - </div> - </div> - </body> - </html> - ' - end - - - context 'when not given a page_url' do - before do - @md = Mida::Document.new(@html) - end - - it 'should return all the properties and types with the correct values' do - expected_results = [ - { vocabulary: Mida::Vocabulary::Generic, - type: nil, id: nil, properties: {'link_field' => ['']} }, - { vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'span_field' => ['Some span content'], - 'dtreviewed' => ['2009-01-06'], - 'meta_field' => ['Some meta content'], - 'a_field1' => ['http://example.com'], - 'a_field2' => [''], - 'a_field3' => [''], - 'a_field4' => [''], - 'area_right' => [''], - 'audio_field' => [''], - 'embed_field' => [''], - 'iframe_field' => ['http://www.example.com/iframe_test'], - 'img_field' => [''], - 'object_field' => [''], - 'source_field' => [''], - 'track_field' => [''], - 'video_field' => [''] - } - } - ] - - test_parsing(@md, %r{}, expected_results) - - end - end - - context 'when given a page_url' do - before do - @md = Mida::Document.new(@html, 'http://example.com/start/') - end - - it 'should return all the properties and types with the correct values' do - expected_results = [ - { vocabulary: Mida::Vocabulary::Generic, - type: nil, id: nil, properties: { - 'link_field' => ['http://example.com/start/stylesheet.css'] - } - }, - { vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'span_field' => ['Some span content'], - 'dtreviewed' => ['2009-01-06'], - 'meta_field' => ['Some meta content'], - 'a_field1' => ['http://example.com'], - 'a_field2' => ['http://example.com/start/welcome/index.html'], - 'a_field3' => ['http://example.com/intro'], - 'a_field4' => ['http://example.com/intro/index.html'], - 'area_right' => ['http://example.com/start/right.html'], - 'audio_field' => ['http://example.com/start/asound.ogg'], - 'embed_field' => ['http://example.com/start/helloworld.swf'], - 'iframe_field' => ['http://www.example.com/iframe_test'], - 'img_field' => ['http://example.com/start/animage.png'], - 'object_field' => ['http://example.com/start/object.png'], - 'source_field' => ['http://example.com/start/song.ogg'], - 'track_field' => ['http://example.com/start/atrack.ogg'], - 'video_field' => ['http://example.com/start/movie.ogg'] - } - } - ] - - test_parsing(@md, %r{}, expected_results) - end - end - -end - -describe Mida::Document, 'when run against a full html document containing one itemscope with no itemtype' do - - before do html = ' <html><body> There is some text here <div> and also some here - <div itemscope> + <div itemscope itemtype="http://data-vocabulary.org/Review"> <span itemprop="itemreviewed">Romeo Pizza</span> - Reviewed by <span itemprop="reviewer">Ulysses Grant</span> on - <time itemprop="dtreviewed" datetime="2009-01-06">Jan 6</time>. - <meta itemprop="fielda" content="a5482"> - - <span itemprop="summary">Delicious, tasty pizza in Eastlake!</span> - <span itemprop="description">This is a very nice pizza place.</span> Rating: <span itemprop="rating">4.5</span> </div> - </div> - </body></html> - ' - @md = Mida::Document.new(html) - - end - - it_should_behave_like 'one root itemscope' - - it 'should return all the properties and types with the correct values' do - expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'itemreviewed' => ['Romeo Pizza'], - 'reviewer' => ['Ulysses Grant'], - 'dtreviewed' => ['2009-01-06'], - 'fielda' => ['a5482'], - 'summary' => ['Delicious, tasty pizza in Eastlake!'], - 'description' => ['This is a very nice pizza place.'], - 'rating' => ['4.5'] - } - }] - - test_parsing(@md, %r{}, expected_results) - end - -end - -describe Mida::Document, 'when run against a full html document containing one itemscope nested within another' do - - before do - html = ' - <html><body> - There is some text here - <div> - and also some here - <div itemscope> - <span itemprop="itemreviewed">Romeo Pizza</span> - <div itemprop="address" itemscope> - <span itemprop="firstline">237 Italian Way</span> - <span itemprop="country">United Kingdom</span> - </div> - Rating: <span itemprop="rating">4.5</span> + <div itemscope itemtype="http://data-vocabulary.org/Organization"> + <span itemprop="name">An org name</span> + <span itemprop="url">http://example.com</span> </div> </div> </body></html> ' @md = Mida::Document.new(html) - end - it_should_behave_like 'one root itemscope' - - it 'should return all the properties and types with the correct values' do - expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'itemreviewed' => ['Romeo Pizza'], - 'address' => [{ - vocabulary: Mida::Vocabulary::Generic, - type: nil, id: nil, properties: { - 'firstline' => ['237 Italian Way'], - 'country' => ['United Kingdom'] - } - }], - 'rating' => ['4.5'] - } - }] - - test_parsing(@md, %r{}, expected_results) + it '#each should pass each item to the block' do + item_num = 0 + @md.each {|item| item.should == @md.items[item_num]; item_num += 1} end -end + it 'should have access to the Enumerable mixin methods such as #find' do + review = @md.find {|item| item.type == 'http://data-vocabulary.org/Review'} + review.type.should == 'http://data-vocabulary.org/Review' + review.properties['itemreviewed'].should == ["Romeo Pizza"] -describe Mida::Document, 'when run against a full html document containing one itemscope nested within another within another' do - - before do - html = ' - <html><body> - There is some text here - <div> - and also some here - <div itemscope> - <span itemprop="itemreviewed">Romeo Pizza</span> - <div itemprop="address" itemscope> - <div itemprop="firstline" itemscope> - <span itemprop="number">237</span> - <span itemprop="road">Italian Way</span> - </div> - <span itemprop="country">United Kingdom</span> - </div> - Rating: <span itemprop="rating">4.5</span> - </div> - </div> - </body></html> - ' - - @md = Mida::Document.new(html) + organization = @md.find {|item| item.type == 'http://data-vocabulary.org/Organization'} + organization.type.should == 'http://data-vocabulary.org/Organization' + organization.properties['name'].should == ["An org name"] end - - it_should_behave_like 'one root itemscope' - - it 'should return all the properties and types with the correct values' do - expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'itemreviewed' => ['Romeo Pizza'], - 'address' => [{ - vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'firstline' => [{ - vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'number' => ['237'], - 'road' => ['Italian Way'] - }, - }], - 'country' => ['United Kingdom'] - }, - }], - 'rating' => ['4.5'] - } - }] - - test_parsing(@md, %r{^$}, expected_results) - end - end -describe Mida::Document, 'when run against a full html document containing one itemscope with an itemtype' do +describe Mida::Document, 'when run against a full html document containing itemscopes with and without itemtypes' do before do html = ' <html><body> There is some text here <div> and also some here <div itemscope itemtype="http://data-vocabulary.org/Review"> <span itemprop="itemreviewed">Romeo Pizza</span> - Reviewed by <span itemprop="reviewer">Ulysses Grant</span> on - <time itemprop="dtreviewed" datetime="2009-01-06">Jan 6</time>. - <span itemprop="summary">Delicious, tasty pizza in Eastlake!</span> - <span itemprop="description">This is a very nice pizza place.</span> Rating: <span itemprop="rating">4.5</span> </div> + <div itemscope> + <span itemprop="name">An org name</span> + <span itemprop="url">http://example.com</span> + </div> </div> </body></html> ' @md = Mida::Document.new(html) end - it_should_behave_like 'one root itemscope' - - it 'should find the correct number of itemscopes if outer specified' do - @md.search(%r{http://data-vocabulary.org/Review}).size.should == 1 + it '#search should be able to match against items without an itemtype' do + items = @md.search(%r{^$}) + items.size.should == 1 + items[0].properties['name'].should == ['An org name'] end - it 'should specify the correct type' do - @md.search(%r{http://data-vocabulary.org/Review}).first.type.should == 'http://data-vocabulary.org/Review' + it '#search should be able to match against items with an itemtype' do + items = @md.search(%r{^.+$}) + items.size.should == 1 + items[0].type.should == 'http://data-vocabulary.org/Review' end - - it 'should return all the properties and types with the correct values' do - expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, - type: 'http://data-vocabulary.org/Review', - id: nil, - properties: { - 'itemreviewed' => ['Romeo Pizza'], - 'reviewer' => ['Ulysses Grant'], - 'dtreviewed' => ['2009-01-06'], - 'summary' => ['Delicious, tasty pizza in Eastlake!'], - 'description' => ['This is a very nice pizza place.'], - 'rating' => ['4.5'] - } - }] - test_parsing(@md, %r{http://data-vocabulary.org/Review}, expected_results) - end - end describe Mida::Document, 'when run against a full html document containing two non-nested itemscopes with itemtypes' do before do @@ -421,11 +158,10 @@ end it 'should return all the properties and types with the correct values for 1st itemscope' do expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, type: 'http://data-vocabulary.org/Review', id: nil, properties: { 'itemreviewed' => ['Romeo Pizza'], 'rating' => ['4.5'] @@ -434,11 +170,10 @@ test_parsing(@md, %r{http://data-vocabulary.org/Review}, expected_results) end it 'should return all the properties from the text for 2nd itemscope' do expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, type: 'http://data-vocabulary.org/Organization', id: nil, properties: { 'name' => ['An org name'], 'url' => ['http://example.com'] @@ -480,23 +215,22 @@ vocabularies = [ %r{http://data-vocabulary.org/Product}, %r{http://data-vocabulary.org/Review-aggregate} ] vocabularies.each {|vocabulary| @md.search(vocabulary).size.should == 1} + end context "when looking at the outer vocabulary" do it 'should return all the properties from the text with the correct values' do expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, type: 'http://data-vocabulary.org/Product', id: nil, properties: { 'name' => ['DC07'], 'brand' => ['Dyson'], 'review' => [{ - vocabulary: Mida::Vocabulary::Generic, type: 'http://data-vocabulary.org/Review-aggregate', id: nil, properties: { 'count' => ['1'], 'rating' => ['5.0'] @@ -569,196 +303,6 @@ } @md.search('http://data-vocabulary.org/Product').first.should == expected_result end end -end - -describe Mida::Document, 'when run against a document using itemrefs' do - - before do - html = ' - <html><body> - <div itemscope id="amanda" itemref="a b"> - <span itemprop="age">30</span> - </div> - <p id="a">Name: <span itemprop="name">Amanda</span></p> - <div id="b" itemprop="band" itemscope itemref="c"></div> - <div id="c"> - <p>Band: <span itemprop="name">Jazz Band</span></p> - <p>Size: <span itemprop="size">12</span> players</p> - </div> - </body></html> - ' - - @md = Mida::Document.new(html) - end - - it 'should return all the properties from the text with the correct values' do - expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'name' => ['Amanda'], - 'band' => [{ - vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'name' => ['Jazz Band'], - 'size' => ['12'] - } - }], - 'age' => ['30'] - } - }] - - test_parsing(@md, %r{}, expected_results) - end -end - -describe Mida::Document, 'when run against a document using multiple itemprops with the same name' do - - before do - html = ' - <html><body> - <div itemscope itemtype="icecreams"> - <p>Flavours in my favourite ice cream:</p> - <ul> - <li itemprop="flavour">Lemon sorbet</li> - <li itemprop="flavour">Apricot sorbet</li> - <li itemprop="flavour" itemscope itemtype="icecream-type"> - <span itemprop="fruit">Strawberry</span> - <span itemprop="style">Homemade</span> - </li> - </ul> - </div> - </body></html> - ' - - @md = Mida::Document.new(html) - end - - it_should_behave_like 'one root itemscope' - - it 'should return the correct number of itemscopes' do - vocabularies = [ - %r{icecreams}, - %r{icecream-type} - ] - vocabularies.each {|vocabulary| @md.search(vocabulary).size.should == 1} - end - - it 'should return all the properties from the text with the correct values' do - expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, - type: 'icecreams', - id: nil, - properties: { - 'flavour' => [ - 'Lemon sorbet', - 'Apricot sorbet', - { vocabulary: Mida::Vocabulary::Generic, - type: 'icecream-type', - id: nil, - properties: { - 'fruit' => ['Strawberry'], - 'style' => ['Homemade'] - } - } - ] - } - }] - - test_parsing(@md, %r{icecreams}, expected_results) - end -end - -describe Mida::Document, 'when run against a document using an itemprop with multiple properties' do - - before do - html = ' - <html><body> - <div itemscope> - <span itemprop="favourite-colour favourite-fruit">orange</span> - </div> - </body></html> - ' - - @md = Mida::Document.new(html) - end - - it 'should return all the properties from the text with the correct values' do - expected_results = [{ - vocabulary: Mida::Vocabulary::Generic, - type: nil, - id: nil, - properties: { - 'favourite-colour' => ['orange'], - 'favourite-fruit' => ['orange'] - } - }] - - test_parsing(@md, %r{}, expected_results) - end -end - -describe Mida::Document, 'when run against a full html document containing an itemtype that matches a registered vocabulary' do - - before do - html = ' - <html><body> - There is some text here - <div> - and also some here - <div itemscope itemtype="http://data-vocabulary.org/Review"> - <span itemprop="itemreviewed">Romeo Pizza</span> - Reviewed by <span itemprop="reviewer">Ulysses Grant</span> on - <time itemprop="dtreviewed" datetime="2009-01-06">Jan 6</time>. - <span itemprop="summary">Delicious, tasty pizza in Eastlake!</span> - <span itemprop="description">This is a very nice pizza place.</span> - Rating: <span itemprop="rating">4.5</span> - </div> - </div> - </body></html> - ' - - class Review < Mida::VocabularyDesc - itemtype %r{http://data-vocabulary.org/Review} - has_one 'itemreviewed', 'reviewer', 'dtreviewed', 'summary' - has_one 'rating', 'description' - end - Mida::Vocabulary.register(Review) - - @md = Mida::Document.new(html) - - end - - it_should_behave_like 'one root itemscope' - - it '#search should match against Review' do - @md.search(Review).size.should == 1 - end - - it 'should specify the correct type' do - @md.search(Review).first.type.should == 'http://data-vocabulary.org/Review' - end - - it 'should return all the properties and types with the correct values' do - expected_results = [{ - vocabulary: Review, - type: 'http://data-vocabulary.org/Review', - id: nil, - properties: { - 'itemreviewed' => ['Romeo Pizza'], - 'reviewer' => ['Ulysses Grant'], - 'dtreviewed' => ['2009-01-06'], - 'summary' => ['Delicious, tasty pizza in Eastlake!'], - 'description' => ['This is a very nice pizza place.'], - 'rating' => ['4.5'] - } - }] - test_parsing(@md, Review, expected_results) - end - end