#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Test::Unit suite exercising Hpricot parsing, CSS/XPath searching and
# document manipulation, using inline snippets and the TestFiles fixtures.
#
# NOTE(review): in the copy of this file that was reviewed, the line structure
# had been collapsed and the markup inside string/heredoc fixtures had been
# stripped (tags removed, entities decoded). Every fixture tagged
# "NOTE(review)" below was rebuilt from the surviving text and the assertions
# next to it. Confirm each one against version control before relying on it.

require 'test/unit'
require 'hpricot'
require 'load_files'

class TestParser < Test::Unit::TestCase
  def test_set_attr
    @basic = Hpricot.parse(TestFiles::BASIC)
    @basic.search('//p').set('class', 'para')
    assert_equal 4, @basic.search('//p').length
    assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
  end

  # Test creating a new element
  def test_new_element
    elem = Hpricot::Elem.new('form')
    assert_not_nil(elem)
    assert_not_nil(elem.attributes)
  end

  def test_scan_text
    assert_equal 'FOO', Hpricot.make("FOO").children.first.content
  end

  def test_filter_by_attr
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)

    # this link is escaped in the doc
    link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
    assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
  end

  def test_filter_contains
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 'Sample XHTML', @basic.search("title:contains('Sample')").to_s
  end

  def test_get_element_by_id
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 'link1', @basic.get_element_by_id('link1')['id']
    assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
  end

  def test_get_element_by_tag_name
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
    assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
  end

  def test_get_elements_by_tag_name_star
    # NOTE(review): markup rebuilt (one div wrapping two p) -- confirm.
    simple = Hpricot.parse("<div><p id='first'>First</p><p id='second'>Second</p></div>")
    assert_equal 3, simple.get_elements_by_tag_name("*").size
    assert_equal 1, simple.get_elements_by_tag_name("div").size
    assert_equal 2, simple.get_elements_by_tag_name("p").size
  end

  # Re-parses the serialized document and runs the same checks as
  # test_scan_basic on the result.
  def test_output_basic
    @basic = Hpricot.parse(TestFiles::BASIC)
    @basic2 = Hpricot.parse(@basic.inner_html)
    scan_basic @basic2
  end

  def test_scan_basic
    @basic = Hpricot.parse(TestFiles::BASIC)
    scan_basic @basic
  end

  # Shared assertions for a parsed copy of TestFiles::BASIC; called by
  # test_scan_basic and test_output_basic.
  def scan_basic(doc)
    assert_kind_of Hpricot::XMLDecl, doc.children.first
    assert_not_equal doc.children.first.to_s, doc.children[1].to_s
    assert_equal 'link1', doc.at('#link1')['id']
    assert_equal 'link1', doc.at("p a")['id']
    assert_equal 'link1', (doc/:p/:a).first['id']
    assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
    assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
    # Explicit call parentheses: a space before "(" would make Ruby treat the
    # first argument as a grouped expression and warn.
    assert_equal((doc/'p')[2], (doc/'p').filter(':nth(2)')[0])
    assert_equal((doc/'p')[2], (doc/'p').filter('[3]')[0])
    assert_equal 4, (doc/'p').filter('*').length
    assert_equal 4, (doc/'p').filter('* *').length
    eles = (doc/'p').filter('.ohmy')
    assert_equal 1, eles.length
    assert_equal 'ohmy', eles.first.get_attribute('class')
    assert_equal 3, (doc/'p:not(.ohmy)').length
    assert_equal 3, (doc/'p').not('.ohmy').length
    assert_equal 3, (doc/'p').not(eles.first).length
    assert_equal 2, (doc/'p').filter('[@class]').length
    assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
    assert_equal 1, (doc/'p').filter('[@class~="final"]').length
    assert_equal 2, (doc/'p > a').length
    assert_equal 1, (doc/'p.ohmy > a').length
    assert_equal 2, (doc/'p / a').length
    assert_equal 2, (doc/'link ~ link').length
    assert_equal 3, (doc/'title ~ link').length
    assert_equal 5, (doc/"//p/text()").length
    assert_equal 6, (doc/"//p[a]//text()").length
    assert_equal 2, (doc/"//p/a/text()").length
  end

  def test_positional
    # NOTE(review): markup rebuilt -- confirm (in particular whether anything
    # preceded the first <p> inside the <div>).
    h = Hpricot("<div><br/><p>one</p><p>two</p></div>")
    assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
    assert_equal "<p>one</p>", h.search("//div/p:first").to_s
    assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
  end

  def test_pace
    doc = Hpricot(TestFiles::PACE_APPLICATION)
    assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
    # assert_equal '2', doc.at('#hdnSpouse')['value']
  end

  def test_scan_boingboing
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 60, (@boingboing/'p.posted').length
    assert_equal 1, @boingboing.search("//a[@name='027906']").length
    assert_equal 10, @boingboing.search("script comment()").length
    assert_equal 3, @boingboing.search("a[text()*='Boing']").length
    assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
    assert_equal 0, @boingboing.search("h3[text()='College']").length
    assert_equal 60, @boingboing.search("h3").length
    assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
    assert_equal 17, @boingboing.search("h3[text()$='s']").length
    assert_equal 116, @boingboing.search("p[text()]").length
    assert_equal 211, @boingboing.search("p").length
  end

  def test_reparent
    # NOTE(review): all three snippets rebuilt from the ids the selectors
    # use; the id of the last inserted div is a placeholder -- confirm.
    doc = Hpricot(%{<div id="blurb_1"></div>})
    div1 = doc.search('#blurb_1')
    div1.before('<div id="blurb_0"></div>')

    div0 = doc.search('#blurb_0')
    div0.before('<div id="blurb_a"></div>')

    assert_equal 'div', doc.at('#blurb_1').name
  end

  def test_siblings
    @basic = Hpricot.parse(TestFiles::BASIC)
    t = @basic.at(:title)
    e = t.next_sibling
    assert_equal 'test1.css', e['href']
    assert_equal 'title', e.previous_sibling.name
  end

  def test_css_negation
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 3, (@basic/'p:not(.final)').length
  end

  def test_remove_attribute
    @basic = Hpricot.parse(TestFiles::BASIC)
    (@basic/:p).each { |ele| ele.remove_attribute('class') }
    assert_equal 0, (@basic/'p[@class]').length
  end

  def test_abs_xpath
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
    assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
    assert_equal 18, @boingboing.search("//script").length
    divs = @boingboing.search("//script/../div")
    assert_equal 2, divs.length
    imgs = @boingboing.search('//div/p/a/img')
    assert_equal 16, imgs.length
    assert_equal 16, @boingboing.search('//div').search('p/a/img').length
    assert(imgs.all? { |x| x.name == 'img' })
  end

  def test_predicates
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
    p_imgs = @boingboing.search('//div/p[/a/img]')
    assert_equal 16, p_imgs.length
    assert(p_imgs.all? { |x| x.name == 'p' })
    p_imgs = @boingboing.search('//div/p[a/img]')
    assert_equal 16, p_imgs.length
    assert(p_imgs.all? { |x| x.name == 'p' })
    assert_equal 1, @boingboing.search('//input[@checked]').length
  end

  def test_tag_case
    @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
    assert_equal 2, @tenderlove.search('//a').length
    assert_equal 3, @tenderlove.search('//area').length
    assert_equal 2, @tenderlove.search('//meta').length
  end

  def test_alt_predicates
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 1, @boingboing.search('//table/tr:last').length

    @basic = Hpricot.parse(TestFiles::BASIC)
    # NOTE(review): both expected strings rebuilt; they must mirror the markup
    # in TestFiles::BASIC, which is not visible from here -- confirm.
    assert_equal "<p>The third paragraph</p>",
                 @basic.search('p:eq(2)').to_html
    assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
                 @basic.search('p:last').to_html
    assert_equal 'last final', @basic.search('//p:last-of-type').first.get_attribute('class')
  end

  def test_insert_after # ticket #63
    # NOTE(review): markup rebuilt -- confirm.
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end
    # Expected value goes first so a failure message labels the sides correctly.
    assert_equal '<html><body><div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p></body></html>',
                 doc.to_html
  end

  def test_insert_before # ticket #61
    # NOTE(review): markup rebuilt -- confirm.
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end
    assert_equal '<html><body><p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div></body></html>',
                 doc.to_html
  end

  def test_many_paths
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
    assert_equal 18, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
  end

  def test_stacked_search
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
  end

  def test_attr_casing
    # NOTE(review): markup rebuilt; the assertions need an <a> whose attribute
    # is written in upper case -- confirm the inner markup.
    doc = Hpricot("<a HREF='a'>A simple <b>test</b> string.</a>")
    assert_equal "a", (doc % :a)[:href]
    assert_nil((doc % :a)[:HREF])
    assert_equal "a", (doc % :a)['href']
    assert_nil((doc % :a)['HREF'])
  end

  def test_class_search
    # test case sent by Chih-Chao Lam
    # NOTE(review): both snippets rebuilt; whether the first one carried a
    # deliberately malformed class attribute could not be recovered -- confirm.
    doc = Hpricot("<div class=xyz'>abc</div>")
    assert_equal 1, doc.search(".xyz").length

    doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
    assert_equal 1, doc.search(".xyz").length
    assert_equal 4, doc.search("*").length
  end

  def test_kleene_star
    # bug noticed by raja bhatia
    # NOTE(review): markup rebuilt (two "small" and two "large" elements) -- confirm.
    doc = Hpricot("<span class='small'>1</span><div class='large'>2</div>" \
                  "<div class='small'>3</div><span class='blue large'>4</span>")
    assert_equal 2, doc.search("*[@class*='small']").length
    assert_equal 2, doc.search("*.small").length
    assert_equal 2, doc.search(".small").length
    assert_equal 2, doc.search(".large").length
  end

  def test_empty_comment
    # NOTE(review): both snippets rebuilt (a comment as first grandchild) -- confirm.
    doc = Hpricot("<p><!----></p>")
    assert doc.children[0].children[0].comment?
    doc = Hpricot("<p><!-- --></p>")
    assert doc.children[0].children[0].comment?
  end

  def test_body_newlines
    @immob = Hpricot.parse(TestFiles::IMMOB)
    body = @immob.at(:body)
    expected_attrs = {
      'background'   => '',
      'bgcolor'      => '#ffffff',
      'text'         => '#000000',
      'marginheight' => '10',
      'marginwidth'  => '10',
      'leftmargin'   => '10',
      'topmargin'    => '10',
      'link'         => '#000066',
      'alink'        => '#ff6600',
      'hlink'        => "#ff6600",
      'vlink'        => "#000000"
    }
    expected_attrs.each do |name, value|
      assert_equal value, body[name]
    end
  end

  def test_nested_twins
    # NOTE(review): markup rebuilt (a div nested inside a div) -- confirm.
    @doc = Hpricot("<div>Hi<div>there</div></div>")
    assert_equal 1, (@doc/"div div").length
  end

  def test_wildcard
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 3, (@basic/"*[@id]").length
    assert_equal 3, (@basic/"//*[@id]").length
  end

  def test_javascripts
    @immob = Hpricot.parse(TestFiles::IMMOB)
    # NOTE(review): the pattern was cut off right after the opening "/" in the
    # reviewed copy; /<LINK/ is a best guess -- confirm.
    assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
  end

  # NOTE(review): everything between the truncated regexp above and the
  # fixture list below was missing from the reviewed copy. Any tests that
  # lived in that gap still need to be restored, and both the name of this
  # method and the exact form of its four fixtures are best guesses.
  def test_mangled_tags
    fixtures = [
      %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
      %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
      %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
      %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}
    ]
    fixtures.each do |str|
      doc = Hpricot(str)
      assert_equal 1, (doc/:form).length
      assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
    end
  end

  def test_procins
    # NOTE(review): only the "\n" between the two processing instructions
    # survived; the instructions themselves were rebuilt -- confirm.
    doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
    assert_equal "php", doc.children[0].target
    assert_equal "blah='blah'", doc.children[2].content
  end

  # Passes as long as parsing does not raise.
  def test_no_buffer_error
    # NOTE(review): only newline escapes survived in this fixture; the large
    # attribute value is a reconstruction -- confirm.
    Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE"  value="#{(("X" * 2000) + "\n") * 44}" />\n\n\n\n</p>})
  end

  def test_youtube_attr
    # NOTE(review): the markup that surrounded these two sentences and this
    # test's original assertions are missing from the reviewed copy, so the
    # test currently only checks that the text parses. Restore both.
    str = <<-edoc
    <html><body>
    Lorem ipsum. Jolly roger, ding-dong sing-a-long
    Check out my posting, I have bright mice in large clown cars.
    </body></html>
    edoc
    Hpricot(str)
  end

  # NOTE(review): the header of this test was fused into test_youtube_attr in
  # the reviewed copy; the method name and everything in the fixture except
  # the text "HAI" are best guesses -- confirm.
  def test_screwed_xmlns
    doc = Hpricot(<<-edoc)
      <?xml:namespace prefix = cwi />
      <html><body>HAI</body></html>
    edoc
    assert_equal "HAI", doc.at("body").inner_text
  end

  # http://github.com/hpricot/hpricot/issues#issue/28
  def test_invalid_inner_text
    assert_equal "A", Hpricot('A&B;').inner_text[0...1]
  end

  # http://github.com/hpricot/hpricot/issues#issue/25
  # Passes as long as inner_text does not raise.
  def test_encoding_compatibility_error
    # NOTE(review): wrapping markup rebuilt -- confirm.
    Hpricot("<p>\xC3\x9Cber</p>").inner_text
  end

  # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
  def test_self_closed_form
    # NOTE(review): the heredoc body was empty in the reviewed copy; rebuilt
    # from the test name and the selector below -- confirm.
    doc = Hpricot(<<-edoc)
      <body>
      <form action="/loginRegForm" name="regForm" method="POST" />
      <input type="button">
      </form>
      </body>
    edoc
    assert_equal "button", doc.at("//form/input")['type']
  end

  def test_filters
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 0, (@basic/"title:parent").size
    assert_equal 3, (@basic/"p:parent").size
    assert_equal 3, (@basic/"link:empty").size
    assert_equal 1, (@basic/"span:empty").size
  end

  def test_keep_cdata
    # NOTE(review): the fixture was an empty string in the reviewed copy,
    # which makes the round-trip check vacuous; rebuilt -- confirm.
    str = %{<script> /*<![CDATA[*/ /*]]>*/ </script>}
    assert_equal str, Hpricot(str).to_html
  end

  def test_namespace
    # NOTE(review): only the text "hi" survived; the namespaced element was
    # rebuilt from the selector and the namespace URI is a guess -- confirm.
    chunk = <<-END
    <a xmlns:t="http://www.nexopia.com/dev/template">
      <t:sam>hi </t:sam>
    </a>
    END
    doc = Hpricot::XML(chunk)
    assert((doc/"//t:sam").size > 0) # at least this should probably work
    # assert (doc/"//sam").size > 0  # this would be nice
  end

  def test_uxs_ignores_non_entities
    assert_equal 'abc', Hpricot.uxs('abc')
  end

  def test_uxs_handles_gt_lt_amp_quot
    # NOTE(review): the input had been entity-decoded in the reviewed copy
    # (it read the same as the expected value); re-encoded -- confirm.
    assert_equal '"&<>', Hpricot.uxs('&quot;&amp;&lt;&gt;')
  end

  def test_uxs_handles_numeric_values
    # NOTE(review): input re-encoded as a numeric character reference -- confirm.
    if String.method_defined? :encoding
      assert_equal "é", Hpricot.uxs('&#233;')
    else
      assert_equal "\303\251", Hpricot.uxs('&#233;')
    end
  end

  def test_uxs_handles_entities
    # NOTE(review): input re-encoded as a named entity -- confirm.
    if String.method_defined? :encoding
      assert_equal "é", Hpricot.uxs('&eacute;')
    else
      assert_equal "\303\251", Hpricot.uxs('&eacute;')
    end
  end

  def test_cdata_inner_text
    # NOTE(review): only "96586" and " STDERR" survived; element names come
    # from the selectors below, the CDATA sections were rebuilt -- confirm.
    xml = Hpricot.XML(%{
      <peon>
        <id>96586</id>
        <stdout><![CDATA[This is STDOUT]]></stdout>
        <stderr><![CDATA[This is]]> STDERR</stderr>
      </peon>
    })
    assert_equal "This is STDOUT", (xml/:peon/:stdout).inner_text
    assert_equal "This is STDERR", (xml/:peon/:stderr).inner_text
  end

  def test_parsing_html_with_noscript
    # NOTE(review): only the text "Testing" survived; rebuilt from the test
    # name and the XPath below -- confirm, including the noscript contents.
    doc = Hpricot(<<-edoc)
    <html>
      <head>
        <noscript>
          <meta http-equiv="refresh" content="0; url=http://www.example.com/">
        </noscript>
        <meta name="verification" content="7ff5e90iormq5niy6x98j75" />
      </head>
      <body>
        <h1>Testing</h1>
      </body>
    </html>
    edoc
    assert_equal "7ff5e90iormq5niy6x98j75", doc.at("/html/head/meta[@name='verification']")['content']
  end
end