require File.expand_path(File.join(File.dirname(__FILE__), "helper"))

# Hpricot is optional: when it is installed, every selector is additionally
# run through the real Hpricot so the expected results can be cross-checked.
begin
  require 'rubygems'
  require 'hpricot'
  HAS_HPRICOT = true
rescue LoadError
  HAS_HPRICOT = false
end

##
# Checks that Hpricot-style selectors ("hpaths") are converted to the expected
# XPath expressions by the Hpricot-decorated Nokogiri document, and that the
# resulting XPath finds the expected content in the HTML fixture.
class TestConvertXPath < Nokogiri::TestCase

  ##
  # Parses the HTML fixture once per parser flavour:
  #   @N  - plain Nokogiri document
  #   @NH - Nokogiri document created through Nokogiri.Hpricot
  #   @H  - real Hpricot document (only when Hpricot is installed)
  def setup
    # Read the fixture a single time; each parser receives the same source.
    html = File.read(HTML_FILE)

    @N  = Nokogiri(html)
    @NH = Nokogiri.Hpricot(html) # decorated document
    @H  = Hpricot(html) if HAS_HPRICOT
  end

  ##
  # Asserts that +hpath+ and +xpath+ are interchangeable:
  #
  # 1. searching @N with +xpath+ yields +match+,
  # 2. searching @H with +hpath+ yields +match+ (skipped without Hpricot),
  # 3. @NH.convert_to_xpath(hpath) returns exactly [xpath].
  #
  # The optional block extracts the value compared against +match+ from a
  # search result; when omitted, the first element of the result is used.
  def assert_syntactical_equivalence(hpath, xpath, match, &blk)
    blk ||= lambda { |j| j.first }

    assert_equal match, blk.call(@N.search(xpath)), "xpath result did not match"
    if HAS_HPRICOT
      assert_equal match, blk.call(@H.search(hpath)), "hpath result did not match"
    end
    assert_equal [xpath], @NH.convert_to_xpath(hpath), "converted hpath did not match xpath"
  end

  # Bare tag names, symbols and already-XPath-looking input all become ".//p".
  def test_ordinary_xpath_conversions
    assert_equal(".//p", @NH.convert_to_xpath("p").first)
    assert_equal(".//p", @NH.convert_to_xpath(:p).first)
    assert_equal(".//p", @NH.convert_to_xpath("//p").first)
    assert_equal(".//p", @NH.convert_to_xpath(".//p").first)
  end

  # A bare tag name inside a predicate becomes a child:: test.
  def test_child_tag
    assert_syntactical_equivalence("h1[a]", ".//h1[child::a]", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  # A child tag compared against a string keeps the comparison.
  def test_child_tag_equals
    assert_syntactical_equivalence("h1[a='Tender Lovemaking']", ".//h1[child::a = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  # :contains('...') becomes an XPath contains() on the context node.
  def test_filter_contains
    assert_syntactical_equivalence("title:contains('Tender')", ".//title[contains(., 'Tender')]", "Tender Lovemaking ") do |j|
      j.inner_text
    end
  end

  # A numeric index on comment() becomes an explicit position() test.
  def test_filter_comment
    # NOTE(review): an empty string is an unusual serialization for a comment
    # node - confirm this expected value against the fixture.
    assert_syntactical_equivalence("div comment()[2]", ".//div//comment()[position() = 2]", "") do |j|
      j.first.to_s
    end
  end

  # text() in a predicate is wrapped in normalize-space(child::text());
  # text() as a path step becomes child::text().
  def test_filter_text
    assert_syntactical_equivalence("a[text()]", ".//a[normalize-space(child::text())]", "Tender Lovemaking") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a[text()='Tender Lovemaking']", ".//a[normalize-space(child::text()) = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a/text()", ".//a/child::text()", "Tender Lovemaking") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("h2//a[text()!='Back Home!']", ".//h2//a[normalize-space(child::text()) != 'Back Home!']", "Meow meow meow meow meow") do |j|
      j.first.inner_text
    end
  end

  # Attribute predicates are passed through with normalized spacing.
  def test_filter_by_attr
    assert_syntactical_equivalence("a[@href='http://blog.geminigeek.com/wordpress-theme']", ".//a[@href = 'http://blog.geminigeek.com/wordpress-theme']", "http://blog.geminigeek.com/wordpress-theme") do |j|
      j.first["href"]
    end
  end

  # CSS "#id" becomes an @id equality test, with or without a tag name.
  def test_css_id
    assert_syntactical_equivalence("#linkcat-7", ".//*[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
    assert_syntactical_equivalence("li#linkcat-7", ".//li[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
  end

  # CSS ".class" becomes a whitespace-delimited token test on @class.
  def test_css_class
    assert_syntactical_equivalence(".cat-item-15", ".//*[contains(concat(' ', @class, ' '), ' cat-item-15 ')]", "cat-item cat-item-15") do |j|
      j.first["class"]
    end
    assert_syntactical_equivalence("li.cat-item-15", ".//li[contains(concat(' ', @class, ' '), ' cat-item-15 ')]", "cat-item cat-item-15") do |j|
      j.first["class"]
    end
  end

  # CSS combinators: descendant (space), child (>) and sibling (~).
  def test_css_tags
    assert_syntactical_equivalence("div li a", ".//div//li//a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    assert_syntactical_equivalence("div li > a", ".//div//li/a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
      j.first.inner_text
    end
  end

  # :first / :last (with or without parentheses) become position() tests.
  def test_positional
    ##
    #  we are intentionally NOT staying compatible with nth-and-friends, as Hpricot has an OB1 bug.
    #
    #     assert_syntactical_equivalence("div > div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #       j.first.inner_text
    #     end
    #     assert_syntactical_equivalence("div/div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #       j.first.inner_text
    #     end
    #     assert_syntactical_equivalence("div/div:nth(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #       j.first.inner_text
    #     end
    #     assert_syntactical_equivalence("div/div:nth-of-type(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #       j.first.inner_text
    #     end

    # Line breaks are stripped from both the expected and the actual text
    # before they are compared.
    header_text = "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')

    assert_syntactical_equivalence("div/div:first()", ".//div/div[position() = 1]", header_text) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div/div:first", ".//div/div[position() = 1]", header_text) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div//a:last()", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
    assert_syntactical_equivalence("div//a:last", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
  end

  # Consecutive predicates are merged into a single one joined with "and".
  def test_multiple_filters
    assert_syntactical_equivalence("a[@rel='bookmark'][1]", ".//a[@rel = 'bookmark' and position() = 1]", "Back Home!") do |j|
      j.first.inner_text
    end
  end

  # Prefixed tag names are matched through name() and the remaining
  # predicates are left untouched.
  def test_compat_mode_namespaces
    assert_equal(".//*[name()='t:sam']", @NH.convert_to_xpath("//t:sam").first)
    assert_equal(".//*[name()='t:sam'][@rel='bookmark'][1]", @NH.convert_to_xpath("//t:sam[@rel='bookmark'][1]").first)
  end

  ##
  #  'and' is not supported by hpricot
  #   def test_and
  #     assert_syntactical_equivalence("div[h1 and small]", ".//div[h1 and small]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
  #       j.inner_text
  #     end
  #   end

  # TODO:
  #       doc/'title ~ link' -> links that are siblings of title
  #       doc/'p[@class~="final"]' -> class includes string (whitespacy)
  #       doc/'p[text()*="final"]' -> class includes string (index) (broken: always returns true?)
  #       doc/'p[text()$="final"]' -> /final$/
  #       doc/'p[text()|="final"]' -> /^final$/
  #       doc/'p[text()^="final"]' -> string starts with 'final
  #       nth_first
  #       nth_last
  #       even
  #       odd
  #       first-child, nth-child, last-child, nth-last-child, nth-last-of-type
  #       only-of-type, only-child
  #       parent
  #       empty
  #       root
end