# coding: utf-8
require 'mechanize/test_case'
class TestMechanizePageLink < Mechanize::TestCase
WINDOWS_1255 = <<-HTML
hi
HTML
BAD = <<-HTML
Bia\xB3ystok
HTML
BAD.force_encoding Encoding::BINARY if defined? Encoding
SJIS_TITLE = "\x83\x65\x83\x58\x83\x67"
SJIS_AFTER_TITLE = <<-HTML
#{SJIS_TITLE}
HTML
SJIS_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
SJIS_BAD_AFTER_TITLE = <<-HTML
#{SJIS_TITLE}
HTML
SJIS_BAD_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
UTF8_TITLE = 'ใในใ'
UTF8 = <<-HTML
#{UTF8_TITLE}
HTML
ENCODING_ERROR_CLASS = Nokogiri::XML::SyntaxError
def setup
super
@uri = URI('http://example')
@res = { 'content-type' => 'text/html' }
@body = 'hi'
end
def util_page body = @body, res = @res
body.force_encoding Encoding::BINARY if body.respond_to? :force_encoding
Mechanize::Page.new @uri, res, body, 200, @mech
end
def test_initialize_supported_content_type
page = Mechanize::Page.new nil, 'content-type' => 'application/xhtml+xml'
assert page
assert_equal 'application/xhtml+xml', page.content_type
page = Mechanize::Page.new nil, 'content-type' => 'text/html'
assert page
assert_equal 'text/html', page.content_type
page = Mechanize::Page.new nil, 'content-type' => 'application/xhtml+xml;charset=utf-8'
assert page
assert_equal 'application/xhtml+xml;charset=utf-8', page.content_type
page = Mechanize::Page.new nil, 'content-type' => 'text/html;charset=utf-8'
assert page
assert_equal 'text/html;charset=utf-8', page.content_type
end
def test_initialize_unsupported_content_type
e = assert_raises Mechanize::ContentTypeError do
Mechanize::Page.new nil, 'content-type' => 'text/plain'
end
assert_equal 'text/plain', e.content_type
e = assert_raises Mechanize::ContentTypeError do
Mechanize::Page.new nil, 'content-type' => 'text/plain;charset=utf-8'
end
assert_equal 'text/plain;charset=utf-8', e.content_type
end
def test_override_content_type
page = Mechanize::Page.new nil, {'content-type' => 'text/html'}, WINDOWS_1255
assert page
assert_equal 'text/html; charset=windows-1255', page.content_type
end
def test_canonical_uri
page = @mech.get("http://localhost/canonical_uri.html")
assert_equal(URI("http://localhost/canonical_uri"), page.canonical_uri)
page = @mech.get("http://localhost/file_upload.html")
assert_equal(nil, page.canonical_uri)
end
def test_canonical_uri_unescaped
page = util_page <<-BODY
BODY
assert_equal @uri + '/white%20space', page.canonical_uri
end
def test_charset_from_content_type
charset = Mechanize::Page.__send__ :charset_from_content_type, 'text/html;charset=UTF-8'
assert_equal 'UTF-8', charset
end
def test_charset_from_bad_content_type
charset = Mechanize::Page.__send__ :charset_from_content_type, 'text/html'
assert_nil charset
end
def test_encoding
page = util_page WINDOWS_1255
assert_equal 'windows-1255', page.encoding
end
def test_encoding_charset_after_title
page = util_page SJIS_AFTER_TITLE
assert_equal false, page.encoding_error?
assert_equal 'Shift_JIS', page.encoding
end
def test_encoding_charset_after_title_bad
page = util_page UTF8
assert_equal false, page.encoding_error?
assert_equal 'UTF-8', page.encoding
end
def test_encoding_charset_after_title_double_bad
page = util_page SJIS_BAD_AFTER_TITLE
assert_equal false, page.encoding_error?
assert_equal 'SHIFT_JIS', page.encoding
end
def test_encoding_charset_bad
page = util_page "#{UTF8_TITLE}"
page.encodings.replace %w[
UTF-8
Shift_JIS
]
assert_equal false, page.encoding_error?
assert_equal 'UTF-8', page.encoding
end
def test_encoding_meta_charset
page = util_page ""
assert_equal 'UTF-8', page.encoding
end
def test_encoding_equals
page = util_page
page.meta_refresh
assert page.instance_variable_get(:@meta_refresh)
page.encoding = 'UTF-8'
assert_nil page.instance_variable_get(:@meta_refresh)
assert_equal 'UTF-8', page.encoding
assert_equal 'UTF-8', page.parser.encoding
end
def test_page_encoding_error?
page = util_page
page.parser.errors.clear
assert_equal false, page.encoding_error?
end
def test_detect_libxml2error_indicate_encoding
page = util_page
page.parser.errors.clear
# error in libxml2-2.7.8/parser.c, HTMLparser.c or parserInternals.c
page.parser.errors = [ENCODING_ERROR_CLASS.new("Input is not proper UTF-8, indicate encoding !\n")]
assert_equal true, page.encoding_error?
end
def test_detect_libxml2error_invalid_char
page = util_page
page.parser.errors.clear
# error in libxml2-2.7.8/HTMLparser.c
page.parser.errors = [ENCODING_ERROR_CLASS.new("Invalid char in CDATA 0x%X\n")]
assert_equal true, page.encoding_error?
end
def test_detect_libxml2error_input_conversion_failed
page = util_page
page.parser.errors.clear
# error in libxml2-2.7.8/encoding.c
page.parser.errors = [ENCODING_ERROR_CLASS.new("input conversion failed due to input error\n")]
assert_equal true, page.encoding_error?
end
def test_detect_libxml2error_which_unsupported_by_mechanize
page = util_page
page.parser.errors.clear
# error in libxml2-2.7.8/HTMLparser.c
page.parser.errors = [ENCODING_ERROR_CLASS.new("encoder error\n")]
assert_equal false, page.encoding_error?
end
def test_encoding_equals_before_parser
# document has a bad encoding information - windows-1255
page = util_page BAD
# encoding is wrong, so user wants to force ISO-8859-2
page.encoding = 'ISO-8859-2'
assert_equal false, page.encoding_error?
assert_equal 'ISO-8859-2', page.encoding
assert_equal 'ISO-8859-2', page.parser.encoding
end
def test_encoding_equals_after_parser
# document has a bad encoding information - windows-1255
page = util_page BAD
page.parser
# autodetection sets encoding to windows-1255
assert_equal 'windows-1255', page.encoding
# believe in yourself, not machine
assert_equal false, page.encoding_error?
# encoding is wrong, so user wants to force ISO-8859-2
page.encoding = 'ISO-8859-2'
assert_equal false, page.encoding_error?
assert_equal 'ISO-8859-2', page.encoding
assert_equal 'ISO-8859-2', page.parser.encoding
end
def test_frames_with
page = @mech.get("http://localhost/frame_test.html")
assert_equal(3, page.frames.size)
find_orig = page.frames.find_all { |f| f.name == 'frame1' }
find1 = page.frames_with(:name => 'frame1')
find_orig.zip(find1).each { |a,b|
assert_equal(a, b)
}
end
def test_links_with_dom_id
page = @mech.get("http://localhost/tc_links.html")
link = page.links_with(:dom_id => 'bold_aaron_link')
link_by_id = page.links_with(:id => 'bold_aaron_link')
assert_equal(1, link.length)
assert_equal('Aaron Patterson', link.first.text)
assert_equal(link, link_by_id)
end
def test_links_with_dom_class
page = @mech.get("http://localhost/tc_links.html")
link = page.links_with(:dom_class => 'thing_link')
link_by_class = page.links_with(:class => 'thing_link')
assert_equal(1, link.length)
assert_equal(link, link_by_class)
end
def test_link_with_encoded_space
page = @mech.get("http://localhost/tc_links.html")
link = page.link_with(:text => 'encoded space')
page = @mech.click link
end
def test_link_with_space
page = @mech.get("http://localhost/tc_links.html")
link = page.link_with(:text => 'not encoded space')
page = @mech.click link
end
def test_link_with_unusual_characters
page = @mech.get("http://localhost/tc_links.html")
link = page.link_with(:text => 'unusual characters')
@mech.click link
# HACK no assertion
end
def test_links
page = @mech.get("http://localhost/find_link.html")
assert_equal(18, page.links.length)
end
def test_links_with_bold
page = @mech.get("http://localhost/tc_links.html")
link = page.links_with(:text => /Bold Dude/)
assert_equal(1, link.length)
assert_equal('Bold Dude', link.first.text)
assert_equal [], link.first.rel
assert !link.first.rel?('me')
assert !link.first.rel?('nofollow')
link = page.links_with(:text => 'Aaron James Patterson')
assert_equal(1, link.length)
assert_equal('Aaron James Patterson', link.first.text)
assert_equal ['me'], link.first.rel
assert link.first.rel?('me')
assert !link.first.rel?('nofollow')
link = page.links_with(:text => 'Aaron Patterson')
assert_equal(1, link.length)
assert_equal('Aaron Patterson', link.first.text)
assert_equal ['me', 'nofollow'], link.first.rel
assert link.first.rel?('me')
assert link.first.rel?('nofollow')
link = page.links_with(:text => 'Ruby Rocks!')
assert_equal(1, link.length)
assert_equal('Ruby Rocks!', link.first.text)
end
def test_meta_refresh
page = @mech.get("http://localhost/find_link.html")
assert_equal(3, page.meta_refresh.length)
assert_equal(%w{
http://www.drphil.com/
http://www.upcase.com/
http://tenderlovemaking.com/ }.sort,
page.meta_refresh.map { |x| x.href.downcase }.sort)
end
def test_title
page = util_page
assert_equal('hi', page.title)
end
def test_title_none
page = util_page '' # invalid HTML
assert_equal(nil, page.title)
end
def test_page_decoded_with_charset
page = util_page @body, 'content-type' => 'text/html; charset=EUC-JP'
assert_equal 'EUC-JP', page.encoding
assert_equal 'EUC-JP', page.parser.encoding
end
def test_form
page = @mech.get("http://localhost/tc_form_action.html")
form = page.form(:name => 'post_form1')
assert form
yielded = false
form = page.form(:name => 'post_form1') { |f|
yielded = true
assert f
assert_equal(form, f)
}
assert yielded
form_by_action = page.form(:action => '/form_post?a=b&b=c')
assert form_by_action
assert_equal(form, form_by_action)
end
end