# -*- coding: utf-8 -*-
require 'helper'
require 'cgi'
# tests for Page encoding and charset and parsing
class TestMechanizePageEncoding < MiniTest::Unit::TestCase
MECH_ASCII_ENCODING = Mechanize::Util::NEW_RUBY_ENCODING ? 'US-ASCII' : 'ISO-8859-1'
def setup
@agent = Mechanize.new
@uri = URI('http://localhost/')
@response_headers = { 'content-type' => 'text/html' }
@body = '
hi'
end
def util_page body = @body, headers = @response_headers
body.force_encoding Encoding::BINARY if body.respond_to? :force_encoding
Mechanize::Page.new @uri, headers, body, 200, @agent
end
def test_page_charset
charset = Mechanize::Page.charset 'text/html;charset=vAlue'
assert_equal 'vAlue', charset
end
def test_page_charset_upcase
charset = Mechanize::Page.charset 'TEXT/HTML;CHARSET=UTF-8'
assert_equal 'UTF-8', charset
end
def test_page_semicolon
charset = Mechanize::Page.charset 'text/html;charset=UTF-8;'
assert_equal 'UTF-8', charset
end
def test_page_charset_no_chaset_token
charset = Mechanize::Page.charset 'text/html'
assert_nil charset
end
def test_page_charset_returns_nil_when_charset_says_none
charset = Mechanize::Page.charset 'text/html;charset=none'
assert_nil charset
end
def test_page_charset_multiple
charset = Mechanize::Page.charset 'text/html;charset=111;charset=222'
assert_equal '111', charset
end
def test_page_response_header_charset
headers = {'content-type' => 'text/html;charset=HEADER'}
charsets = Mechanize::Page.response_header_charset(headers)
assert_equal ['HEADER'], charsets
end
def test_page_response_header_charset_no_token
headers = {'content-type' => 'text/html'}
charsets = Mechanize::Page.response_header_charset(headers)
assert_equal [], charsets
headers = {'X-My-Header' => 'hello'}
charsets = Mechanize::Page.response_header_charset(headers)
assert_equal [], charsets
end
def test_response_header_charset
page = util_page nil, {'content-type' => 'text/html;charset=HEADER'}
assert_equal ['HEADER'], page.response_header_charset
end
def test_page_meta_charset
body = ''
charsets = Mechanize::Page.meta_charset(body)
assert_equal ['META'], charsets
end
def test_page_meta_charset_is_empty_when_no_charset_meta
body = ''
charsets = Mechanize::Page.meta_charset(body)
assert_equal [], charsets
end
def test_meta_charset
body = ''
page = util_page body
assert_equal ['META'], page.meta_charset
end
def test_detected_encoding
page = util_page
assert_equal MECH_ASCII_ENCODING, page.detected_encoding
end
def test_encodings
response = {'content-type' => 'text/html;charset=HEADER'}
body = ''
@agent.default_encoding = 'DEFAULT'
page = util_page body, response
assert_equal true, page.encodings.include?('HEADER')
assert_equal true, page.encodings.include?('META')
assert_equal true, page.encodings.include?(MECH_ASCII_ENCODING)
assert_equal true, page.encodings.include?('DEFAULT')
end
def test_parser_with_default_encoding
# pre test
assert_equal false, util_page.encodings.include?('Windows-1252')
@agent.default_encoding = 'Windows-1252'
page = util_page
assert_equal true, page.encodings.include?('Windows-1252')
end
def test_parser_force_default_encoding
@agent.default_encoding = 'Windows-1252'
@agent.force_default_encoding = true
page = util_page
assert page.encodings.include? 'Windows-1252'
end
def test_parser_encoding_equals_overwrites_force_default_encoding
@agent.default_encoding = 'Windows-1252'
@agent.force_default_encoding = true
page = util_page
assert_equal 'Windows-1252', page.encoding
page.encoding = 'ISO-8859-2'
assert_equal 'ISO-8859-2', page.encoding
end
end