# -*- coding: utf-8 -*-
require "helper"
module Nokogiri
module HTML
if RUBY_VERSION =~ /^1\.9/
class TestDocumentEncoding < Nokogiri::TestCase
def test_encoding
doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
hello = "こんにちは"
assert_match doc.encoding, doc.to_html
assert_match hello.encode('Shift_JIS'), doc.to_html
assert_equal 'Shift_JIS', doc.to_html.encoding.name
assert_match hello, doc.to_html(:encoding => 'UTF-8')
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
end
def test_default_to_encoding_from_string
bad_charset = <<-eohtml
blah!
eohtml
doc = Nokogiri::HTML(bad_charset)
assert_equal bad_charset.encoding.name, doc.encoding
doc = Nokogiri.parse(bad_charset)
assert_equal bad_charset.encoding.name, doc.encoding
end
def test_encoding_non_utf8
orig = '日本語が上手です'
bin = Encoding::ASCII_8BIT
[Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
html = <<-eohtml.encode(enc)
#{orig}
eohtml
text = Nokogiri::HTML.parse(html).at('title').inner_text
assert_equal(
orig.encode(enc).force_encoding(bin),
text.encode(enc).force_encoding(bin)
)
end
end
def test_encoding_with_a_bad_name
bad_charset = <<-eohtml
blah!
eohtml
doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
assert_equal ['http://tenderlovemaking.com/'],
doc.css('a').map { |a| a['href'] }
end
end
end
class TestDocumentEncodingDetection < Nokogiri::TestCase
if IO.respond_to?(:binread)
def binread(file)
IO.binread(file)
end
else
def binread(file)
IO.read(file)
end
end
def binopen(file)
File.open(file, 'rb')
end
def test_document_html_noencoding
from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE))
from_string = Nokogiri::HTML(binread(NOENCODING_FILE))
assert_equal from_string.to_s.size, from_stream.to_s.size
end
def test_document_xhtml_enc
[ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map { |text| text.text }
doc_from_string = Nokogiri::HTML(binread(file))
ary_from_string = doc_from_string.xpath('//p/text()').map { |text| text.text }
doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map { |text| text.text }
doc_from_file = Nokogiri::HTML(binopen(file))
ary_from_file = doc_from_file.xpath('//p/text()').map { |text| text.text }
title = 'たこ焼き仮面'
assert_equal(title, doc_from_string_enc.at('//title/text()').text)
assert_equal(title, doc_from_string.at('//title/text()').text)
assert_equal(title, doc_from_file_enc.at('//title/text()').text)
assert_equal(title, doc_from_file.at('//title/text()').text)
evil = (0..72).map { |i| '超' * i + '悪い事を構想中。' }
assert_equal(evil, ary_from_string_enc)
assert_equal(evil, ary_from_string)
assert_equal(evil, ary_from_file_enc)
assert_equal(evil, ary_from_file)
}
end
end
end
end