# -*- coding: utf-8 -*-
require "helper"
module Nokogiri
module XML
module SAX
class TestParser < Nokogiri::SAX::TestCase
def setup
super
@parser = XML::SAX::Parser.new(Doc.new)
end
def test_parser_context_yielded_io
doc = Doc.new
parser = XML::SAX::Parser.new doc
xml = ""
block_called = false
parser.parse(StringIO.new(xml)) { |ctx|
block_called = true
ctx.replace_entities = true
}
assert block_called
assert_equal [['foo', [['a', '&b']]]], doc.start_elements
end
def test_parser_context_yielded_in_memory
doc = Doc.new
parser = XML::SAX::Parser.new doc
xml = ""
block_called = false
parser.parse(xml) { |ctx|
block_called = true
ctx.replace_entities = true
}
assert block_called
assert_equal [['foo', [['a', '&b']]]], doc.start_elements
end
def test_xml_decl
{
'' => nil,
'' => ['1.0'],
'' => ['1.0', 'UTF-8'],
'' => ['1.0', 'yes'],
'' => ['1.0', 'no'],
}.each do |decl,value|
parser = XML::SAX::Parser.new(Doc.new)
xml = "#{decl}\n"
parser.parse xml
assert parser.document.start_document_called, xml
assert_equal value, parser.document.xmldecls, xml
end
end
def test_parse_empty
assert_raises RuntimeError do
@parser.parse('')
end
end
def test_namespace_declaration_order_is_saved
@parser.parse <<-eoxml
eoxml
assert_equal 2, @parser.document.start_elements_namespace.length
el = @parser.document.start_elements_namespace.first
namespaces = el.last
assert_equal ['foo', 'http://foo.example.com/'], namespaces.first
assert_equal [nil, 'http://example.com/'], namespaces.last
end
def test_bad_document_calls_error_handler
@parser.parse('')
assert @parser.document.errors
assert @parser.document.errors.length > 0
end
def test_namespace_are_super_fun_to_parse
@parser.parse <<-eoxml
hello world
eoxml
assert @parser.document.start_elements_namespace.length > 0
el = @parser.document.start_elements_namespace[1]
assert_equal 'a', el.first
assert_equal 1, el[1].length
attribute = el[1].first
assert_equal 'bar', attribute.localname
assert_equal 'foo', attribute.prefix
assert_equal 'hello', attribute.value
assert_equal 'http://foo.example.com/', attribute.uri
end
def test_sax_v1_namespace_attribute_declarations
@parser.parse <<-eoxml
hello world
eoxml
assert @parser.document.start_elements.length > 0
elm = @parser.document.start_elements.first
assert_equal 'root', elm.first
assert elm[1].include?(['xmlns:foo', 'http://foo.example.com/'])
assert elm[1].include?(['xmlns', 'http://example.com/'])
end
def test_sax_v1_namespace_nodes
@parser.parse <<-eoxml
hello world
eoxml
assert_equal 5, @parser.document.start_elements.length
assert @parser.document.start_elements.map { |se|
se.first
}.include?('foo:bar')
assert @parser.document.end_elements.map { |se|
se.first
}.include?('foo:bar')
end
def test_start_is_called_without_namespace
@parser.parse(<<-eoxml)
eoxml
assert_equal ['root', 'foo:f', 'bar'],
@parser.document.start_elements.map { |x| x.first }
end
def test_parser_sets_encoding
parser = XML::SAX::Parser.new(Doc.new, 'UTF-8')
assert_equal 'UTF-8', parser.encoding
end
def test_errors_set_after_parsing_bad_dom
doc = Nokogiri::XML('')
assert doc.errors
@parser.parse('')
assert @parser.document.errors
assert @parser.document.errors.length > 0
if RUBY_VERSION =~ /^1\.9/
doc.errors.each do |error|
assert_equal 'UTF-8', error.message.encoding.name
end
end
# when using JRuby Nokogiri, more errors will be generated as the DOM
# parser continue to parse an ill formed document, while the sax parser
# will stop at the first error
unless Nokogiri.jruby?
assert_equal doc.errors.length, @parser.document.errors.length
end
end
def test_parse_with_memory_argument
@parser.parse(File.read(XML_FILE))
assert(@parser.document.cdata_blocks.length > 0)
end
def test_parse_with_io_argument
File.open(XML_FILE, 'rb') { |f|
@parser.parse(f)
}
assert(@parser.document.cdata_blocks.length > 0)
end
def test_parse_io
call_parse_io_with_encoding 'UTF-8'
end
# issue #828
def test_parse_io_lower_case_encoding
call_parse_io_with_encoding 'utf-8'
end
def call_parse_io_with_encoding encoding
File.open(XML_FILE, 'rb') { |f|
@parser.parse_io(f, encoding)
}
assert(@parser.document.cdata_blocks.length > 0)
if RUBY_VERSION =~ /^1\.9/
called = false
@parser.document.start_elements.flatten.each do |thing|
assert_equal 'UTF-8', thing.encoding.name
called = true
end
assert called
called = false
@parser.document.end_elements.flatten.each do |thing|
assert_equal 'UTF-8', thing.encoding.name
called = true
end
assert called
called = false
@parser.document.data.each do |thing|
assert_equal 'UTF-8', thing.encoding.name
called = true
end
assert called
called = false
@parser.document.comments.flatten.each do |thing|
assert_equal 'UTF-8', thing.encoding.name
called = true
end
assert called
called = false
@parser.document.cdata_blocks.flatten.each do |thing|
assert_equal 'UTF-8', thing.encoding.name
called = true
end
assert called
end
end
def test_parse_file
@parser.parse_file(XML_FILE)
assert_raises(ArgumentError) {
@parser.parse_file(nil)
}
assert_raises(Errno::ENOENT) {
@parser.parse_file('')
}
assert_raises(Errno::EISDIR) {
@parser.parse_file(File.expand_path(File.dirname(__FILE__)))
}
end
def test_render_parse_nil_param
assert_raises(ArgumentError) { @parser.parse_memory(nil) }
end
def test_bad_encoding_args
assert_raises(ArgumentError) { XML::SAX::Parser.new(Doc.new, 'not an encoding') }
assert_raises(ArgumentError) { @parser.parse_io(StringIO.new(''), 'not an encoding')}
end
def test_ctag
@parser.parse_memory(<<-eoxml)
Paragraph 1
eoxml
assert_equal [' This is a comment '], @parser.document.cdata_blocks
end
def test_comment
@parser.parse_memory(<<-eoxml)
Paragraph 1
eoxml
assert_equal [' This is a comment '], @parser.document.comments
end
def test_characters
@parser.parse_memory(<<-eoxml)
Paragraph 1
eoxml
assert_equal ['Paragraph 1'], @parser.document.data
end
def test_end_document
@parser.parse_memory(<<-eoxml)
Paragraph 1
eoxml
assert @parser.document.end_document_called
end
def test_end_element
@parser.parse_memory(<<-eoxml)
Paragraph 1
eoxml
assert_equal [["p"]],
@parser.document.end_elements
end
def test_start_element_attrs
@parser.parse_memory(<<-eoxml)
Paragraph 1
eoxml
assert_equal [["p", [["id", "asdfasdf"]]]],
@parser.document.start_elements
end
def test_start_element_attrs_include_namespaces
@parser.parse_memory(<<-eoxml)
Paragraph 1
eoxml
assert_equal [["p", [['xmlns:foo', 'http://foo.example.com/']]]],
@parser.document.start_elements
end
def test_processing_instruction
@parser.parse_memory(<<-eoxml)
eoxml
assert_equal [['xml-stylesheet', 'href="a.xsl" type="text/xsl"']],
@parser.document.processing_instructions
end
if Nokogiri.uses_libxml? # JRuby SAXParser only parses well-formed XML documents
def test_parse_document
@parser.parse_memory(<<-eoxml)
Paragraph 1
Paragraph 2
eoxml
end
end
def test_parser_attributes
xml = <<-eoxml
eoxml
block_called = false
@parser.parse(xml) { |ctx|
block_called = true
ctx.replace_entities = true
}
assert block_called
assert_equal [['root', []], ['foo', [['a', '&b'], ['c', '>d']]]], @parser.document.start_elements
end
end
end
end
end