#
# tests/testhtmlscan.rb
#
# Copyright (C) UENO Katsuhiro 2002
#
# $Id: testhtmlscan.rb,v 1.19 2003/02/28 12:31:07 katsu Exp $
#
require 'test/unit'
require 'deftestcase'
require 'xmlscan/htmlscan'
require 'visitor'
class TestHTMLScanner < Test::Unit::TestCase
include DefTestCase
# Visitor class used by every case in this file; it is built by
# RecordingVisitor.new_class from XMLScan::Visitor.
# NOTE(review): presumably it records each callback as an event row that
# #result later returns -- confirm in visitor.rb, which is not shown here.
Visitor = RecordingVisitor.new_class(XMLScan::Visitor)
private
# Builds a fresh visitor and an HTMLScanner that reports to it, storing
# them in @v and @s for use by #parse.
def setup
  recorder = Visitor.new
  @v = recorder
  @s = XMLScan::HTMLScanner.new(recorder)
end
# Feeds +src+ to the scanner held in @s, then returns whatever the
# visitor held in @v reports from #result.
def parse(src)
  @s.parse(src)
  @v.result
end
public
# Case table for comment scanning. The heredoc lists quoted input lines,
# each followed by event rows (:on_comment, :parse_error, :on_chardata);
# presumably DefTestCase (deftestcase.rb, not shown) turns every pair into
# an assertion -- confirm there.
# NOTE(review): many inputs below read '' yet are followed by different
# expected events, and some rows are cut mid-string. It looks like markup
# was stripped from this copy of the file; check against the upstream source.
deftestcase 'html_comment', <<-'TESTCASEEND'
''
[ :on_comment, ' hogefuga ' ]
''
[ :on_comment, ' hogefuga ' ]
''
[ :on_comment, ' hoge<<<>>><<>>fuga ' ]
''
[ :on_comment, ' hoge-fuga ' ]
''
[ :on_comment, ' hoge -- -- fuga ' ]
''
[ :parse_error, "only whitespace can appear between two comments" ]
[ :on_comment, ' hoge--fuga ' ]
''
[ :parse_error, "only whitespace can appear between two comments"]
[ :parse_error, "`-->' is found but comment must not end here"]
[ :on_comment, '- hogefuga -' ]
'' is found but comment must not end here"]
[ :on_comment, '- hogefuga -' ]
# should be parsed as ||
''
[ :on_comment, '----' ]
# should be parsed as |'
[ :parse_error, "`-->' is found but comment must not end here"]
[ :on_comment, '---' ]
# should be parsed as |'
[ :parse_error, "`-->' is found but comment must not end here"]
[ :on_comment, '--' ]
# should be parsed as |'
[ :parse_error, "only whitespace can appear between two comments"]
[ :parse_error, "`-->' is found but comment must not end here"]
[ :on_comment, '-' ]
''
[ :on_comment, '' ]
''
[ :parse_error, 'unterminated comment meets EOF' ]
[ :on_comment, '->' ]
''
[ :parse_error, 'unterminated comment meets EOF' ]
[ :on_comment, '>' ]
'fuga'
[ :on_comment, 'hoge' ]
[ :on_chardata, 'fuga' ]
'>'
[ :on_comment, 'hoge' ]
[ :on_chardata, '>' ]
'hoge'
[ :parse_error, "only whitespace can appear between two comments"]
[ :on_comment, 'hoge--fuga' ]
[ :on_chardata, 'hoge' ]
TESTCASEEND
# Case table for processing instructions: quoted input lines followed by
# the :on_pi / :parse_error / :on_chardata rows listed for them. Every
# :on_pi row here carries an empty target and the whole PI body as data.
# NOTE(review): several inputs read '' and one expected row is missing its
# closing bracket -- looks like markup was stripped from this copy; check
# against the upstream source.
deftestcase 'pi', <<-'TESTCASEEND'
''
[ :on_pi, '', 'hoge fuga?' ]
''
[ :on_pi, '', 'xml version="1.0"?' ]
''
[ :on_pi, '', 'hoge fuga' ]
''
[ :on_pi, '', 'hoge ' ]
''
[ :on_pi, '', 'hoge' ]
''
[ :on_pi, '', 'hoge <'
[ :on_pi, '', 'hoge<' ]
' >'
[ :on_pi, '', ' ' ]
'>'
[ :on_pi, '', '' ]
''
[ :parse_error, "unterminated PI meets EOF" ]
[ :on_pi, '', '' ]
'fuga'
[ :on_pi, '', 'hoge' ]
[ :on_chardata, 'fuga' ]
'>'
[ :on_pi, '', 'hoge' ]
[ :on_chardata, '>' ]
TESTCASEEND
# Case table for start tags: quoted input lines followed by the
# :on_stag / :on_stag_end / :parse_error / :on_chardata rows listed for
# them, including unclosed tags and stray `<', `/' and `=' characters.
# NOTE(review): many inputs read '' or lack the tag their expected rows
# mention -- looks like markup was stripped from this copy; check against
# the upstream source.
deftestcase 'stag', <<-'TESTCASEEND'
''
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :parse_error, "unclosed start tag `hoge' meets another tag" ]
[ :on_stag_end, 'hoge' ]
[ :on_stag, 'fuga' ]
[ :on_stag_end, 'fuga' ]
''
[ :on_stag, 'hoge' ]
[ :parse_error, "unclosed start tag `hoge' meets another tag" ]
[ :on_stag_end, 'hoge' ]
[ :on_stag, 'fuga' ]
[ :on_stag_end, 'fuga' ]
'< hoge>'
[ :parse_error, "parse error at `<'" ]
[ :on_chardata, '< hoge>' ]
''
[ :on_stag, 'hoge' ]
[ :parse_error, "parse error at `/'" ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :parse_error, "parse error at `='" ]
[ :on_stag_end, 'hoge' ]
'<=hoge >'
[ :parse_error, "parse error at `<'" ]
[ :on_chardata, '<=hoge >' ]
'< =hoge >'
[ :parse_error, "parse error at `<'" ]
[ :on_chardata, '< =hoge >' ]
'fuga'
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
[ :on_chardata, 'fuga' ]
'>'
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
[ :on_chardata, '>' ]
'< hoge>fuga'
[ :parse_error, "parse error at `<'" ]
[ :on_chardata, '< hoge>' ]
[ :on_chardata, 'fuga' ]
'< hoge>>'
[ :parse_error, "parse error at `<'" ]
[ :on_chardata, '< hoge>' ]
[ :on_chardata, '>' ]
''
[ :on_stag, 'hoge' ]
[ :parse_error, "parse error at `/'" ]
[ :on_stag_end, 'hoge' ]
'fuga'
[ :on_stag, 'hoge' ]
[ :parse_error, "parse error at `/'" ]
[ :on_stag_end, 'hoge' ]
[ :on_chardata, 'fuga' ]
'>'
[ :on_stag, 'hoge' ]
[ :parse_error, "parse error at `/'" ]
[ :on_stag_end, 'hoge' ]
[ :on_chardata, '>' ]
TESTCASEEND
# Case table for attributes inside start tags: each input is followed by
# :on_attribute / :on_attr_value / :on_attribute_end rows bracketed by
# :on_stag and :on_stag_end. Rows with a nil attribute name appear where
# only a value is expected; :parse_error rows mark stray `/', `=' and `"'.
# NOTE(review): every input in this table reads '' (or "") although the
# expected rows differ -- looks like markup was stripped from this copy;
# check against the upstream source.
deftestcase 'attribute', <<-'TESTCASEEND'
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attribute_end, 'foo' ]
[ :on_stag_end, 'hoge' ]
""
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, ' bar ' ]
[ :on_attribute_end, 'foo' ]
[ :on_attribute, 'HOGE' ]
[ :on_attr_value, 'FUGA' ]
[ :on_attribute_end, 'HOGE' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :parse_error, "parse error at `/'" ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar/' ]
[ :on_attribute_end, 'foo' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'b' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, 'a' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, 'b' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, 'c' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, 'ar' ]
[ :on_attribute_end, 'foo' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'b' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, 'a' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, 'b' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, 'c' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, '>' ]
[ :on_attr_value, 'ar' ]
[ :on_attribute_end, 'foo' ]
[ :on_attribute, 'HOGE' ]
[ :on_attr_value, 'FUGA' ]
[ :on_attribute_end, 'HOGE' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'b' ]
[ :on_attr_value, '' ]
[ :on_attr_value, 'b' ]
[ :on_attr_value, '' ]
[ :on_attr_value, 'ar' ]
[ :on_attribute_end, 'foo' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar&fuga;bar' ]
[ :on_attribute_end, 'foo' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar/baz%fuga' ]
[ :on_attribute_end, 'foo' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, nil ]
[ :on_attr_value, 'foo' ]
[ :on_attribute_end, nil ]
[ :parse_error, "parse error at `/'" ]
[ :on_attribute, 'bar' ]
[ :on_attr_value, 'bar/baz%fuga' ]
[ :on_attribute_end, 'bar' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :on_attribute, 'hoge' ]
[ :on_attr_value, 'fuga' ]
[ :on_attribute_end, 'hoge' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, nil ]
[ :on_attr_value, 'foo' ]
[ :on_attribute_end, nil ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, nil ]
[ :on_attr_value, 'foo' ]
[ :on_attribute_end, nil ]
[ :on_attribute, nil ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, nil ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, nil ]
[ :on_attr_value, 'foo&hoge;bar' ]
[ :on_attribute_end, nil ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :on_attribute, nil ]
[ :on_attr_value, 'hoge' ]
[ :on_attribute_end, nil ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :parse_error, "unclosed start tag `hoge' meets another tag" ]
[ :on_stag_end, 'hoge' ]
[ :on_stag, 'fuga' ]
[ :on_stag_end, 'fuga' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :on_attribute, 'HOGE' ]
[ :on_attr_value, 'FUGA' ]
[ :on_attribute_end, 'HOGE' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :parse_error, "parse error at `='" ]
[ :on_attribute, nil ]
[ :on_attr_value, 'fuga' ]
[ :on_attribute_end, nil ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, 'foo' ]
[ :on_attr_value, 'bar' ]
[ :on_attribute_end, 'foo' ]
[ :parse_error, "unclosed start tag `hoge' meets another tag" ]
[ :on_stag_end, 'hoge' ]
[ :on_stag, 'fuga' ]
[ :on_stag_end, 'fuga' ]
''
[ :on_stag, 'hoge' ]
[ :on_attribute, nil ]
[ :on_attr_value, 'foo' ]
[ :on_attribute_end, nil ]
[ :parse_error, "parse error at `='" ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :parse_error, "parse error at `='" ]
[ :parse_error, "parse error at `\"'" ]
[ :on_attribute, nil ]
[ :on_attr_value, 'fuga' ]
[ :on_attribute_end, nil ]
[ :parse_error, "parse error at `\"'" ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :parse_error, "parse error at `\"'" ]
[ :on_attribute, nil ]
[ :on_attr_value, 'fuga' ]
[ :on_attribute_end, nil ]
[ :parse_error, "parse error at `\"'" ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :parse_error, "parse error at `\"'" ]
[ :on_attribute, nil ]
[ :on_attr_value, 'fuga' ]
[ :on_attribute_end, nil ]
[ :parse_error, "parse error at `\"'" ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :parse_error, "parse error at `='" ]
[ :on_attribute, nil ]
[ :on_attr_value, 'fuga' ]
[ :on_attribute_end, nil ]
[ :on_stag_end, 'hoge' ]
TESTCASEEND
# Case table named 'bang_tag'; the surviving rows are :parse_error and
# :on_comment expectations.
# NOTE(review): this table is badly truncated in this copy -- most rows end
# mid-string and only one non-empty input survives, so markup appears to
# have been stripped; restore it from the upstream source before relying
# on it.
deftestcase 'bang_tag', <<-'TESTCASEEND'
''
[ :parse_error, "parse error at `'
[ :parse_error, "parse error at `'
[ :on_comment, '' ]
'fuga'
[ :parse_error, "parse error at `>'
[ :parse_error, "parse error at `' ]
TESTCASEEND
# Case table for a doctype carrying an internal DTD subset: the listed
# expectation is an :on_doctype row followed by the "DTD subset is found
# but it is not permitted in HTML" parse error.
# NOTE(review): the input literal appears to have lost its leading markup
# in this copy; check against the upstream source.
deftestcase 'internal_dtd', <<-'TESTCASEEND'
' ]>'
[ :on_doctype, 'hoge', nil, nil ]
[ :parse_error, "DTD subset is found but it is not permitted in HTML" ]
TESTCASEEND
# Case table for document type declarations: each expected :on_doctype row
# carries a name plus two further fields, exactly one of which is 'fuga'
# and the other nil.
# NOTE(review): presumably those fields are the public and system ids --
# confirm against XMLScan::Visitor. All inputs read '' in this copy, so
# markup appears to have been stripped; check against the upstream source.
deftestcase 'doctype', <<-'TESTCASEEND'
''
[ :on_doctype, 'hoge', 'fuga', nil ]
''
[ :on_doctype, 'hoge', nil, 'fuga' ]
''
[ :on_doctype, 'hoge', 'fuga', nil ]
''
[ :on_doctype, 'hoge', nil, 'fuga' ]
TESTCASEEND
# Case table for the document prolog: expected rows mix :on_doctype,
# :on_prolog_space, :on_comment and :on_pi ahead of the first :on_stag,
# plus parse errors such as a second document type declaration.
# NOTE(review): most inputs read '' and several :parse_error rows are cut
# mid-string in this copy -- looks like markup was stripped; check against
# the upstream source.
deftestcase 'prolog', <<-'TESTCASEEND'
''
[ :on_doctype, 'hoge', nil, nil ]
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_doctype, 'hoge', nil, nil ]
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_doctype, 'hoge', nil, nil ]
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
[ :parse_error, "parse error at ` '
[ :on_prolog_space, ' ' ]
[ :on_doctype, 'hoge', nil, nil ]
[ :on_prolog_space, ' ' ]
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_comment, 'hoge' ]
[ :on_doctype, 'hoge', nil, nil ]
[ :on_pi, '', 'fuga?' ]
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
''
[ :on_comment, 'hoge' ]
[ :on_doctype, 'hoge', nil, nil ]
[ :on_comment, 'fuga' ]
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
' '
[ :on_comment, 'hoge' ]
[ :on_prolog_space, ' ' ]
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
' '
[ :on_pi, '', 'hoge?' ]
[ :on_prolog_space, ' ' ]
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
'hoge'
[ :on_chardata, 'hoge' ]
[ :parse_error, "parse error at `hoge'
[ :on_chardata, '>hoge' ]
[ :parse_error, "parse error at ` fuga '
[ :on_pi, '', 'hoge?' ]
[ :on_chardata, ' fuga ' ]
[ :parse_error, "parse error at `'
[ :on_doctype, 'hoge', nil, nil ]
[ :parse_error, "another document type declaration is found" ]
[ :on_doctype, 'fuga', nil, nil ]
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
TESTCASEEND
end
class TestHTMLScannerCDATA < Test::Unit::TestCase
include DefTestCase
# Visitor that, each time a start tag ends, also asks its scanner for the
# CDATA content and records it as a :cdata_content row.
class CDATAContentTestVisitor < TestHTMLScanner::Visitor
  # Creates an HTMLScanner reporting to this visitor, keeps it in @scanner
  # and returns it.
  def make_scanner
    scanner = XMLScan::HTMLScanner.new(self)
    @scanner = scanner
  end

  # Delegates to the inherited handler first, then appends the text
  # returned by the scanner's get_cdata_content to @result.
  def on_stag_end(name)
    super(name)
    content = @scanner.get_cdata_content
    @result.push([:cdata_content, content])
  end
end
private
# Builds a fresh CDATA-recording visitor in @v and lets it create the
# scanner that is stored in @s.
def setup
  recorder = CDATAContentTestVisitor.new
  @v = recorder
  @s = recorder.make_scanner
end
# Feeds +src+ to the scanner held in @s, then returns whatever the
# visitor held in @v reports from #result.
def parse(src)
  @s.parse(src)
  @v.result
end
public
# Case table for CDATA content retrieval: after each :on_stag_end row a
# :cdata_content row lists the text expected from get_cdata_content,
# followed by :on_etag where an end tag is expected.
# NOTE(review): the inputs contain no tags although the expected rows
# mention them -- looks like markup was stripped from this copy; check
# against the upstream source.
deftestcase 'cdata_content', <<-'TESTCASEEND'
'fuga'
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
[ :cdata_content, 'fuga' ]
[ :on_etag, 'hoge' ]
'fugafuga'
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
[ :cdata_content, 'fugafuga' ]
[ :on_etag, 'hoge' ]
'><><><<><>><>'
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
[ :cdata_content, '><><><<><>><>' ]
[ :on_etag, 'hoge' ]
'fuga'
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
[ :cdata_content, 'fuga' ]
[ :parse_error, "parse error at `'" ]
[ :on_chardata, '' ]
'fuga<'
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
[ :cdata_content, 'fuga<' ]
'fuga>'
[ :on_stag, 'hoge' ]
[ :on_stag_end, 'hoge' ]
[ :cdata_content, 'fuga>' ]
TESTCASEEND
end
# When this file is the script being executed, load runtest.rb from the
# directory of the running script.
if $0 == __FILE__
  load "#{File.dirname($0)}/runtest.rb"
end