#
# tests/testhtmlscan.rb
#
#   Copyright (C) UENO Katsuhiro 2002
#
# $Id: testhtmlscan.rb,v 1.19 2003/02/28 12:31:07 katsu Exp $
#

require 'test/unit'
require 'deftestcase'
require 'xmlscan/htmlscan'
require 'visitor'


# Table-driven tests for XMLScan::HTMLScanner.
#
# Every test feeds a source string to the scanner through #parse and compares
# the events recorded by the visitor with the expected event list. The tables
# are handed to +deftestcase+ (mixed in from DefTestCase) as raw heredoc text.
#
# NOTE(review): this file had been collapsed onto a handful of physical lines,
# which turned everything after the leading `#' into a comment. The line
# structure has been restored, but the fixture tables below look as if an
# extraction step stripped the markup out of them: many inputs are the empty
# string '' although non-empty events are expected, and a few entries have
# unbalanced quotes. The fixture tokens are kept exactly as found; presumably
# each entry is "source line, then one expected event per line" -- confirm the
# layout against DefTestCase and restore the real inputs from version control.
class TestHTMLScanner < Test::Unit::TestCase

  include DefTestCase

  # Visitor class derived from XMLScan::Visitor via RecordingVisitor.new_class;
  # its #result is what #parse returns.
  Visitor = RecordingVisitor.new_class(XMLScan::Visitor)

  private

  # Creates a fresh recording visitor and an HTML scanner reporting to it.
  def setup
    @v = Visitor.new
    @s = XMLScan::HTMLScanner.new(@v)
  end

  # Scans +src+ and returns whatever the visitor recorded.
  def parse(src)
    @s.parse src
    @v.result
  end

  public


  deftestcase 'html_comment', <<-'TESTCASEEND'
    ''
    [ :on_comment, ' hogefuga ' ]

    ''
    [ :on_comment, ' hogefuga ' ]

    ''
    [ :on_comment, ' hoge<<<>>><<>>fuga ' ]

    ''
    [ :on_comment, ' hoge-fuga ' ]

    ''
    [ :on_comment, ' hoge -- -- fuga ' ]

    ''
    [ :parse_error, "only whitespace can appear between two comments" ]
    [ :on_comment, ' hoge--fuga ' ]

    ''
    [ :parse_error, "only whitespace can appear between two comments"]
    [ :parse_error, "`-->' is found but comment must not end here"]
    [ :on_comment, '- hogefuga -' ]

    '' is found but comment must not end here"]
    [ :on_comment, '- hogefuga -' ]

    # should be parsed as ||
    ''
    [ :on_comment, '----' ]

    # should be parsed as |'
    [ :parse_error, "`-->' is found but comment must not end here"]
    [ :on_comment, '---' ]

    # should be parsed as |'
    [ :parse_error, "`-->' is found but comment must not end here"]
    [ :on_comment, '--' ]

    # should be parsed as |'
    [ :parse_error, "only whitespace can appear between two comments"]
    [ :parse_error, "`-->' is found but comment must not end here"]
    [ :on_comment, '-' ]

    ''
    [ :on_comment, '' ]

    ''
    [ :parse_error, 'unterminated comment meets EOF' ]
    [ :on_comment, '->' ]

    ''
    [ :parse_error, 'unterminated comment meets EOF' ]
    [ :on_comment, '>' ]

    'fuga'
    [ :on_comment, 'hoge' ]
    [ :on_chardata, 'fuga' ]

    '>'
    [ :on_comment, 'hoge' ]
    [ :on_chardata, '>' ]

    'hoge'
    [ :parse_error, "only whitespace can appear between two comments"]
    [ :on_comment, 'hoge--fuga' ]
    [ :on_chardata, 'hoge' ]
  TESTCASEEND


  deftestcase 'pi', <<-'TESTCASEEND'
    ''
    [ :on_pi, '', 'hoge fuga?' ]

    ''
    [ :on_pi, '', 'xml version="1.0"?' ]

    ''
    [ :on_pi, '', 'hoge fuga' ]

    ''
    [ :on_pi, '', 'hoge ' ]

    ''
    [ :on_pi, '', 'hoge' ]

    ''
    [ :on_pi, '', 'hoge <' [ :on_pi, '', 'hoge<' ]

    ''
    [ :on_pi, '', ' ' ]

    ''
    [ :on_pi, '', '' ]

    'fuga'
    [ :on_pi, '', 'hoge' ]
    [ :on_chardata, 'fuga' ]

    '>'
    [ :on_pi, '', 'hoge' ]
    [ :on_chardata, '>' ]
  TESTCASEEND


  deftestcase 'stag', <<-'TESTCASEEND'
    ''
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :parse_error, "unclosed start tag `hoge' meets another tag" ]
    [ :on_stag_end, 'hoge' ]
    [ :on_stag, 'fuga' ]
    [ :on_stag_end, 'fuga' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :parse_error, "unclosed start tag `hoge' meets another tag" ]
    [ :on_stag_end, 'hoge' ]
    [ :on_stag, 'fuga' ]
    [ :on_stag_end, 'fuga' ]

    '< hoge>'
    [ :parse_error, "parse error at `<'" ]
    [ :on_chardata, '< hoge>' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :parse_error, "parse error at `/'" ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :parse_error, "parse error at `='" ]
    [ :on_stag_end, 'hoge' ]

    '<=hoge >'
    [ :parse_error, "parse error at `<'" ]
    [ :on_chardata, '<=hoge >' ]

    '< =hoge >'
    [ :parse_error, "parse error at `<'" ]
    [ :on_chardata, '< =hoge >' ]

    'fuga'
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]
    [ :on_chardata, 'fuga' ]

    '>'
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]
    [ :on_chardata, '>' ]

    '< hoge>fuga'
    [ :parse_error, "parse error at `<'" ]
    [ :on_chardata, '< hoge>' ]
    [ :on_chardata, 'fuga' ]

    '< hoge>>'
    [ :parse_error, "parse error at `<'" ]
    [ :on_chardata, '< hoge>' ]
    [ :on_chardata, '>' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :parse_error, "parse error at `/'" ]
    [ :on_stag_end, 'hoge' ]

    'fuga'
    [ :on_stag, 'hoge' ]
    [ :parse_error, "parse error at `/'" ]
    [ :on_stag_end, 'hoge' ]
    [ :on_chardata, 'fuga' ]

    '>'
    [ :on_stag, 'hoge' ]
    [ :parse_error, "parse error at `/'" ]
    [ :on_stag_end, 'hoge' ]
    [ :on_chardata, '>' ]
  TESTCASEEND


  deftestcase 'attribute', <<-'TESTCASEEND'
    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_stag_end, 'hoge' ]

    ""
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, ' bar ' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_attribute, 'HOGE' ]
    [ :on_attr_value, 'FUGA' ]
    [ :on_attribute_end, 'HOGE' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :parse_error, "parse error at `/'" ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar/' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'b' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, 'a' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, 'b' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, 'c' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, 'ar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'b' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, 'a' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, 'b' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, 'c' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, '>' ]
    [ :on_attr_value, 'ar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_attribute, 'HOGE' ]
    [ :on_attr_value, 'FUGA' ]
    [ :on_attribute_end, 'HOGE' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'b' ]
    [ :on_attr_value, '' ]
    [ :on_attr_value, 'b' ]
    [ :on_attr_value, '' ]
    [ :on_attr_value, 'ar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar&fuga;bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar/baz%fuga' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'foo' ]
    [ :on_attribute_end, nil ]
    [ :parse_error, "parse error at `/'" ]
    [ :on_attribute, 'bar' ]
    [ :on_attr_value, 'bar/baz%fuga' ]
    [ :on_attribute_end, 'bar' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_attribute, 'hoge' ]
    [ :on_attr_value, 'fuga' ]
    [ :on_attribute_end, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'foo' ]
    [ :on_attribute_end, nil ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'foo' ]
    [ :on_attribute_end, nil ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, nil ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'foo&hoge;bar' ]
    [ :on_attribute_end, nil ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'hoge' ]
    [ :on_attribute_end, nil ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :parse_error, "unclosed start tag `hoge' meets another tag" ]
    [ :on_stag_end, 'hoge' ]
    [ :on_stag, 'fuga' ]
    [ :on_stag_end, 'fuga' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :on_attribute, 'HOGE' ]
    [ :on_attr_value, 'FUGA' ]
    [ :on_attribute_end, 'HOGE' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :parse_error, "parse error at `='" ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'fuga' ]
    [ :on_attribute_end, nil ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, 'foo' ]
    [ :on_attr_value, 'bar' ]
    [ :on_attribute_end, 'foo' ]
    [ :parse_error, "unclosed start tag `hoge' meets another tag" ]
    [ :on_stag_end, 'hoge' ]
    [ :on_stag, 'fuga' ]
    [ :on_stag_end, 'fuga' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'foo' ]
    [ :on_attribute_end, nil ]
    [ :parse_error, "parse error at `='" ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :parse_error, "parse error at `='" ]
    [ :parse_error, "parse error at `\"'" ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'fuga' ]
    [ :on_attribute_end, nil ]
    [ :parse_error, "parse error at `\"'" ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :parse_error, "parse error at `\"'" ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'fuga' ]
    [ :on_attribute_end, nil ]
    [ :parse_error, "parse error at `\"'" ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :parse_error, "parse error at `\"'" ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'fuga' ]
    [ :on_attribute_end, nil ]
    [ :parse_error, "parse error at `\"'" ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :parse_error, "parse error at `='" ]
    [ :on_attribute, nil ]
    [ :on_attr_value, 'fuga' ]
    [ :on_attribute_end, nil ]
    [ :on_stag_end, 'hoge' ]
  TESTCASEEND


  deftestcase 'bang_tag', <<-'TESTCASEEND'
    ''
    [ :parse_error, "parse error at `' [ :parse_error, "parse error at `' [ :on_comment, '' ]

    'fuga'
    [ :parse_error, "parse error at `>' [ :parse_error, "parse error at `' ]
  TESTCASEEND


  deftestcase 'internal_dtd', <<-'TESTCASEEND'
    ' ]>'
    [ :on_doctype, 'hoge', nil, nil ]
    [ :parse_error, "DTD subset is found but it is not permitted in HTML" ]
  TESTCASEEND


  deftestcase 'doctype', <<-'TESTCASEEND'
    ''
    [ :on_doctype, 'hoge', 'fuga', nil ]

    ''
    [ :on_doctype, 'hoge', nil, 'fuga' ]

    ''
    [ :on_doctype, 'hoge', 'fuga', nil ]

    ''
    [ :on_doctype, 'hoge', nil, 'fuga' ]
  TESTCASEEND


  deftestcase 'prolog', <<-'TESTCASEEND'
    ''
    [ :on_doctype, 'hoge', nil, nil ]
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_doctype, 'hoge', nil, nil ]
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_doctype, 'hoge', nil, nil ]
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]
    [ :parse_error, "parse error at ` ' [ :on_prolog_space, ' ' ]
    [ :on_doctype, 'hoge', nil, nil ]
    [ :on_prolog_space, ' ' ]
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_comment, 'hoge' ]
    [ :on_doctype, 'hoge', nil, nil ]
    [ :on_pi, '', 'fuga?' ]
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ''
    [ :on_comment, 'hoge' ]
    [ :on_doctype, 'hoge', nil, nil ]
    [ :on_comment, 'fuga' ]
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ' '
    [ :on_comment, 'hoge' ]
    [ :on_prolog_space, ' ' ]
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    ' '
    [ :on_pi, '', 'hoge?' ]
    [ :on_prolog_space, ' ' ]
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]

    'hoge'
    [ :on_chardata, 'hoge' ]
    [ :parse_error, "parse error at `hoge'
    [ :on_chardata, '>hoge' ]
    [ :parse_error, "parse error at ` fuga '
    [ :on_pi, '', 'hoge?' ]
    [ :on_chardata, ' fuga ' ]
    [ :parse_error, "parse error at `'
    [ :on_doctype, 'hoge', nil, nil ]
    [ :parse_error, "another document type declaration is found" ]
    [ :on_doctype, 'fuga', nil, nil ]
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]
  TESTCASEEND

end



# Tests for HTMLScanner#get_cdata_content, called from inside a visitor
# callback while the scan is in progress.
#
# NOTE(review): as in TestHTMLScanner, the fixture inputs below appear to have
# lost their markup; tokens are kept as found.
class TestHTMLScannerCDATA < Test::Unit::TestCase

  include DefTestCase

  # Recording visitor that owns its scanner so it can pull CDATA content as
  # soon as a start tag has been completely reported.
  class CDATAContentTestVisitor < TestHTMLScanner::Visitor

    # Builds the scanner bound to this visitor, remembers it and returns it.
    def make_scanner
      @scanner = XMLScan::HTMLScanner.new(self)
    end

    # Lets the parent class handle the event first (bare +super+ forwards
    # +name+), then appends the CDATA content fetched from the scanner.
    def on_stag_end(name)
      super
      s = @scanner.get_cdata_content
      # @result is not set in this file; presumably the event list kept by
      # the recording visitor base class -- confirm in visitor.rb.
      @result.push [ :cdata_content, s ]
    end

  end

  private

  # Creates the visitor and lets it build its own scanner.
  def setup
    @v = CDATAContentTestVisitor.new
    @s = @v.make_scanner
  end

  # Scans +src+ and returns whatever the visitor recorded.
  def parse(src)
    @s.parse src
    @v.result
  end

  public


  deftestcase 'cdata_content', <<-'TESTCASEEND'
    'fuga'
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]
    [ :cdata_content, 'fuga' ]
    [ :on_etag, 'hoge' ]

    'fugafuga'
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]
    [ :cdata_content, 'fugafuga' ]
    [ :on_etag, 'hoge' ]

    '><><><<><>><>'
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]
    [ :cdata_content, '><><><<><>><>' ]
    [ :on_etag, 'hoge' ]

    'fugafuga<'
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]
    [ :cdata_content, 'fuga<' ]

    'fuga>'
    [ :on_stag, 'hoge' ]
    [ :on_stag_end, 'hoge' ]
    [ :cdata_content, 'fuga>' ]
  TESTCASEEND

end


# When run directly (not required), hand over to the shared test runner script.
load "#{File.dirname($0)}/runtest.rb" if __FILE__ == $0