# :stopdoc: require 'htree/scan' require 'htree/htmlinfo' require 'htree/text' require 'htree/tag' require 'htree/leaf' require 'htree/doc' require 'htree/elem' require 'htree/raw_string' require 'htree/context' require 'htree/encoder' require 'htree/fstr' module HTree # :nodoc: # HTree.parse parses input and return a document tree. # represented by HTree::Doc. # # input should be a String or # an object which respond to read or open method. # For example, IO, StringIO, Pathname, URI::HTTP and URI::FTP are acceptable. # Note that the URIs need open-uri. # # HTree.parse guesses input is HTML or not and XML or not. # # If it is guessed as HTML, the default namespace in the result is set to http://www.w3.org/1999/xhtml # regardless of input has XML namespace declaration or not nor even it is pre-XML HTML. # # If it is guessed as HTML and not XML, all element and attribute names are downcaseed. # # If opened file or read content has charset method, # HTree.parse decode it according to $KCODE before parsing. # Otherwise HTree.parse assumes the character encoding of the content is # compatible to $KCODE. # Note that the charset method is provided by URI::HTTP with open-uri. def HTree.parse(input) HTree.with_frozen_string_hash { parse_as(input, false) } end # HTree.parse_xml parses input as XML and # return a document tree represented by HTree::Doc. # # It behaves almost same as HTree.parse but it assumes input is XML # even if no XML declaration. # The assumption causes following differences. # * doesn't downcase element name. # * The content of