module Nokogiri
  module HTML
    ###
    # An HTML document. Extends Nokogiri::XML::Document with helpers for the
    # Content-Type meta tag, HTML-flavoured serialization defaults, fragment
    # creation and an HTML-specific +parse+ entry point.
    class Document < Nokogiri::XML::Document
      ###
      # Get the meta tag encoding for this document. If there is no
      # Content-Type meta tag, or the tag has no +content+ attribute, or the
      # attribute does not declare a charset, then nil is returned.
      def meta_encoding
        meta = meta_content_type
        return nil unless meta

        # +content+ may be missing and may not contain "charset=", so the
        # match result has to be checked before reading the capture group.
        match = /charset\s*=\s*([\w-]+)/i.match(meta['content'].to_s)
        match && match[1]
      end

      ###
      # Set the meta tag encoding for this document. If there is no meta
      # content tag, nil is returned and the encoding is not set.
      def meta_encoding=(encoding)
        meta = meta_content_type
        return nil unless meta

        meta['content'] = "text/html; charset=%s" % encoding
        encoding
      end

      ####
      # Serialize Node using +options+. Save options can also be set using a
      # block. See SaveOptions.
      #
      # These two statements are equivalent:
      #
      #  node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
      #
      # or
      #
      #   node.serialize(:encoding => 'UTF-8') do |config|
      #     config.format.as_xml
      #   end
      #
      # When +options+ has no :save_with entry, one is stored in the given
      # hash (FORMAT | AS_HTML | NO_DECLARATION | NO_EMPTY_TAGS) before the
      # call is forwarded to the superclass.
      def serialize(options = {}, &block)
        options[:save_with] ||= XML::Node::SaveOptions::FORMAT |
                                XML::Node::SaveOptions::AS_HTML |
                                XML::Node::SaveOptions::NO_DECLARATION |
                                XML::Node::SaveOptions::NO_EMPTY_TAGS
        # Bare super forwards +options+ and the block unchanged.
        super
      end

      ####
      # Create a Nokogiri::XML::DocumentFragment from +tags+
      def fragment(tags = nil)
        DocumentFragment.new(self, tags, self.root)
      end

      class << self
        ###
        # Parse HTML.  +thing+ may be a String, or any object that
        # responds to _read_ and _close_ such as an IO, or StringIO.
        # +url+ is resource where this document is located.  +encoding+ is the
        # encoding that should be used when processing the document. +options+
        # is a number that sets options in the parser, such as
        # Nokogiri::XML::ParseOptions::RECOVER.  See the constants in
        # Nokogiri::XML::ParseOptions.
        #
        # If a block is given, the ParseOptions object is yielded to it before
        # parsing. A nil or empty +string_or_io+ yields a new, empty document.
        def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
          # Integer (rather than Fixnum) so the check also works on Rubies
          # where the Fixnum constant no longer exists.
          options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)

          # Give the options to the user
          yield options if block_given?

          # Fall back to the input's own encoding, unless it is binary.
          if string_or_io.respond_to?(:encoding)
            unless string_or_io.encoding.name == "ASCII-8BIT"
              encoding ||= string_or_io.encoding.name
            end
          end

          if string_or_io.respond_to?(:read)
            url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
            return read_io(string_or_io, url, encoding, options.to_i)
          end

          # read_memory pukes on empty docs
          return new if string_or_io.nil? || string_or_io.empty?

          read_memory(string_or_io, url, encoding, options.to_i)
        end
      end

      private

      ###
      # Find the first meta element whose +http-equiv+ attribute matches
      # Content-Type (case-insensitively). Returns nil when there is none.
      def meta_content_type
        css('meta').find { |node| node['http-equiv'] =~ /Content-Type/i }
      end
    end
  end
end