module RDF::RDFa class Reader < RDF::Reader ## # Nokogiri implementation of an XML parser. # # @see http://nokogiri.org/ module Nokogiri ## # Returns the name of the underlying XML library. # # @return [Symbol] def self.library :nokogiri end # Proxy class to implement uniform element accessors class NodeProxy attr_reader :node attr_reader :parent def initialize(node, parent = nil) @node = node @parent = parent end ## # Element language # # From HTML5 [3.2.3.3] # If both the lang attribute in no namespace and the lang attribute in the XML namespace are set # on an element, user agents must use the lang attribute in the XML namespace, and the lang # attribute in no namespace must be ignored for the purposes of determining the element's # language. # # @return [String] def language language = case when @node.document.is_a?(::Nokogiri::HTML::Document) && @node.attributes["xml:lang"] @node.attributes["xml:lang"].to_s when @node.document.is_a?(::Nokogiri::HTML::Document) && @node.attributes["lang"] @node.attributes["lang"].to_s when @node.attribute_with_ns("lang", RDF::XML.to_s) @node.attribute_with_ns("lang", RDF::XML.to_s) when @node.attribute("lang") @node.attribute("lang").to_s end end ## # Return xml:base on element, if defined # # @return [String] def base @node.attribute_with_ns("base", RDF::XML.to_s) end def display_path @display_path ||= begin path = [] path << parent.display_path if parent path << @node.name case @node when ::Nokogiri::XML::Element then path.join("/") when ::Nokogiri::XML::Attr then path.join("@") else path.join("?") end end end ## # Return true of all child elements are text # # @return [Array<:text, :element, :attribute>] def text_content? @node.children.all? {|c| c.text?} end ## # Retrieve XMLNS definitions for this element # # @return [Hash{String => String}] def namespaces @node.namespace_definitions.inject({}) {|memo, ns| memo[ns.prefix] = ns.href.to_s; memo } end ## # Children of this node # # @return [NodeSetProxy] def children NodeSetProxy.new(@node.children, self) end ## # Proxy for everything else to @node def method_missing(method, *args) @node.send(method, *args) end end ## # NodeSet proxy class NodeSetProxy attr_reader :node_set attr_reader :parent def initialize(node_set, parent) @node_set = node_set @parent = parent end ## # Return a proxy for each child # # @yield(child) # @yieldparam(NodeProxy) def each @node_set.each do |c| yield NodeProxy.new(c, parent) end end ## # Proxy for everything else to @node_set def method_missing(method, *args) @node_set.send(method, *args) end end ## # Initializes the underlying XML library. # # @param [Hash{Symbol => Object}] options # @return [void] def initialize_xml(input, options = {}) require 'nokogiri' unless defined?(::Nokogiri) @doc = case input when ::Nokogiri::HTML::Document, ::Nokogiri::XML::Document input else # Try to detect charset from input options[:encoding] ||= input.charset if input.respond_to?(:charset) # Otherwise, default is utf-8 options[:encoding] ||= 'utf-8' case @host_language when :html4, :html5 ::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding]) else ::Nokogiri::XML.parse(input, base_uri.to_s, options[:encoding]) end end end # Determine the host language and/or version from options and the input document def detect_host_language_version(input, options) @host_language = options[:host_language] ? options[:host_language].to_sym : nil @version = options[:version] ? options[:version].to_sym : nil return if @host_language && @version # Snif version based on input case input when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document doc_type_string = input.children.detect {|c| c.is_a?(::Nokogiri::XML::DTD)} version_attr = input.root && input.root.attribute("version").to_s root_element = input.root.name.downcase root_namespace = input.root.namespace.to_s root_attrs = input.root.attributes content_type = case when root_element == "html" && input.is_a?(Nokogiri::HTML::Document) "text/html" when root_element == "html" && input.is_a?(Nokogiri::XML::Document) "application/xhtml+html" end else content_type = input.content_type if input.respond_to?(:content_type) # Determine from head of document head = if input.respond_to?(:read) input.rewind string = input.read(1000) input.rewind string.to_s else input.to_s[0..1000] end doc_type_string = head.match(%r(]*>)m).to_s root = head.match(%r(<[^!\?>]*>)m).to_s root_element = root.match(%r(^<(\S+)[ >])) ? $1 : "" version_attr = root.match(/version\s+=\s+(\S+)[\s">]/m) ? $1 : "" head_element = head.match(%r(