require 'htmlentities' module RDF::RDFa class Reader < RDF::Reader ## # REXML implementation of an XML parser. # # @see http://www.germane-software.com/software/rexml/ module REXML ## # Returns the name of the underlying XML library. # # @return [Symbol] def self.library :rexml end # Proxy class to implement uniform element accessors class NodeProxy attr_reader :node attr_reader :parent def initialize(node, parent = nil) @node = node @parent = parent end ## # Element language # # From HTML5 [3.2.3.3] # If both the lang attribute in no namespace and the lang attribute in the XML namespace are set # on an element, user agents must use the lang attribute in the XML namespace, and the lang # attribute in no namespace must be ignored for the purposes of determining the element's # language. # # @return [String] def language language = case when @node.attribute("lang", RDF::XML.to_s) @node.attribute("lang", RDF::XML.to_s) when @node.attribute("lang") @node.attribute("lang").to_s end end ## # Return xml:base on element, if defined # # @return [String] def base @node.attribute("base", RDF::XML.to_s) end def display_path @display_path ||= begin path = [] path << parent.display_path if parent path << @node.name case @node when ::REXML::Element then path.join("/") when ::REXML::Attribute then path.join("@") else path.join("?") end end end ## # Return true of all child elements are text # # @return [Array<:text, :element, :attribute>] def text_content? @node.children.all? {|c| c.is_a?(::REXML::Text)} end ## # Retrieve XMLNS definitions for this element # # @return [Hash{String => String}] def namespaces ns_decls = {} @node.attributes.each do |name, attr| next unless name =~ /^xmlns(?:\:(.+))?/ ns_decls[$1] = attr end ns_decls end ## # Children of this node # # @return [NodeSetProxy] def children NodeSetProxy.new(@node.children, self) end # Ancestors of this element, in order def ancestors @ancestors ||= parent ? parent.ancestors + [parent] : [] end ## # Inner text of an element # # @see http://apidock.com/ruby/REXML/Element/get_text#743-Get-all-inner-texts # @return [String] def inner_text coder = HTMLEntities.new ::REXML::XPath.match(@node,'.//text()').map { |e| coder.decode(e) }.join end ## # Inner text of an element # # @see http://apidock.com/ruby/REXML/Element/get_text#743-Get-all-inner-texts # @return [String] def inner_html @node.children.map(&:to_s).join end ## # Node type accessors # # @return [Boolean] def element? @node.is_a?(::REXML::Element) end def attribute_nodes @attribute_nodes ||= NodeSetProxy.new(@node.children.select {|n| n.is_a?(::REXML::Attribute)}, self) end def xpath(*args) #NodeSetProxy.new(::REXML::XPath.match(@node, path, namespaces), self) ::REXML::XPath.match(@node, *args).map do |n| # Get node ancestors parent = n.ancestors.reverse.inject(nil) do |p,node| NodeProxy.new(node, p) end rescue nil NodeProxy.new(n, parent) end end # Simple case for