require 'nokogiri'
require File.join(File.dirname(__FILE__), 'hashtml', 'hash.rb')

#
# HashTML translates between HTML documents and Ruby Hash-like objects.
# This work is loosely inspired by the work done by CobraVsMongoose.
# (see http://cobravsmongoose.rubyforge.org/)
#
class HashTML
  attr_reader :root_node

  # Parses the given HTML into a tree of HashTML::Node and HashTML::Text
  # objects, reachable through #root_node.
  #
  #@param html [Nokogiri::HTML::Document, #to_s] an already parsed document, or
  #  anything whose #to_s yields an HTML string (it is parsed with Nokogiri::HTML)
  #@example
  #  html = '<span id="row_29" class="step param">true</span>'
  #  HashTML.new(html).to_h
  #  # the entry for the span element has this shape:
  #  # {
  #  #   "span" => {
  #  #     :children   => [ { :text => "true" } ],
  #  #     :attributes => { "id" => "row_29", "class" => "step param" }
  #  #   }
  #  # }
  #  # NOTE(review): the root node is built from the whole Nokogiri document, so
  #  # the span entry presumably sits nested under the wrapper nodes Nokogiri
  #  # adds around a fragment - confirm against a real run.
  def initialize(html)
    doc = html.is_a?(Nokogiri::HTML::Document) ? html : Nokogiri::HTML(html.to_s)
    @root_node = HashTML::Node.new(doc)
  end

  # Serializes the parsed tree back into an HTML string.
  #@return [String]
  def to_html
    @root_node.to_html
  end

  # Returns the nested Hash representation of the parsed tree.
  #@return [Hash]
  def to_h
    @root_node.to_h
  end

  # Dynamic accessor for the root node.
  #
  # * +hashtml.foo+ / +hashtml.foo(attrs)+ returns the root node when its name
  #   is "foo" (and its attributes include +attrs+), otherwise nil.
  # * +hashtml.foo?+ / +hashtml.foo?(attrs)+ returns the same check as a boolean.
  #
  # Every unknown message is answered this way, so a misspelled method name
  # yields nil instead of raising NoMethodError.
  #@param method [Symbol] name of the missing method
  #@param args [Array] optional first element: Hash of attribute pairs to match
  #@return [HashTML::Node, true, false, nil]
  def method_missing(method, *args)
    method = method.to_s
    attributes = args.first || {}
    if method.end_with?('?')
      _check_for_presence(method[0..-2], attributes)
    else
      _get_value(method, attributes)
    end
  end

  private

  # Boolean form of #_get_value.
  def _check_for_presence(key, attributes = {})
    !!_get_value(key, attributes)
  end

  # Returns the root node when it is named +key+ and carries every pair in
  # +attributes+; nil otherwise. A falsy +attributes+ skips the attribute check.
  # Hash#include_pairs? is not defined in this file (presumably it comes from
  # the required hashtml/hash.rb).
  def _get_value(key, attributes = {})
    return nil unless @root_node.name == key
    return @root_node unless attributes

    root_attributes = @root_node.attributes
    (root_attributes && root_attributes.include_pairs?(attributes)) ? @root_node : nil
  end

  class InvalidAttributeValuePairError < StandardError
  end

  public

  class << self
    # Builds a HashTML::Node (or HashTML::Text) tree from a Hash shaped like
    # the output of HashTML#to_h.
    #@param hash [Hash]
    #@return [HashTML::Node, HashTML::Text, nil]
    def to_hashtml(hash)
      convert_to_hashtml(hash)
    end

    # Returns an HTML string corresponding to the data structure of the given Hash.
    #@param hash [Hash]
    #@return [String]
    #@example
    #  hash = { "span" =>
    #           {
    #             :children   => [ { :text => "true" } ],
    #             :attributes => { "id" => "row_29", "class" => "step param" }
    #           }
    #         }
    #  HashTML.to_html(hash)
    #  # => "<span id=\"row_29\" class=\"step param\">true</span>"
    def to_html(hash)
      convert_to_hashtml(hash).to_html
    end

    private

    # Recursively converts one hash entry. A +:text+ key short-circuits into a
    # Text leaf; any other key becomes an element name. If the hash holds
    # several element keys, the node built from the last one is returned.
    def convert_to_hashtml(hash)
      hashtml = nil
      hash.each do |key, value|
        return HashTML::Text.new(value) if key == :text

        hashtml = HashTML::Node.new
        hashtml.name = key
        hashtml.attributes = value[:attributes] || {}
        # A missing :children key means "no children", mirroring :attributes.
        hashtml.children = (value[:children] || []).map { |child| convert_to_hashtml(child) }
      end
      hashtml
    end
  end

  # An element of the tree: a name, a Hash of attributes and an ordered list
  # of children (Node or Text instances).
  class Node
    # HTML elements that never take a closing tag.
    VOID_ELEMENTS = %w[area base br col embed hr img input link meta param source track wbr].freeze

    attr_accessor :name, :attributes, :children

    # Builds a node from a Nokogiri node, recursing into its children.
    # Without an argument, an empty node is created (no name, no attributes,
    # no children) to be filled in through the accessors.
    #@param node [Nokogiri::XML::Node, nil]
    def initialize(node = nil)
      @attributes = {}
      @children = []
      return unless node

      @name = node.name
      # Not every Nokogiri node answers #attributes, so it is checked first.
      @attributes = get_html_node_attributes(node) if node.respond_to?(:attributes)
      @children = get_html_node_children(node)
    end

    # Hash form of this node: { name => { children: [...], attributes: {...} } }.
    #@return [Hash]
    def to_h
      { @name => { children: @children.map(&:to_h), attributes: @attributes } }
    end

    # Serializes this node and its subtree. Attribute values are written
    # verbatim between double quotes (no escaping is applied here).
    # Childless void elements (see VOID_ELEMENTS) get no closing tag.
    #@return [String]
    def to_html
      attribute_list = @attributes.map { |k, v| "#{k}=\"#{v}\"" }.join(' ')
      separator = @attributes.any? ? ' ' : ''
      opening_tag = "<#{@name}#{separator}#{attribute_list}>"
      return opening_tag if @children.empty? && VOID_ELEMENTS.include?(@name.to_s.downcase)

      "#{opening_tag}#{@children.map(&:to_html).join}</#{@name}>"
    end

    # Dynamic child access.
    #
    # * +node.foo+ / +node.foo(attrs)+ returns the first child element named
    #   "foo" whose attributes include +attrs+, or nil.
    # * +node.foo?+ / +node.foo?(attrs)+ returns the same lookup as a boolean.
    # * +node.text+ returns the concatenated text of the direct Text children.
    # * +node.text = "x"+ replaces all direct Text children with one Text.
    # * +node.foo = other+ replaces every child element named "foo"; use
    #   +node.send(:foo=, attrs, other)+ to also filter by attributes.
    #
    # Every unknown message is answered this way, so a misspelled method name
    # yields nil instead of raising NoMethodError.
    #@param method [Symbol] name of the missing method
    #@param args [Array] +[attributes]+, +[new_value]+ or +[attributes, new_value]+
    def method_missing(method, *args)
      method = method.to_s
      attributes, new_value = args
      attributes ||= {}
      if method.end_with?('?')
        _check_for_presence(method[0..-2], attributes)
      elsif method.end_with?('=')
        # With a single argument (plain assignment) that argument is the new
        # value, not an attribute filter.
        new_value, attributes = attributes, {} if new_value.nil?
        _change_value(method[0..-2], attributes, new_value)
      else
        _get_value(method, attributes)
      end
    end

    private

    # Boolean form of #_get_value.
    def _check_for_presence(key, attributes = {})
      !!_get_value(key, attributes)
    end

    # For key "text": the joined text of the direct Text children.
    # Otherwise: the first child element named +key+ whose attributes include
    # every pair in +attributes+, or nil when there is none.
    def _get_value(key, attributes = {})
      return @children.grep(HashTML::Text).map(&:text).join if key == 'text'

      @children.find do |child|
        !child.is_a?(HashTML::Text) && child.name == key && child.attributes.include_pairs?(attributes)
      end
    end

    # For key "text": drops the direct Text children and appends a single new
    # Text holding +new_value+ after the remaining element children.
    # Otherwise: replaces every matching child element with +new_value+.
    def _change_value(key, attributes, new_value)
      if key == 'text'
        elements = @children.reject { |child| child.is_a?(HashTML::Text) }
        # Keep the list flat: a nested Array would break #to_h and #to_html.
        @children = elements + [HashTML::Text.new(new_value)]
      else
        @children.each_with_index do |child, index|
          next if child.is_a?(HashTML::Text)

          @children[index] = new_value if child.name == key && child.attributes.include_pairs?(attributes)
        end
      end
    end

    # Converts the Nokogiri children of +node+: text and CDATA nodes become
    # Text, elements become Node, anything else is dropped. Classes are
    # compared by exact name, so subclasses do not match.
    def get_html_node_children(node)
      converted = node.children.map do |child|
        case child.class.to_s
        when 'Nokogiri::XML::Text', 'Nokogiri::XML::CDATA'
          HashTML::Text.new(child.to_s)
        when 'Nokogiri::XML::Element'
          HashTML::Node.new(child)
        end
      end
      converted.compact
    end

    # Flattens the attributes of +node+ into a Hash of attribute name to the
    # result of calling #value on each attribute object.
    def get_html_node_attributes(node)
      node.attributes.each_with_object({}) do |(attr_name, attr), result|
        result[attr_name] = attr.value
      end
    end
  end

  # A text leaf of the tree.
  class Text
    attr_accessor :text

    #@param text [String]
    def initialize(text)
      @text = text
    end

    # Hash form of this leaf: { text: ... }.
    #@return [Hash]
    def to_h
      { text: @text }
    end

    # The stored text, returned verbatim.
    #@return [String]
    def to_html
      @text
    end
  end

  class ParseError < RuntimeError
  end
end