module Sablon
  class HTMLConverter
    # Converts a nokogiri HTML fragment into an equivalent AST structure.
    #
    # Every node is looked up in Sablon::Configuration's permitted_html_tags
    # hash. Tags that define an ast_class are instantiated; tags without one
    # are transparent and only their children are converted.
    class ASTBuilder
      # The converted AST nodes (nil entries removed).
      attr_reader :nodes

      # Converts +nodes+ and returns the resulting array of AST nodes.
      #
      # env::        passed unchanged to every AST class constructor
      # nodes::      enumerable of HTML nodes to convert
      # properties:: hash of properties inherited by every converted node
      def self.html_to_ast(env, nodes, properties)
        new(env, nodes, properties).nodes
      end

      # Checks if there are any block level tags in the current node set.
      # This is used at the root level to determine if top level text nodes
      # should be removed.
      #
      # Returns the first node whose tag type is :block (truthy), or nil when
      # there is none. Raises ArgumentError for an unregistered tag.
      def self.any_block_tags?(nodes)
        nodes.find { |node| fetch_tag(node.name).type == :block }
      end

      # Retrieves a HTMLTag instance from the permitted_html_tags hash or
      # raises an ArgumentError if the tag is not registered.
      def self.fetch_tag(tag_name)
        tag_name = tag_name.to_sym
        tag = Sablon::Configuration.instance.permitted_html_tags[tag_name]
        return tag if tag

        raise ArgumentError, "Don't know how to handle HTML tag: #{tag_name}"
      end

      private

      def initialize(env, nodes, properties)
        @env = env
        @nodes = process_nodes(nodes, properties).compact
      end

      # Loops over HTML nodes converting them to their configured AST class.
      # Nodes whose tag has no AST class are replaced by the conversion of
      # their children, which is why the result is flattened.
      def process_nodes(html_nodes, properties)
        html_nodes.flat_map do |node|
          # get tags from config
          parent_tag = fetch_tag(node.parent.name) if node.parent.name
          tag = fetch_tag(node.name)

          # remove all text nodes if the tag doesn't accept them
          node.search('./text()').remove if drop_text?(tag)

          # check node hierarchy
          validate_structure(parent_tag, tag)

          # merge properties
          local_props = merge_node_properties(node, tag, properties)
          if tag.ast_class
            tag.ast_class.new(@env, node, local_props)
          else
            process_nodes(node.children, local_props)
          end
        end
      end

      # Retrieves a HTMLTag instance from the permitted_html_tags hash or
      # raises an ArgumentError if the tag is not registered in the hash.
      def fetch_tag(tag_name)
        self.class.fetch_tag(tag_name)
      end

      # Checks that the current tag is an allowed child of the parent tag and
      # raises a ContextError if it is not. When there is no parent tag no
      # check is performed.
      def validate_structure(parent, child)
        return unless parent
        return if parent.allowed_child?(child)

        msg = "#{child.name} is not a valid child element of #{parent.name}."
        raise ContextError, "Invalid HTML structure: #{msg}"
      end

      # If the node doesn't allow inline elements, or text specifically,
      # drop all text nodes. This is largely meant to prevent whitespace
      # between tags from raising an invalid structure error. Although it
      # will purge the node whether it contains nonblank characters or not.
      def drop_text?(child)
        text = fetch_tag(:text)
        !child.allowed_child?(text)
      end

      # Merges node properties in a specific order of precedence: properties
      # passed down from the parent are overridden by the tag's configured
      # properties, which are in turn overridden by the node's inline styles.
      # Returns a new hash; none of the inputs are modified.
      def merge_node_properties(node, tag, parent_properties)
        style_props = parse_inline_style(node['style'])
        # allow inline styles to override parent styles passed down
        parent_properties.merge(tag.properties).merge(style_props)
      end

      # Parses the content of a style attribute into a hash of stripped
      # property name => stripped value. Returns an empty hash for nil.
      def parse_inline_style(style)
        return {} unless style

        style.split(';').each_with_object({}) do |declaration, props|
          # Split on the first colon only so that values which themselves
          # contain colons (e.g. "url(http://...)") stay intact.
          name, value = declaration.split(':', 2)
          name = name.to_s.strip
          # Skip empty declarations such as the one between ";;".
          next if name.empty?

          # A declaration with no colon, or with nothing after the colon,
          # maps to nil.
          props[name] = (value.nil? || value.empty?) ? nil : value.strip
        end
      end
    end
  end
end