Sha256: 6d8607510733739958a2990997b37c7f445361c79718461192e93b9227d1ee82

Contents?: true

Size: 1.4 KB

Versions: 1

Compression:

Stored size: 1.4 KB

Contents

module Escapement
  # A tag represents an entity that may or may not have child elements.
  # Once we extract the data about this DOM node, we recursively continue
  # the traversal until we reach the leaf text node.
  class Tag
    include Traversal

    # Entity type recorded for each known HTML tag name. Tags that are not
    # listed here (and are not headings) use their own tag name as the type.
    TYPES_BY_TAG = {
      'p' => 'paragraph',
      'a' => 'link',
      'i' => 'italic',
      'em' => 'italic',
      'u' => 'underline',
      'strong' => 'bold',
      'b' => 'bold',
      'abbr' => 'abbreviation',
      'q' => 'quote',
      'pre' => 'preformatted',
      'img' => 'image',
      'li' => 'list_item',
      'sup' => 'superscript',
      'sub' => 'subscript'
    }.freeze

    # Matches exactly the HTML heading tags h1..h6. Anchored on both ends so
    # that names which merely contain "h<digit>" (e.g. "graph2d", "h10") are
    # not mistaken for headings.
    HEADING_TAG = /\Ah[1-6]\z/

    attr_reader :node, :entities

    # node           - the DOM node this tag wraps; the code here reads its
    #                  #name, #text and #attributes.
    # start_position - offset at which this node's text begins. It seeds both
    #                  @start_position and @current_position.
    #                  NOTE(review): @start_position is not read in this
    #                  class; presumably Traversal uses it - confirm.
    def initialize(node, start_position)
      @node = node
      @start_position = @current_position = start_position
      @entities = []
    end

    # Records an entity describing this node (type, tag name, text span and
    # filtered attributes) and then hands off to process_children.
    #
    # The span is [current position, current position + length of the node's
    # text]. process_children is not defined in this class (presumably it is
    # provided by Traversal); its return value is returned as-is.
    def process
      @entities << {
        type: node_to_type,
        html_tag: node.name,
        position: [@current_position, @current_position + node.text.length],
        attributes: attribute_values
      }

      process_children
    end

    private

    # Translates the node's tag name into an entity type: 'header' for
    # h1..h6, the mapped name for known tags, the tag name itself otherwise.
    def node_to_type
      tag = node.name
      return 'header' if HEADING_TAG =~ tag

      TYPES_BY_TAG.fetch(tag, tag)
    end

    # Builds a plain { attribute name => attribute value } hash from the
    # attributes that passed filtering.
    def attribute_values
      filtered_attributes.each_with_object({}) do |(name, attribute), values|
        values[name] = attribute.value
      end
    end

    # Keeps only the attributes accepted by the Attributes filter for this
    # tag; the filter is called with each attribute's name and object.
    def filtered_attributes
      node.attributes.select(&Attributes.method(attribute_filter_name))
    end

    # Chooses which Attributes filter to apply: the one named after the tag
    # when Attributes itself defines it, otherwise :default.
    #
    # Only singleton methods are considered. A plain respond_to? check would
    # also accept methods every module inherits (name, class, hash, ...), so
    # a document containing e.g. a <name> element would dispatch to an
    # unrelated method instead of the default filter.
    def attribute_filter_name
      tag = node.name
      Attributes.singleton_methods.any? { |filter| filter.to_s == tag } ? tag : :default
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
escapement-1.0.0 lib/escapement/tag.rb