Sha256: ecb9ef14e0d7a04313e369885639b2ffd61e61e4f4a76f78c0c1283feafdbb3a

Contents?: true

Size: 1.43 KB

Versions: 2

Compression:

Stored size: 1.43 KB

Contents

module Escapement
  # A tag represents an entity that may or may not have child elements.
  # Once we extract the data about this DOM node, we recursively continue
  # the traversal until we reach the leaf text node.
  class Tag
    # NOTE(review): `process_children` is not defined in this class; it is
    # presumably supplied by Traversal -- confirm against that module.
    include Traversal

    # @return [Object] the DOM node this tag wraps
    # @return [Array<Hash>] entity descriptions collected so far
    attr_reader :node, :entities

    # @param node [Object] DOM node; this class calls #name, #text and
    #   #attributes on it (looks like a Nokogiri node -- TODO confirm)
    # @param start_position [Integer] offset at which this node's text begins
    def initialize(node, start_position)
      @node = node
      @start_position = @current_position = start_position
      @entities = []
    end

    # Records an entity describing this node, then hands off to
    # `process_children` to continue the traversal.
    #
    # The recorded entity contains:
    # * +type+       - normalized type name (see #node_to_type)
    # * +html_tag+   - the raw node name
    # * +position+   - [start, end], where end is start plus the length of
    #   the node's text
    # * +attributes+ - the filtered attributes, mapped name => value
    #
    # @return whatever `process_children` returns
    def process
      @entities << {
        type: node_to_type,
        html_tag: node.name,
        position: [@current_position, @current_position + node.text.length],
        attributes: Hash[filtered_attributes.map { |k, v| [k, v.value] }]
      }

      process_children
    end

    private

    # Translates the node's tag name into a descriptive entity type.
    # Unrecognized tag names are passed through unchanged.
    #
    # @return [String]
    def node_to_type
      case node.name
      when 'p' then 'paragraph'
      when 'a' then 'link'
      when 'i', 'em' then 'italic'
      when 'u' then 'underline'
      when 'strong', 'b' then 'bold'
      when 'abbr' then 'abbreviation'
      when 'q' then 'quote'
      when 'pre' then 'preformatted'
      when 'img' then 'image'
      when 'li' then 'list_item'
      when 'sup' then 'superscript'
      when 'sub' then 'subscript'
      when 'br' then 'line_break'
      # Anchored on purpose: `when` uses Regexp#===, which matches anywhere
      # in the string, so an unanchored /h\d/ would also classify any tag
      # name merely containing "h<digit>" (e.g. "graph1") as a header.
      when /\Ah\d\z/ then 'header'
      else node.name
      end
    end

    # Selects the node attributes accepted by the Attributes filter for this
    # tag. The filter named after the tag is used when Attributes responds to
    # it; otherwise the +default+ filter is used.
    #
    # NOTE(review): `respond_to?` is also true for any public method that
    # Attributes inherits (e.g. `name`, `hash`), so a tag with such a name
    # would pick a non-filter method -- confirm whether that can occur.
    #
    # @return [Hash] the subset of `node.attributes` for which the filter
    #   returned a truthy value
    def filtered_attributes
      method_name = Attributes.respond_to?(node.name) ? node.name : :default
      node.attributes.select(&Attributes.method(method_name))
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
escapement-0.1.3 lib/escapement/tag.rb
escapement-0.1.2 lib/escapement/tag.rb