# encoding: utf-8
require 'htmlentities'
module Infoboxer
module Tree
# This is the base class for all parse tree nodes.
#
# Basically, you'll
# never create instances of this class or its descendants by yourself,
# you will receive it from tree and use for navigations.
#
class Node
include ProcMe
def initialize(params = {})
@params = params
end
# Hash of node "params".
#
# Params notin is roughly the same as tag attributes in HTML. This
# is actual for complex nodes like images, tables, raw HTML tags and
# so on.
#
# The most actual params are typically exposed by node as instance
# methods (like {Heading#level}).
#
# @return [Hash]
attr_reader :params
# Node's parent in tree
# @return {Node}
attr_accessor :parent
def ==(other)
self.class == other.class && _eq(other)
end
# Position in parent's children array (zero-based)
def index
parent ? parent.index_of(self) : 0
end
# List of all sibling nodes (children of same parent)
def siblings
parent ? parent.children - [self] : Nodes[]
end
# List of siblings before this one
def prev_siblings
siblings.select { |n| n.index < index }
end
# List of siblings after this one
def next_siblings
siblings.select { |n| n.index > index }
end
# Node children list
def children
Nodes[] # redefined in descendants
end
# @private
# Used only during tree construction in {Parser}.
def can_merge?(_other)
false
end
# @private
# Whether node is empty (definition of "empty" varies for different
# kinds of nodes). Used mainly in {Parser}.
def empty?
false
end
# Textual representation of this node and its children, ready for
# pretty-printing. Use it like this:
#
# ```ruby
# puts page.lookup(:Paragraph).first.to_tree
# # Prints something like
# #
# # This
# # is
# #
# # pretty
# # complicated
# ```
#
# Useful for understanding page structure, and Infoboxer's representation
# of this structure
def to_tree(level = 0)
indent(level) + "<#{descr}>\n"
end
def inspect
text.empty? ? "#<#{descr}>" : "#<#{descr}: #{shorten_text}>"
end
# Node text representation. It is defined for all nodes so, that
# entire `Document#text` produce readable text-only representation
# of Wiki page. Therefore, rules are those:
# * inline-formatting nodes (text, bold, italics) just return the
# text;
# * paragraph-level nodes (headings, paragraphs, lists) add `"\n\n"`
# after text;
# * list items add marker before text;
# * nodes, not belonging to "main" text flow (references, templates)
# produce empty text.
#
# If you want just the text of some heading or list item (without
# "formatting" quircks), you can use {Node#text_} method.
#
def text
'' # redefined in descendants
end
# "Clean" version of node text: without trailing linefeeds, list
# markers and other things added for formatting.
#
def text_
text.strip
end
# See {Node#text_}
def to_s
# just aliases will not work when #text will be redefined in subclasses
text_
end
private
MAX_CHARS = 30
def shorten_text
txt = text_.sub(/^([^\n]+)\n.+$/m, '\1...')
txt.length > MAX_CHARS ? txt[0..MAX_CHARS] + '...' : txt
end
def clean_class
self.class.name.sub(/^.*::/, '')
end
def descr
if !params || params.empty?
clean_class.to_s
else
"#{clean_class}(#{show_params})"
end
end
def show_params(prms = nil)
(prms || params).map { |k, v| "#{k}: #{v.inspect}" }.join(', ')
end
def indent(level)
' ' * level
end
def _eq(_other)
fail(NotImplementedError, "#_eq should be defined in subclasses (called for #{self.class})")
end
def decode(str)
Node.coder.decode(str)
end
class << self
# Internal: descendandts DSL
def def_readers(*keys)
keys.each do |k|
define_method(k) { params[k] }
end
end
# Internal: HTML entities decoder.
def coder
@coder ||= HTMLEntities.new
end
end
end
end
end