module HTML
  class Pipeline
    # HTML filter that prepends a named anchor to every header (h1-h6) in a
    # document, so each header can be linked to from a table of contents.
    #
    # The anchor name is derived from the header text: downcased, punctuation
    # removed, spaces replaced with dashes, then URI-escaped. When several
    # headers produce the same name, the second and later ones get a numeric
    # suffix ("-1", "-2", ...) so that every anchor name stays distinct.
    #
    # Headers without any child node are left untouched.
    #
    # TODO: besides adding the anchors, we should get around to eventually
    # generating the Table of Contents itself, with links to each header.
    class TableOfContentsFilter < Filter
      # Characters stripped from the header text when building an anchor
      # name: everything except word characters, dashes and spaces.
      # `\p{Word}` (rather than `\w`, which is ASCII-only) keeps non-ASCII
      # letters so they can be URI-escaped afterwards instead of being lost.
      PUNCTUATION_REGEXP = /[^\p{Word}\- ]/u

      # Inserts an anchor in front of the first child of every header found
      # in `doc` (presumably supplied by the Filter superclass -- not visible
      # in this file).
      #
      # Returns `doc`.
      def call
        headers = Hash.new(0) # anchor name => number of headers seen so far
        doc.css('h1, h2, h3, h4, h5, h6').each do |node|
          name = anchor_name(node.text)
          # The first occurrence keeps the bare name; later ones get "-1", "-2", ...
          uniq = headers[name] > 0 ? "-#{headers[name]}" : ''
          headers[name] += 1

          header_content = node.children.first
          next unless header_content # empty header: nothing to anchor before

          header_content.add_previous_sibling(
            %Q{<a name="#{name}#{uniq}" class="anchor" href="##{name}#{uniq}"></a>}
          )
        end
        doc
      end

      private

      # Builds the anchor name for a header from its text content.
      def anchor_name(text)
        name = text.downcase
        name.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
        name.tr!(' ', '-')                 # replace spaces with dash
        EscapeUtils.escape_uri(name)       # escape extended UTF-8 chars
      end
    end
  end
end