require 'cgi'
require 'nokogiri'
module Jekyll
module TableOfContents
# Matches every character that is not a Unicode word character, a hyphen or a
# space; used to strip punctuation from heading text when deriving ids.
# The former Ruby 1.8 fallback (/[^\w\- ]/) was unreachable: this file already
# uses `key: value` hash literals, which Ruby 1.8 cannot parse.
PUNCTUATION_REGEXP = /[^\p{Word}\- ]/u
# Scans an HTML fragment for h1-h6 headings and can render them as a table of
# contents and/or inject an anchor element in front of each heading's content.
class Parser
  # @return the Nokogiri document fragment parsed from the input HTML
  attr_reader :doc

  # @param html [String] HTML fragment to scan for headings
  def initialize(html)
    @doc = Nokogiri::HTML::DocumentFragment.parse(html)
    @entries = parse_content
  end

  # Renders the collected headings as a flat unordered list of links, one
  # <li> per heading, each linking to "#<id><uniq>".
  #
  # NOTE(review): the string literal that held this markup was missing from
  # the source, so the class names used here (section-nav, toc-entry,
  # toc-<tag>) are an assumption -- confirm against the consuming stylesheet.
  #
  # @return [String] HTML of the table of contents
  def build_toc
    items = @entries.map do |entry|
      target = "#{entry[:id]}#{entry[:uniq]}"
      # entry[:text] is the node's decoded text, so characters such as "<" and
      # "&" must be escaped again before they are placed back into markup.
      label = CGI.escapeHTML(entry[:text])
      %Q{<li class="toc-entry toc-#{entry[:node_name]}"><a href="##{target}">#{label}</a></li>\n}
    end

    %Q{<ul class="section-nav">\n#{items.join}</ul>}
  end

  # Inserts an anchor element before the first child of every collected
  # heading, using the same "<id><uniq>" target that build_toc links to.
  #
  # NOTE(review): the anchor markup was missing from the source (an empty
  # string was inserted); attribute and class names are an assumption.
  #
  # @return [String] the inner HTML of the modified fragment
  def inject_anchors_into_html
    @entries.each do |entry|
      target = "#{entry[:id]}#{entry[:uniq]}"
      entry[:content_node].add_previous_sibling(
        %Q{<a id="#{target}" class="anchor" href="##{target}" aria-hidden="true"><span class="octicon octicon-link"></span></a>}
      )
    end

    @doc.inner_html
  end

  # @return [String] the table of contents followed by the anchored HTML
  def toc
    build_toc + inject_anchors_into_html
  end

  private

  # parse logic is from html-pipeline toc_filter
  # https://github.com/jch/html-pipeline/blob/v1.1.0/lib/html/pipeline/toc_filter.rb
  #
  # Collects one entry per heading that has at least one child node.
  #
  # @return [Array<Hash>] entries with :id, :uniq, :text, :node_name and
  #   :content_node (the heading's first child)
  def parse_content
    seen = Hash.new(0)

    @doc.css('h1, h2, h3, h4, h5, h6').each_with_object([]) do |node, entries|
      text = node.text
      id = text.downcase
               .gsub(PUNCTUATION_REGEXP, '') # remove punctuation
               .gsub(' ', '-')               # replace spaces with dash

      # Repeated ids get a "-N" suffix; the counter advances even for headings
      # that end up skipped below.
      uniq = seen[id] > 0 ? "-#{seen[id]}" : ''
      seen[id] += 1

      first_child = node.children.first
      next unless first_child

      entries << {
        id: id,
        uniq: uniq,
        text: text,
        node_name: node.name,
        content_node: first_child
      }
    end
  end
end
end
# Liquid filters exposing the table-of-contents parser. Every filter is a
# no-op (returns its input unchanged) unless the current page's 'toc' entry,
# read from the Liquid context registers, is truthy.
module TableOfContentsFilter
  # @param html [String] rendered page content
  # @return [String] only the table of contents, or html when toc is off
  def toc_only(html)
    if @context.registers[:page]['toc']
      Jekyll::TableOfContents::Parser.new(html).build_toc
    else
      html
    end
  end

  # @param html [String] rendered page content
  # @return [String] html with heading anchors injected, or html unchanged
  #   when toc is off
  def inject_anchors(html)
    if @context.registers[:page]['toc']
      Jekyll::TableOfContents::Parser.new(html).inject_anchors_into_html
    else
      html
    end
  end

  # @param html [String] rendered page content
  # @return [String] table of contents followed by the anchored html, or html
  #   unchanged when toc is off
  def toc(html)
    if @context.registers[:page]['toc']
      Jekyll::TableOfContents::Parser.new(html).toc
    else
      html
    end
  end
end
end
# Register the filter module with Liquid at load time; its public methods
# (toc_only, inject_anchors, toc) are the filters this file provides.
Liquid::Template.register_filter(Jekyll::TableOfContentsFilter)