require 'erb'
require 'hpricot'

module Webby
module Filters

# The Outline filter is used to insert outline numbering into HTML heading
# tags (h1, h2, h3, etc.) and to generate a table of contents based on the
# heading tags. The table of contents is inserted into the page at the
# location of the <toc /> tag. If there is no <toc /> tag, then a table of
# contents will not be created but outline numbering will still take place.
#
# If a table of contents is desired without outline numbers being inserted
# into the heading tags, this can be specified in the attributes of the
# <toc /> tag itself.
#
#    <toc numbering="off" />
#
# This will generate a table of contents, but not insert outline numbering
# into the heading tags.
#
# The Outline filter will only work on valid HTML or XHTML pages. Therefore
# it should be used after any markup language filters (textile, markdown,
# etc.).
#
# The following attributes can be specified in the <toc /> tag itself to
# control how outline numbering is performed by the filter. The attributes
# can be used in combination with one another.
#
# === numbering
#
# If set to "off", this will prevent numbers from being inserted into the
# page. The default is "on".
#
#    <toc numbering="off" />
#
# === numbering_start
#
# This is the number to start with when inserting outline numbers into a
# page. The default is 1.
#
#    <toc numbering_start="3" />
#
# === toc_style
#
# The style of the Table of Contents list to be generated. This will be
# either "ol" for an ordered list or "ul" for an unordered list. The
# default is an ordered list.
#
#    <toc toc_style="ul" />
#
# === toc_range
#
# This limits the numbering to only a subset of the HTML heading tags. The
# default is to number all the heading tags.
#
#    <toc toc_range="h1-h3" />
#
# In this example, only the heading tags h1, h2, and h3 will be numbered
# and included in the table of contents listing.
#
# ==== Example
#
# Generate a table of contents using an unordered list, starting with the
# number 2, and only numbering heading levels 2, 3, and 4.
#
#    <toc numbering_start="2" toc_style="ul" toc_range="h2-h4" />
#
class Outline
  include ERB::Util

  # call-seq:
  #    Outline.new( html )
  #
  # Creates a new outline filter that will operate on the given
  # _html_ string.
  #
  def initialize(str)
    @str = str

    @cur_level = nil
    @base_level = nil
    @cur_depth = nil

    # one counter per heading level, h1 through h6
    @level = [0] * 6
    @h_rgxp = %r/\Ah(\d)\z/

    @numbering = true
    @numbering_start = 1

    @toc = []
    @toc_style = 'ol'
    @toc_range = 'h1-h6'
    @list_opening = nil
  end

  # call-seq:
  #    filter    => html
  #
  # Process the original html document passed to the filter when it was
  # created. The document will be scanned for heading tags (h1, h2, etc.)
  # and outline numbering and id attributes will be inserted. A table of
  # contents will also be created and inserted into the page if a <toc />
  # tag is found.
  #
  # For example, if there is a heading tag
  #
  #    <h3>Get Fuzzy</h3>
  #
  # somewhere in a page about comic strips, the tag might be altered as such
  #
  #    <h3 id="h2_2_1"><span class="heading-num">2.2.1</span>Get Fuzzy</h3>
  #
  # The id attribute is used to generate a link from the table of contents
  # to this particular heading tag. The original text of the tag is used in
  # the table of contents -- "Get Fuzzy" in this example.
  #
  # Raises ArgumentError if the +toc_style+ is neither "ol" nor "ul", if
  # the +toc_range+ is not of the form "hN-hM", or if +numbering_start+ is
  # not an integer.
  #
  # This method consumes the filter's state (the stored range string is
  # replaced by a Range and the counters are advanced), so it should be
  # called only once per Outline instance.
  #
  def filter
    doc = Hpricot.XML(@str)

    # extract directives from the "toc" tag
    toc_elem = doc.search('toc').first

    unless toc_elem.nil?
      # a missing attribute yields nil, and nil !~ regexp is true => "on"
      @numbering = toc_elem['numbering'] !~ %r/off/i
      @numbering_start = Integer(toc_elem['numbering_start']) if toc_elem.has_attribute? 'numbering_start'
      @toc_style = toc_elem['toc_style'] if toc_elem.has_attribute? 'toc_style'
      @toc_range = toc_elem['toc_range'] if toc_elem.has_attribute? 'toc_range'
    end

    unless %w[ul ol].include? @toc_style
      raise ArgumentError, "unknown ToC list type '#{@toc_style}'"
    end

    # report a malformed range explicitly rather than failing on a nil match
    m = %r/h(\d)\s*-\s*h(\d)/i.match(@toc_range)
    if m.nil?
      raise ArgumentError, "invalid ToC range '#{@toc_range}'"
    end
    @toc_range = Integer(m[1])..Integer(m[2])

    @list_opening = build_list_opening(toc_elem)

    headers = @toc_range.map { |x| "h#{x}" }
    doc.traverse_element(*headers) do |elem|
      text, id = heading_info(elem)
      add_to_toc(text, id) if @toc_range.include? current_level
    end

    toc_elem.swap(toc) unless toc_elem.nil?
    doc.to_html
  end


  private

  # Build the opening tag of the outermost table of contents list. The
  # class, style, and id attributes of the <toc /> element (when present)
  # are copied onto the list tag, and a "start" attribute is added to an
  # ordered list whose numbering does not begin at 1.
  #
  # _elem_ is the <toc /> element or +nil+ when the page has none.
  #
  def build_list_opening(elem)
    lo = "<#{@toc_style}"
    unless elem.nil?
      %w[class style id].each do |atr|
        next unless elem.has_attribute? atr
        lo << " %s=\"%s\"" % [atr, elem[atr]]
      end
    end
    if @toc_style == 'ol' && @numbering_start != 1
      lo << " start=\"#{@numbering_start}\""
    end
    lo << ">"
  end

  # Returns information for the given heading element. The information is
  # returned as a two element array: [text, id].
  #
  # This method will also insert outline numbering and an id attribute. The
  # outline numbering can be disabled, but the id attribute must be present
  # for TOC generation. An id already present on the heading is preserved.
  #
  def heading_info(elem)
    m = @h_rgxp.match(elem.name)
    level = Integer(m[1])

    self.current_level = level
    # capture the text before the numbering span is inserted so the number
    # does not leak into the table of contents entry
    text = elem.inner_text

    lbl = label
    if numbering?
      span = %Q{<span class="heading-num">#{lbl}</span>}
      first_child = Array(elem.children).first
      if first_child.nil?
        # an empty heading has no child to insert before
        elem.inner_html = span
      else
        first_child.before span
      end
    end
    elem['id'] = "h#{lbl.tr('.','_')}" if elem['id'].nil?

    [text, elem['id']]
  end

  # Set the current heading level. This will set the label and depth as
  # well. A Webby::Error will be raised if the _level_ is less than the base
  # heading level.
  #
  # The base heading level will be set to the _level_ if it has not already
  # been set. Therefore, the first heading tag encountered defines the base
  # heading level.
  #
  def current_level=(level)
    if @base_level.nil?
      @base_level = @cur_level = level
      # primed one below the start value; the "same level" branch below
      # increments it to the requested starting number
      @level[@base_level-1] = @numbering_start-1
    end

    if level < @base_level
      raise ::Webby::Error, "heading tags are not in order, cannot outline"
    end

    if level == @cur_level
      @level[level-1] += 1
    elsif level > @cur_level
      # going deeper: bump every level entered, including skipped ones
      @cur_level.upto(level-1) { |ii| @level[ii] += 1 }
    else
      # going shallower: reset the levels being left, then advance this one
      @cur_level.downto(level+1) { |ii| @level[ii-1] = 0 }
      @level[level-1] += 1
    end

    @cur_level = level
  end

  # Returns the current heading level number.
  #
  def current_level
    @cur_level
  end

  # Return the label string for the current heading level. The label is
  # made of the counters from the base heading level through the current
  # heading level joined with periods -- "2.2.1" for example.
  #
  # The counters are selected by position rather than by discarding zero
  # entries so that a numbering_start of 0 still yields a "0" label.
  #
  def label
    @level[(@base_level-1)..(@cur_level-1)].join('.')
  end

  # Return the nesting depth of the current heading level with respect to the
  # base heading level. This is a one-based number.
  #
  def depth
    @cur_level - @base_level + 1
  end

  # Add the given text and id reference to the table of contents. The text
  # is HTML escaped before being placed in the link.
  #
  def add_to_toc(text, id)
    a = "<a href=\"##{id}\">#{h(text)}</a>"
    @toc << [depth, a]
  end

  # Returns the table of contents as a collection of nested lists. This is
  # fully formatted HTML. An empty string is returned when no headings were
  # collected.
  #
  # The depths of the lists currently open are kept on a stack. A list is
  # opened whenever an entry is deeper than the innermost open list, and
  # lists are closed until the innermost open list is no deeper than the
  # entry. The outermost list is never closed until the end, so its
  # attributes and numbering apply to the whole table of contents even when
  # heading levels are skipped.
  #
  def toc
    return '' if @toc.empty?

    lopen = "<#{@toc_style}>"
    lclose = "</#{@toc_style}>"

    ary = []
    open_depths = []

    @toc.each do |entry_depth, link|
      # we are decreasing the level; close out the deeper lists
      until open_depths.empty? || open_depths.last <= entry_depth
        ary << "</li>" << lclose
        open_depths.pop
      end

      if open_depths.last == entry_depth
        # close out the previous list item if we're at the same level
        ary << "</li>"
      else
        # we are increasing the level, so start a new list; only the
        # outermost list carries the attributes from the <toc /> tag
        ary << (ary.empty? ? @list_opening : lopen)
        open_depths.push entry_depth
      end

      # add the current element
      ary << "<li>" << link
    end

    # close out every list that is still open
    open_depths.size.times { ary << "</li>" << lclose }

    ary.join("\n")
  end

  # Returns +true+ if outline numbering should be inserted into the heading
  # tags. Returns +false+ otherwise.
  #
  def numbering?
    @numbering
  end

end  # class Outline

# Generate an outline numbering and/or a table of contents in the input HTML
# text.
#
register :outline do |input|
  Outline.new(input).filter
end

end  # module Filters
end  # module Webby

# EOF