Simple but very useful HTML/XHTML cascading parser.

Usage

  # sample html
  stext = <<-EOF
  <body> This is a test...
    <sub> S1 </sub> <sub> S2 </sub>
    <DL>
      <DT> A1
      <DT> A2
      <DT> A3
    </DL>
    <DL>
      <DT> B1
      <DT> B2
      <DT> B3
    </DL>
    <NEST>
      <P ALIGN="R">TOP</P>
      <NEST>
        <P>SECOND</P>
        <OL>
          <LI>C1
          <LI>C2
          <LI>C3
          <LI>C4
        </OL>
      </NEST>
      <OL>
        <LI>D1
        <LI>D2
        <LI>D3
        <LI>D4
      </OL>
    </NEST>
  </body>
  EOF

  a = TagIterator.new(stext)
  a.first("body") do |y|
    y.nth("dl",2) do |dl|
      dl.enumtag("dt") do |t|
        puts t.text.strip
      end
    end
    y.first("nest") do |n|
      n.first("p") do |c|
        print c.text, ' '
        puts c.attributes.collect{ |k,v| "#{k}=#{v}" }
      end.next("nest") do |m|
        m.first("p") do |c|
          puts c.text
        end.next("ol") do |o|
          o.enumtag("li") do |i| puts i.text.strip end
        end
      end.next("ol") do |o|
        o.enumtag("li") do |i| puts i.text.strip end
      end
    end
  end
  a.each_block("sub") do |y|
    puts y.text.strip
  end

produces

  B1
  B2
  B3
  TOP align=R
  SECOND
  C1
  C2
  C3
  C4
  D1
  D2
  D3
  D4
  S1
  S2

Author(s)

* にゃす (Nyasu) <nyasu@osk.3web.ne.jp>

Methods
Attributes
[R] attributes
[RW] option
[R] tag
[R] text
Public Class methods
new(text,tag=nil,attributes={}) [ source ]
Public Instance methods
collect(*arg) [ source ]
each_block(tag,closetag=nil) {|self.class.new(@text[s..e],tag,parse_attribute(d))| ...} [ source ]
enumcollect(tag) [ source ]
enumtag(tag) {|self.class.new(@text[s..e],tag,parse_attribute(d))| ...} [ source ]
first(tag,*arg) {|f| ...} [ source ]
for_this() {|self| ...} [ source ]
get_first(*arg) [ source ]
get_nth(*arg) [ source ]
nth(tag,n,closetag=nil) {|self.class.new(text[s..e],tag,parse_attribute(d))| ...} [ source ]
nth_tailer(tag,n) [ source ]
tagexist?(tag,st=0) [ source ]
tagnext() [ source ]