Tagiter

A simple but very useful cascading HTML/XHTML parser for quick-and-dirty web page parsing jobs.

Usage

  # sample html
  stext = <<-EOF
  <body> This is a test...
    <sub> S1 </sub> <sub> S2 </sub>
    <DL>
      <DT> A1
      <DT> A2
      <DT> A3
    </DL>
    <DL>
      <DT> B1
      <DT> B2
      <DT> B3
    </DL>
    <NEST>
      <P ALIGN="R">TOP</P>
      <NEST>
        <P>SECOND</P>
        <OL>
          <LI>C1
          <LI>C2
          <LI>C3
          <LI>C4
        </OL>
      </NEST>
      <OL>
        <LI>D1
        <LI>D2
        <LI>D3
        <LI>D4
      </OL>
    </NEST>
  </body>
  EOF

  a = TextTagIterator.new(stext)
  a.first("body") do |y|
    y.nth("dl",2) do |dl|
      dl.enumtag("dt") do |t|
        puts t.text.strip
      end
    end
    y.first("nest") do |n|
      n.first("p") do |c|
        print c.text, ' '
        puts c.attributes.collect{ |k,v| "#{k}=#{v}" }
      end.next("nest") do |m|
        m.first("p") do |c|
          puts c.text
        end.next("ol") do |o|
          o.enumtag("li") do |i| puts i.text.strip end
        end
      end.next("ol") do |o|
        o.enumtag("li") do |i| puts i.text.strip end
      end
    end
  end
  a.each_block("sub") do |y|
    puts y.text.strip
  end

produces

  B1
  B2
  B3
  TOP align=R
  SECOND
  C1
  C2
  C3
  C4
  D1
  D2
  D3
  D4
  S1
  S2

Author

にゃす <nyasu@osk.3web.ne.jp>

Tagiter

Usage

Author

Methods

Attributes

Public Class methods

Public Instance methods

Private Instance methods