Class | TextTagIterator |
In: |
lib/facet/tagiter.rb
|
Parent: | Object |
A simple but very useful cascading HTML/XHTML parser for quick-and-dirty web page parsing jobs.
  # sample html
  stext = <<-EOF
  <body> This is a test...
    <sub> S1 </sub> <sub> S2 </sub>
    <DL>
      <DT> A1
      <DT> A2
      <DT> A3
    </DL>
    <DL>
      <DT> B1
      <DT> B2
      <DT> B3
    </DL>
    <NEST>
      <P ALIGN="R">TOP</P>
      <NEST>
        <P>SECOND</P>
        <OL>
          <LI>C1
          <LI>C2
          <LI>C3
          <LI>C4
        </OL>
      </NEST>
      <OL>
        <LI>D1
        <LI>D2
        <LI>D3
        <LI>D4
      </OL>
    </NEST>
  </body>
  EOF

  a = TextTagIterator.new(stext)
  a.first("body") do |y|
    y.nth("dl",2) do |dl|
      dl.enumtag("dt") do |t|
        puts t.text.strip
      end
    end
    y.first("nest") do |n|
      n.first("p") do |c|
        print c.text, ' '
        puts c.attributes.collect{ |k,v| "#{k}=#{v}" }
      end.next("nest") do |m|
        m.first("p") do |c|
          puts c.text
        end.next("ol") do |o|
          o.enumtag("li") do |i|
            puts i.text.strip
          end
        end
      end.next("ol") do |o|
        o.enumtag("li") do |i|
          puts i.text.strip
        end
      end
    end
  end
  a.each_block("sub") do |y|
    puts y.text.strip
  end
produces
  B1
  B2
  B3
  TOP align=R
  SECOND
  C1
  C2
  C3
  C4
  D1
  D2
  D3
  D4
  S1
  S2
にゃす (Nyasu) <nyasu@osk.3web.ne.jp>
attributes | [R] | |
option | [RW] | |
tag | [R] | |
text | [R] |