Simple but very useful HTML/XHTML cascading parser.
Usage
  # sample html
  stext = <<-EOF
  <body>
    This is a test...
    <sub> S1 </sub>
    <sub> S2 </sub>
    <DL>
      <DT> A1
      <DT> A2
      <DT> A3
    </DL>
    <DL>
      <DT> B1
      <DT> B2
      <DT> B3
    </DL>
    <NEST>
      <P ALIGN="R">TOP</P>
      <NEST>
        <P>SECOND</P>
        <OL>
          <LI>C1
          <LI>C2
          <LI>C3
          <LI>C4
        </OL>
      </NEST>
      <OL>
        <LI>D1
        <LI>D2
        <LI>D3
        <LI>D4
      </OL>
    </NEST>
  </body>
  EOF

  a = TagIterator.new(stext)
  a.first("body") do |y|
    y.nth("dl",2) do |dl|
      dl.enumtag("dt") do |t|
        puts t.text.strip
      end
    end
    y.first("nest") do |n|
      n.first("p") do |c|
        print c.text, ' '
        puts c.attributes.collect{ |k,v| "#{k}=#{v}" }
      end.next("nest") do |m|
        m.first("p") do |c|
          puts c.text
        end.next("ol") do |o|
          o.enumtag("li") do |i|
            puts i.text.strip
          end
        end
      end.next("ol") do |o|
        o.enumtag("li") do |i|
          puts i.text.strip
        end
      end
    end
  end
  a.each_block("sub") do |y|
    puts y.text.strip
  end
produces
  B1
  B2
  B3
  TOP align=R
  SECOND
  C1
  C2
  C3
  C4
  D1
  D2
  D3
  D4
  S1
  S2
Author(s)
* にゃす <nyasu@osk.3web.ne.jp>
Methods
- collect
- each_block
- enumcollect
- enumtag
- first
- for_this
- get_first
- get_nth
- new
- nth
- nth_tailer
- tagexist?
- tagnext
Attributes
[R] | attributes | |
[RW] | option | |
[R] | tag | |
[R] | text |
Public Class methods
new(text,tag=nil,attributes={})
[ source ]
Public Instance methods
collect(*arg)
[ source ]
each_block(tag,closetag=nil) {|self.class.new(@text[s..e],tag,parse_attribute(d))| ...}
[ source ]
enumcollect(tag)
[ source ]
enumtag(tag) {|self.class.new(@text[s..e],tag,parse_attribute(d))| ...}
[ source ]
first(tag,*arg) {|f| ...}
[ source ]
for_this() {|self| ...}
[ source ]
get_first(*arg)
[ source ]
get_nth(*arg)
[ source ]
nth(tag,n,closetag=nil) {|self.class.new(text[s..e],tag,parse_attribute(d))| ...}
[ source ]
nth_tailer(tag,n)
[ source ]
tagexist?(tag,st=0)
[ source ]
tagnext()
[ source ]