Sha256: 766ec0e6842170e394c96fc1026610d899a75bc88dbbce30ad8a246b71c457b5
Contents?: true
Size: 828 Bytes
Versions: 5
Compression:
Stored size: 828 Bytes
Contents
# Example script: crawls pages starting from the flyerhzm GitHub profile with
# RegexpCrawler and prints the title, description and the beginning of the
# body captured from each parsed page.
#
# NOTE(review): the exact meaning of each Crawler option (:continue_regexp,
# :capture_regexp, :named_captures, ...) is defined by the regexp_crawler gem;
# confirm against its documentation.
require 'rubygems'
require 'regexp_crawler'

# Callback receiving the captured result and the page it belongs to. Prints a
# separator line, the page, the title, the description, and at most the first
# 101 characters of the body followed by "...".
save_method = Proc.new do |result, page|
  puts '============================='
  puts page
  puts result[:title]
  puts result[:description]
  puts result[:body][0..100] + "..."
end

# Callback receiving a page and its response body. Truthy only when the page
# matches a URL under http://github.com/flyerhzm/ and the response body
# contains no "Fork of ... <a href=...>" marker.
need_parse = Proc.new do |page, response_body|
  page =~ %r{http://github.com/flyerhzm/\w+} &&
    !response_body.index(/Fork of.*?<a href=".*?">/)
end

crawler = RegexpCrawler::Crawler.new(
  :start_page => "http://github.com/flyerhzm",
  :continue_regexp => %r{<div class="title"><b><a href="(/flyerhzm/.*?)">}m,
  :capture_regexp => %r{<a href="http://github.com/flyerhzm/[^/"]*?(?:/tree)?">(.*?)</a>.*<span id="repository_description".*?>(.*?)</span>.*(<div class="(?:wikistyle|plain)">.*?</div>)</div>}m,
  # One name per capture group of :capture_regexp, in order.
  :named_captures => ['title', 'description', 'body'],
  :save_method => save_method,
  :need_parse => need_parse
)

crawler.start
Version data entries
5 entries across 5 versions & 2 rubygems