script/collect_non_twss.rb in twss-0.0.3 vs script/collect_non_twss.rb in twss-0.0.4

- old
+ new

@@ -1,4 +1,21 @@
-require File.expand_path('../lib/twss', File.dirname(__FILE__))
-require File.expand_path('../lib/twss/tweet_collector', File.dirname(__FILE__))
+require 'rubygems'
+require 'open-uri'
+require 'hpricot'

-TWSS::TweetCollector.new(':)', File.join(File.dirname(__FILE__), '../data/non_twss.txt')).run
+f = File.open(File.expand_path("../../data/non_twss.txt", __FILE__), "w")
+
+domain = "http://www.fmylife.com"
+
+200.times do |i|
+  url = domain + "/intimacy?page=#{i}"
+  puts url
+  body = open(url).read
+  doc = Hpricot(body)
+  doc.search('div.post p a.fmllink') do |story|
+    f.puts story.to_plain_text
+  end
+  f.flush
+  sleep rand * 3.0
+end
+
+f.close