script/collect_non_twss.rb in twss-0.0.3 vs script/collect_non_twss.rb in twss-0.0.4
- old
+ new
@@ -1,4 +1,21 @@
-require File.expand_path('../lib/twss', File.dirname(__FILE__))
-require File.expand_path('../lib/twss/tweet_collector', File.dirname(__FILE__))
+require 'rubygems'
+require 'open-uri'
+require 'hpricot'
-TWSS::TweetCollector.new(':)', File.join(File.dirname(__FILE__), '../data/non_twss.txt')).run
+f = File.open(File.expand_path("../../data/non_twss.txt", __FILE__), "w")
+
+domain = "http://www.fmylife.com"
+
+200.times do |i|
+ url = domain + "/intimacy?page=#{i}"
+ puts url
+ body = open(url).read
+ doc = Hpricot(body)
+ doc.search('div.post p a.fmllink') do |story|
+ f.puts story.to_plain_text
+ end
+ f.flush
+ sleep rand * 3.0
+end
+
+f.close