require 'spec_helper' require 'webrick' HTML_TEMPLATE = <
HTML HTML_FUGA = HTML_TEMPLATE.sub("%code%", < 3) { document.getElementById('fuga').textContent = xhr.responseText; Sunscraper.finish(); } }; xhr.open('GET', '/comicstrip', 1); xhr.send(); CODE HTML_USERAGENT = HTML_TEMPLATE.sub("%code%", < port, :Logger => WEBrick::Log.new('/dev/null'), :AccessLog => [] server.mount_proc '/' do |req, res| res.body = html end server.mount_proc '/comicstrip' do |req, res| res.body = 'Go Get a Roomie!' end thread = Thread.new { server.start } yield "http://127.0.0.1:#{port}/" ensure server.shutdown thread.join end define_tests = lambda do |klass, worker| describe klass do before(:all) do Sunscraper.worker = worker end after(:all) do sleep(5) # let threads rest in peace end it "can scrape an HTML provided as a string" do Sunscraper.scrape_html(HTML_FUGA).should include('It works!') end it "can scrape an URL" do with_webserver(HTML_FUGA) do |url| Sunscraper.scrape_url(url).should include('It works!') end end it "should time out if callback is not called" do lambda { Sunscraper.scrape_html("", "about:blank", 500) }. should raise_exception(Sunscraper::ScrapeTimeout) end it "respects baseUrl parameter" do with_webserver("") do |url| Sunscraper.scrape_html(HTML_BASEURL, url).should include('Go Get a Roomie') end end it "should identify itself as Sunscraper" do Sunscraper.scrape_html(HTML_USERAGENT).should include("Sunscraper") end it "should work with window.localStorage through webserver" do with_webserver(HTML_LOCALSTORAGE) do |url| Sunscraper.scrape_url(url).should include("OK") end end it "should withstand a lot of concurrent threads" do 500.times.map { Thread.new { Sunscraper.scrape_html(HTML_FUGA) } }.each(&:join). map(&:value). each { |result| result.should include('It works!') } end end end unless Sunscraper.os_x? # This part currently crashes on OS X (and will forever). define_tests.("Sunscraper-Embed", :embed) end if !(RUBY_ENGINE =~ /rbx/ || RUBY_ENGINE =~ /jruby/) || ENV['EXPERIMENTAL'] == 'true' # This part currently crashes Rubinius (as of Mar 09, 2012), # and crashes jruby < 1.7.0, and uses Unix sockets which don't # work even on jruby master (as of Mar 09, 2012). define_tests.("Sunscraper-Standalone", :standalone) end