spec/sunscraper_spec.rb in sunscraper-1.1.0.beta3 vs spec/sunscraper_spec.rb in sunscraper-1.2.0.beta1
- old
+ new
@@ -1,91 +1,133 @@
require 'spec_helper'
-
require 'webrick'
-HTML = <<HTML
+# Page skeleton shared by the HTML_* fixtures in this file: the %code%
+# placeholder marks where each fixture's JavaScript is substituted, inside a
+# DOMContentLoaded listener.
+HTML_TEMPLATE = <<HTML
<html>
<head>
<script type="text/javascript">
document.addEventListener("DOMContentLoaded", function() {
- document.getElementById('fuga').textContent =
- ("!skrow tI").split("").reverse().join("");
- Sunscraper.finish();
+ %code%
}, true);
</script>
</head>
<body>
<div id='fuga'></div>
</body>
</html>
HTML
-PORT = 45555
+# Page whose script writes the reversed string "!skrow tI" (i.e. "It works!")
+# into the #fuga element and then calls Sunscraper.finish().
+HTML_FUGA = HTML_TEMPLATE.sub("%code%", <<CODE)
+ document.getElementById('fuga').textContent =
+ ("!skrow tI").split("").reverse().join("");
+ Sunscraper.finish();
+CODE
-def with_webserver
- server = WEBrick::HTTPServer.new :Port => PORT, :Logger => WEBrick::Log.new('/dev/null'), :AccessLog => []
+# Page whose script requests '/comicstrip' via XMLHttpRequest and, once
+# readyState exceeds 3, copies the response text into #fuga and calls
+# Sunscraper.finish(). The request path carries no host, so where it goes
+# depends on the URL the page is loaded against.
+HTML_BASEURL = HTML_TEMPLATE.sub("%code%", <<CODE)
+ var xhr = new XMLHttpRequest();
+ xhr.onreadystatechange = function() {
+ if(xhr.readyState > 3) {
+ document.getElementById('fuga').textContent = xhr.responseText;
+ Sunscraper.finish();
+ }
+ };
+ xhr.open('GET', '/comicstrip', 1);
+ xhr.send();
+CODE
+
+# Page whose script copies window.navigator.userAgent into #fuga and then
+# calls Sunscraper.finish().
+HTML_USERAGENT = HTML_TEMPLATE.sub("%code%", <<CODE)
+ document.getElementById('fuga').textContent =
+ window.navigator.userAgent;
+ Sunscraper.finish();
+CODE
+
+# Page whose script stores "OK" under "key" in window.localStorage, reads the
+# value back into #fuga and then calls Sunscraper.finish().
+HTML_LOCALSTORAGE = HTML_TEMPLATE.sub("%code%", <<CODE)
+ window.localStorage.setItem("key", ["O", "K"].join(""))
+ document.getElementById('fuga').textContent =
+ window.localStorage.getItem("key");
+ Sunscraper.finish();
+CODE
+
+# Runs the given block while a WEBrick server listens on port 45555.
+#
+# html - String used as the response body by the '/' mount; the '/comicstrip'
+#        mount always answers 'Go Get a Roomie!'.
+#
+# Yields the String "http://127.0.0.1:45555/".
+#
+# The server is shut down and its thread joined when the block exits,
+# including when it exits by raising.
+def with_webserver(html)
+ port = 45555
+ server = WEBrick::HTTPServer.new :Port => port, :Logger => WEBrick::Log.new('/dev/null'), :AccessLog => []
server.mount_proc '/' do |req, res|
- res.body = HTML
+ res.body = html
end
- Thread.new { server.start }
+ server.mount_proc '/comicstrip' do |req, res|
+ res.body = 'Go Get a Roomie!'
+ end
+ thread = Thread.new { server.start }
- yield PORT
+ yield "http://127.0.0.1:#{port}/"
ensure
+ # Both locals are still nil if setup raised before assigning them (for
+ # example when the port is already taken); guard the cleanup so it does not
+ # replace that error with a NoMethodError on nil.
server.shutdown if server
+ thread.join if thread
end
-class String
- def to_v
- split(".").map(&:to_i).extend Comparable
- end
-end
+# Registers one example group named +klass+ that runs the whole suite with
+# Sunscraper.worker set to +worker+ (assigned in before(:all), i.e. before
+# the group's examples run).
+define_tests = lambda do |klass, worker|
+ describe klass do
+ before(:all) do
+ Sunscraper.worker = worker
+ end
-unless Sunscraper.os_x?
- # This part currently crashes on OS X (and will forever).
- describe "Sunscraper::Library" do
- before do
- Sunscraper.worker = :embed
+ after(:all) do
+ sleep(5) # let threads rest in peace
end
it "can scrape an HTML provided as a string" do
- Sunscraper.scrape_html(HTML).should include('It works!')
+ Sunscraper.scrape_html(HTML_FUGA).should include('It works!')
end
it "can scrape an URL" do
- with_webserver do |port|
- Sunscraper.scrape_url("http://127.0.0.1:#{port}/").should include('It works!')
+ with_webserver(HTML_FUGA) do |url|
+ Sunscraper.scrape_url(url).should include('It works!')
end
end
it "should time out if callback is not called" do
- lambda { Sunscraper.scrape_html("<!-- nothing. at least no callbacks -->", 1000) }.
+ # The markup contains no script, so nothing can call Sunscraper.finish();
+ # 500 is presumably the timeout in milliseconds -- confirm against the API.
+ lambda { Sunscraper.scrape_html("<!-- nothing. at least no callbacks -->", "about:blank", 500) }.
should raise_exception(Sunscraper::ScrapeTimeout)
end
- end
-end
-if !(RUBY_ENGINE =~ /rbx/ || RUBY_ENGINE =~ /jruby/) ||
- ENV['EXPERIMENTAL'] == 'true'
- # This part currently crashes Rubinius (as of Mar 09, 2012),
- # and crashes jruby < 1.7.0, and uses Unix sockets which don't
- # work even on jruby master (as of Mar 09, 2012).
- describe "Sunscraper::Standalone" do
- before do
- Sunscraper.worker = :standalone
+ it "respects baseUrl parameter" do
+ # The served page is only a stub; the scraped HTML_BASEURL string requests
+ # '/comicstrip', which with_webserver answers with 'Go Get a Roomie!'.
+ with_webserver("<!-- nothing -->") do |url|
+ Sunscraper.scrape_html(HTML_BASEURL, url).should include('Go Get a Roomie')
+ end
end
- it "can scrape an HTML provided as a string" do
- Sunscraper.scrape_html(HTML).should include('It works!')
+ it "should identify itself as Sunscraper" do
+ Sunscraper.scrape_html(HTML_USERAGENT).should include("Sunscraper")
end
- it "can scrape an URL" do
- with_webserver do |port|
- Sunscraper.scrape_url("http://127.0.0.1:#{port}/").should include('It works!')
+ it "should work with window.localStorage through webserver" do
+ with_webserver(HTML_LOCALSTORAGE) do |url|
+ Sunscraper.scrape_url(url).should include("OK")
end
end
- it "should time out if callback is not called" do
- lambda { Sunscraper.scrape_html("<!-- nothing. at least no callbacks -->", 1000) }.
- should raise_exception(Sunscraper::ScrapeTimeout)
+ it "should withstand a lot of concurrent threads" do
+ # Start 500 scrapes at once, wait for every thread, then check each result.
+ 500.times.map {
+ Thread.new {
+ Sunscraper.scrape_html(HTML_FUGA)
+ }
+ }.each(&:join).
+ map(&:value).
+ each { |result|
+ result.should include('It works!')
+ }
end
end
+end
+
+unless Sunscraper.os_x?
+ # This part currently crashes on OS X (and will forever).
+ define_tests.("Sunscraper-Embed", :embed)
+end
+
+# The standalone suite is skipped when RUBY_ENGINE matches rbx or jruby,
+# unless the EXPERIMENTAL environment variable is set to 'true'.
+if !(RUBY_ENGINE =~ /rbx/ || RUBY_ENGINE =~ /jruby/) ||
+ ENV['EXPERIMENTAL'] == 'true'
+ # This part currently crashes Rubinius (as of Mar 09, 2012),
+ # and crashes jruby < 1.7.0, and uses Unix sockets which don't
+ # work even on jruby master (as of Mar 09, 2012).
+ define_tests.("Sunscraper-Standalone", :standalone)
end
\ No newline at end of file