lib/gscraper/search/query.rb in gscraper-0.1.2 vs lib/gscraper/search/query.rb in gscraper-0.1.3
- old
+ new
@@ -1,6 +1,7 @@
require 'gscraper/search/result'
+require 'gscraper/search/page'
require 'gscraper/extensions/uri'
require 'gscraper/licenses'
require 'gscraper/gscraper'
require 'hpricot'
@@ -78,11 +79,11 @@
#
# Creates a new Query object from the given search options. If a
# block is given, it will be passed the newly created query object.
#
- # Query.new(:query => 'ruby', :with_words => 'rspec rails')
+ # Query.new(:query => 'ruby', :with_words => 'sow rspec')
#
# Query.new(:exact_phrase => 'fluent interfaces') do |q|
# q.within_past_week = true
# end
#
@@ -322,29 +323,30 @@
return url
end
#
- # Returns an array of Result objects at the specified _page_index_.
- # If _opts_ are given, they will be used in accessing the SEARCH_URL.
+ # Returns a Page object containing Result objects at the specified
+ # _page_index_. If _opts_ are given, they will be used in accessing
+ # the SEARCH_URL.
#
def page(page_index,opts={})
# Fetch the search page for page_index (opts are forwarded to
# GScraper.open) and parse the response body with Hpricot.
- results = []
doc = Hpricot(GScraper.open(page_url(page_index),opts))
+ new_page = Page.new
# Every 'div.g' element in the document is treated as one search result.
doc.search('//div.g').each_with_index do |result,index|
# Rank is the offset of this page plus the 1-based position of the
# result within the page.
rank = page_index_offset(page_index) + (index + 1)
# NOTE(review): the lookups below call a method directly on .first;
# if a result lacks the expected element, .first is presumably nil and
# the call would raise -- confirm against Hpricot's behavior.
# Title text comes from the first 'h2.r' element of the result.
title = result.search('//h2.r').first.inner_text
# The result URL is the href of the first anchor under 'h2.r'.
url = result.search('//h2.r/a').first.get_attribute('href')
# TODO: exclude URL and Links from summary text
# Summary is the full inner text of the first 'td.j' element.
summary = result.search('//td.j').first.inner_text
# TODO: scrape Cached and Similar links
- results << Result.new(rank,title,url,summary)
+ new_page << Result.new(rank,title,url,summary)
end
- return results
+ return new_page
end
#
# Returns the results on the first page. If _opts_ are given, they
# will be used in accessing the SEARCH_URL.