lib/gscraper/search/query.rb in gscraper-0.1.2 vs lib/gscraper/search/query.rb in gscraper-0.1.3

- old
+ new

@@ -1,6 +1,7 @@
 require 'gscraper/search/result'
+require 'gscraper/search/page'
 require 'gscraper/extensions/uri'
 require 'gscraper/licenses'
 require 'gscraper/gscraper'
 require 'hpricot'
@@ -78,11 +79,11 @@
  #
  # Creates a new Query object from the given search options. If a
  # block is given, it will be passed the newly created query object.
  #
- # Query.new(:query => 'ruby', :with_words => 'rspec rails')
+ # Query.new(:query => 'ruby', :with_words => 'sow rspec')
  #
  # Query.new(:exact_phrase => 'fluent interfaces') do |q|
  # q.within_past_week = true
  # end
  #
@@ -322,29 +323,30 @@
    return url
  end
  #
- # Returns an array of Result objects at the specified _page_index_.
- # If _opts_ are given, they will be used in accessing the SEARCH_URL.
+ # Returns a Page object containing Result objects at the specified
+ # _page_index_. If _opts_ are given, they will be used in accessing
+ # the SEARCH_URL.
  #
  def page(page_index,opts={})
-   results = []
    doc = Hpricot(GScraper.open(page_url(page_index),opts))
+   new_page = Page.new
    doc.search('//div.g').each_with_index do |result,index|
      rank = page_index_offset(page_index) + (index + 1)
      title = result.search('//h2.r').first.inner_text
      url = result.search('//h2.r/a').first.get_attribute('href')
      # TODO: exclude URL and Links from summary text
      summary = result.search('//td.j').first.inner_text
      # TODO: scrape Cached and Similar links
-     results << Result.new(rank,title,url,summary)
+     new_page << Result.new(rank,title,url,summary)
    end
-   return results
+   return new_page
  end
  #
  # Returns the results on the first page. If _opts_ are given, they
  # will be used in accessing the SEARCH_URL.