lib/gscraper/search/page.rb in gscraper-0.2.4 vs lib/gscraper/search/page.rb in gscraper-0.3.0
- old
+ new
@@ -1,7 +1,6 @@
#
-#--
# GScraper - A web-scraping interface to various Google Services.
#
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
#
# This program is free software; you can redistribute it and/or modify
@@ -15,296 +14,503 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-#++
#
require 'gscraper/search/result'
require 'gscraper/page'
module GScraper
module Search
class Page < GScraper::Page
+ alias results_with select
+
#
- # Selects the results using the specified _block_.
+ # Selects the results with the matching title.
#
- # page.results_with { |result| result.title =~ /blog/ }
+ # @param [String, Regexp] title
+ # The title to search for.
#
- def results_with(&block)
- select(&block)
- end
-
+ # @yield [result]
+ # The given block will be passed each matching result.
#
- # Selects the results with the matching _title_. The _title_ may be
- # either a String or a Regexp. If _block_ is given, each matching
- # result will be passed to the _block_.
+ # @yieldparam [Result] result
+ # A result with the matching title.
#
+ # @return [Array<Result>]
+ # The results with the matching title.
+ #
+ # @example
# page.results_with_title('hackety org') #=> Page
#
+ # @example
# page.results_with_title(/awesome/) do |result|
# puts result.url
# end
#
- def results_with_title(title,&block)
- if title.kind_of?(Regexp)
- results = results_with { |result| result.title =~ title }
+ def results_with_title(title)
+ unless block_given?
+ enum_for(:results_with_title,title)
else
- results = results_with { |result| result.title == title }
- end
+ results_with do |result|
+ if result.title.match(title)
+ yield result
- results.each(&block) if block
- return results
+ true
+ end
+ end
+ end
end
#
- # Selects the results with the matching _url_. The _url_ may be
- # either a String or a Regexp. If _block_ is given, each matching
- # result will be passed to the _block_.
+ # Selects the results with the matching URL.
#
+ # @param [String, Regexp] url
+ # The URL to search for.
+ #
+ # @yield [result]
+ # The given block will be passed each matching result.
+ #
+ # @yieldparam [Result] result
+ # A result with the matching URL.
+ #
+ # @return [Array<Result>]
+ # The results with the matching URL.
+ #
+ # @example
# page.results_with_url(/\.com/) # => Page
#
+ # @example
# page.results_with_url(/^https:\/\//) do |result|
# puts result.title
# end
#
- def results_with_url(url,&block)
- if url.kind_of?(Regexp)
- results = results_with { |result| result.url =~ url }
+ def results_with_url(url)
+ unless block_given?
+ enum_for(:results_with_url,url)
else
- results = results_with { |result| result.url == url }
- end
+ results_with do |result|
+ if result.url.match(url)
+ yield result
- results.each(&block) if block
- return results
+ true
+ end
+ end
+ end
end
#
- # Selects the results with the matching _summary_. The _summary_ may
- # be either a String or a Regexp. If _block_ is given, each matching
- # result will be passed to the _block_.
+ # Selects the results with the matching summary.
#
+ # @param [String, Regexp] summary
+ # The summary to search for.
+ #
+ # @yield [result]
+ # The given block will be passed each matching result.
+ #
+ # @yieldparam [Result] result
+ # A result with the matching summary.
+ #
+ # @return [Array<Result>]
+ # The results with the matching summary.
+ #
+ # @example
# page.results_with_summary(/cheese cake/) # => Page
#
+ # @example
# page.results_with_summary(/Scientifically/) do |result|
# puts result.url
# end
#
- def results_with_summary(summary,&block)
- if summary.kind_of?(Regexp)
- results = results_with { |result| result.summary =~ summary }
+ def results_with_summary(summary)
+ unless block_given?
+ enum_for(:results_with_summary,summary)
else
- results = results_with { |result| result.summary == summary }
- end
+ results_with do |result|
+ if result.summary.match(summary)
+ yield result
- results.each(&block) if block
- return results
+ true
+ end
+ end
+ end
end
#
- # Returns an Array containing the ranks of the results within the
- # Page.
+ # Iterates over each result's rank within the page.
#
- # page.ranks # => [...]
+ # @yield [rank]
+ # The given block will be passed the ranks of each result in
+ # the page.
#
- def ranks
- map { |result| result.rank }
+ # @yieldparam [Integer] rank
+ # The rank of a result in the page.
+ #
+ # @return [Enumerator]
+ # If no block is given, an Enumerator object will be returned.
+ #
+ # @example
+ # each_rank { |rank| puts rank }
+ #
+ def each_rank
+ unless block_given?
+ enum_for(:each_rank)
+ else
+ each { |result| yield result.rank }
+ end
end
#
- # Returns an Array containing the titles of the results within the
- # Page.
+ # Iterates over each result's title within the page.
#
- # page.titles # => [...]
+ # @yield [title]
+ # The given block will be passed the title of each result in
+ # the page.
+ #
+ # @yieldparam [String] title
+ # The title of a result in the page.
#
- def titles
- map { |result| result.title }
+ # @return [Enumerator]
+ # If no block is given, an Enumerator object will be returned.
+ #
+ # @example
+ # each_title { |title| puts title }
+ #
+ def each_title
+ unless block_given?
+ enum_for(:each_title)
+ else
+ each { |result| yield result.title }
+ end
end
#
- # Returns an Array containing the URLs of the results within the
- # Page.
+ # Iterates over each result's url within the page.
#
- # page.urls # => [...]
+ # @yield [url]
+ # The given block will be passed the URL of each result in
+ # the page.
+ #
+ # @yieldparam [URI::HTTP] url
+ # The URL of a result in the page.
#
- def urls
- map { |result| result.url }
+ # @return [Enumerator]
+ # If no block is given, an Enumerator object will be returned.
+ #
+ # @example
+ # each_url { |url| puts url }
+ #
+ def each_url
+ unless block_given?
+ enum_for(:each_url)
+ else
+ each { |result| yield result.url }
+ end
end
#
- # Returns an Array containing the summaries of the results within the
- # Page.
+ # Iterates over each result's summary within the page.
#
- # page.summaries # => [...]
+ # @yield [summary]
+ # The given block will be passed the summary of each result in
+ # the page.
+ #
+ # @yieldparam [String] summary
+ # The summary of a result in the page.
#
- def summaries
- map { |result| result.summary }
+ # @return [Enumerator]
+ # If no block is given, an Enumerator object will be returned.
+ #
+ # @example
+ # each_summary { |summary| puts summary }
+ #
+ def each_summary
+ unless block_given?
+ enum_for(:each_summary)
+ else
+ each { |result| yield result.summary }
+ end
end
#
- # Returns an Array containing the cached URLs of the results within
- # the Page.
+ # Iterates over each result's cached URLs within the page.
#
- # page.cached_urls # => [...]
+ # @yield [cached_url]
+ # The given block will be passed the Cached URL of each result in
+ # the page.
+ #
+ # @yieldparam [URI::HTTP] cached_url
+ # The Cached URL of a result in the page.
#
- def cached_urls
- map { |result| result.cached_url }.compact
+ # @return [Enumerator]
+ # If no block is given, an Enumerator object will be returned.
+ #
+ # @example
+ # each_cached_url { |cached_url| puts cached_url }
+ #
+ def each_cached_url
+ unless block_given?
+ enum_for(:each_cached_url)
+ else
+ each do |result|
+ yield result.cached_url if result.cached_url
+ end
+ end
end
#
- # Returns an Array containing the cached pages of the results within
- # the Page.
+ # Iterates over each result's cached pages within the page.
#
- # page.cached_pages # => [...]
+ # @yield [cached_page]
+ # The given block will be passed the Cached Page of each result in
+ # the page.
+ #
+ # @yieldparam [Mechanize::Page] cached_page
+ # The Cached Page of a result in the page.
#
- def cached_pages
- map { |result| result.cached_page }.compact
+ # @return [Enumerator]
+ # If no block is given, an Enumerator object will be returned.
+ #
+ # @example
+ # each_cached_page { |page| puts page.readlines }
+ #
+ def each_cached_page
+ unless block_given?
+ enum_for(:each_cached_page)
+ else
+ each do |result|
+ yield result.cached_page if result.cached_page
+ end
+ end
end
#
- # Returns an Array containing the similar Query URLs of the results
- # within the Page.
+ # Iterates over each result's similar Query URLs within the page.
#
- # page.similar_urls # => [...]
+ # @yield [similar_url]
+ # The given block will be passed the Similar Query URL of each
+ # result in the page.
+ #
+ # @yieldparam [URI::HTTP] similar_url
+ # The Cached URL of a result in the page.
#
- def similar_urls
- map { |result| result.similar_url }.compact
+ # @return [Enumerator]
+ # If no block is given, an Enumerator object will be returned.
+ #
+ # @example
+ # each_similar_url { |similar_url| puts similar_url }
+ #
+ def each_similar_url
+ unless block_given?
+ enum_for(:each_similar_url)
+ else
+ each do |result|
+ yield result.similar_url if result.similar_url
+ end
+ end
end
#
- # Iterates over each result's rank within the Page, passing each to
- # the given _block_.
+ # Returns the ranks of the results in the page.
#
- # each_rank { |rank| puts rank }
+ # @return [Array<Integer>]
+ # The ranks of the results.
#
- def each_rank(&block)
- ranks.each(&block)
+ def ranks
+ each_rank.to_a
end
#
- # Iterates over each result's title within the Page, passing each to
- # the given _block_.
+ # Returns the titles of the results in the page.
#
- # each_title { |title| puts title }
+ # @return [Array<String>]
+ # The titles of the results.
#
- def each_title(&block)
- titles.each(&block)
+ def titles
+ each_title.to_a
end
#
- # Iterates over each result's url within the Page, passing each to
- # the given _block_.
+ # Returns the URLs of the results in the page.
#
- # each_url { |url| puts url }
+ # @return [Array<URI::HTTP>]
+ # The URLs of the results.
#
- def each_url(&block)
- urls.each(&block)
+ def urls
+ each_url.to_a
end
#
- # Iterates over each result's summary within the Page, passing each
- # to the given _block_.
+ # Returns the summaries of the results in the page.
#
- # each_summary { |summary| puts summary }
+ # @return [Array<String>]
+ # The summaries of the results.
#
- def each_summary(&block)
- summaries.each(&block)
+ def summaries
+ each_summary.to_a
end
#
- # Iterates over each result's cached URLs within the Page, passing
- # each to the given _block_.
+ # Returns the Cached URLs of the results in the page.
#
- # each_cached_url { |url| puts url }
+ # @return [Array<URI::HTTP>]
+ # The Cached URLs of the results.
#
- def each_cached_url(&block)
- cached_urls.each(&block)
+ def cached_urls
+ each_cached_url.to_a
end
#
- # Iterates over each result's cached pages within the Page, passing
- # each to the given _block_.
+ # Returns the Cached Pages of the results in the page.
#
- # each_cached_page { |page| puts page.readlines }
+ # @return [Array<Mechanize::Page>]
+ # The Cached Pages of the results.
#
- def each_cached_page(&block)
- cached_pages.each(&block)
+ def cached_pages
+ each_cached_page.to_a
end
#
- # Iterates over each result's similar Query URLs within the Page,
- # passing each to the given _block_.
+ # Returns the Similar Query URLs of the results in the page.
#
- # each_similar_url { |url| puts url }
+ # @return [Array<URI::HTTP>]
+ # The Similar Query URLs of the results.
#
- def each_similar_url(&block)
- similar_urls.each(&block)
+ def similar_urls
+ each_similar_url.to_a
end
#
- # Returns the ranks of the results that match the specified _block_.
+ # Returns the ranks of the results that match the given block.
#
+ # @yield [result]
+ # The given block will be used to filter the results in the page.
+ #
+ # @yieldparam [Result] result
+ # A result in the page.
+ #
+ # @return [Array<Integer>]
+ # The ranks of the results which match the given block.
+ #
+ # @example
# page.ranks_of { |result| result.title =~ /awesome/ }
#
def ranks_of(&block)
results_with(&block).ranks
end
#
- # Returns the titles of the results that match the specified _block_.
+ # Returns the titles of the results that match the given block.
#
+ # @yield [result]
+ # The given block will be used to filter the results in the page.
+ #
+ # @yieldparam [Result] result
+ # A result in the page.
+ #
+ # @return [Array<String>]
+ # The titles of the results which match the given block.
+ #
+ # @example
# page.titles_of { |result| result.url.include?('www') }
#
def titles_of(&block)
results_with(&block).titles
end
#
- # Returns the urls of the results that match the specified _block_.
+ # Returns the urls of the results that match the given block.
#
+ # @yield [result]
+ # The given block will be used to filter the results in the page.
+ #
+ # @yieldparam [Result] result
+ # A result in the page.
+ #
+ # @return [Array<URI::HTTP>]
+ # The URLs of the results which match the given block.
+ #
+ # @example
# page.urls_of { |result| result.summary =~ /awesome pants/ }
#
def urls_of(&block)
results_with(&block).urls
end
#
- # Returns the summaries of the results that match the specified
- # _block_.
+ # Returns the summaries of the results that match the given block.
#
+ # @yield [result]
+ # The given block will be used to filter the results in the page.
+ #
+ # @yieldparam [Result] result
+ # A result in the page.
+ #
+ # @return [Array<String>]
+ # The summaries of the results which match the given block.
+ #
+ # @example
# page.summaries_of { |result| result.title =~ /what if/ }
#
def summaries_of(&block)
results_with(&block).summaries
end
#
- # Returns the cached URLs of the results that match the specified
- # _block_.
+ # Returns the Cached URLs of the results that match the given block.
#
+ # @yield [result]
+ # The given block will be used to filter the results in the page.
+ #
+ # @yieldparam [Result] result
+ # A result in the page.
+ #
+ # @return [Array<URI::HTTP>]
+ # The Cached URLs of the results which match the given block.
+ #
+ # @example
# page.cached_urls_of { |result| result.title =~ /howdy/ }
#
def cached_urls_of(&block)
results_with(&block).cached_urls
end
#
- # Returns the cached pages of the results that match the specified
- # _block_. If _options_ are given, they will be used in accessing
- # the cached pages.
+ # Returns the cached pages of the results that match the given block.
#
+ # @yield [result]
+ # The given block will be used to filter the results in the page.
+ #
+ # @yieldparam [Result] result
+ # A result in the page.
+ #
+ # @return [Array<Mechanize::Page>]
+ # The Cached Page of the results which match the given block.
+ #
+ # @example
# page.cached_pages_of { |result| result.title =~ /dude/ }
#
- def cached_pages_of(options={},&block)
- results_with(&block).cached_pages(options)
+ def cached_pages_of(&block)
+ results_with(&block).cached_pages
end
#
- # Returns the similar query URLs of the results that match the
- # specified _block_.
+ # Returns the Similar Query URLs of the results that match the given
+ # block.
#
+ # @yield [result]
+ # The given block will be used to filter the results in the page.
+ #
+ # @yieldparam [Result] result
+ # A result in the page.
+ #
+ # @return [Array<URI::HTTP>]
+ # The Similar Query URLs of the results which match the given block.
+ #
+ # @example
# page.similar_urls_of { |result| result.title =~ /what if/ }
#
def similar_urls_of(&block)
results_with(&block).similar_urls
end