lib/digger/page.rb in digger-0.1.6 vs lib/digger/page.rb in digger-0.1.7
- old
+ new
@@ -26,20 +26,16 @@
# Response time of the request for this page in milliseconds
attr_accessor :response_time
# OpenStruct that holds user-defined data
attr_accessor :user_data
- attr_accessor :aliases
+ attr_accessor :aliases, :domain_aliases, :fetched_at
- attr_accessor :domain_aliases
-
# Whether the current page should be stored
# Default: true
attr_accessor :storable
- attr_accessor :fetched_at
-
#
# Create a new page
#
def initialize(url, params = {})
@url = URI(url)
@@ -59,11 +55,11 @@
@storable = true
@fetched_at = params[:fetched_at]
end
# Returns doc.title, or nil when doc is nil.
# The "-" line is the 0.1.6 form (trailing `if doc` guard); the "+" line is
# the 0.1.7 form, which guards against a nil doc with safe navigation (&.).
def title
- doc.title if doc
+ doc&.title
end
#
# Array of distinct A tag HREFs from the page
#
@@ -73,10 +69,11 @@
return [] unless doc
doc.search('//a[@href]').each do |a|
u = a['href']
next if u.nil? || u.empty?
+
abs = to_absolute(u) rescue next
@links << abs if abs && in_domain?(abs)
end
end
@links.to_a
@@ -99,11 +96,11 @@
# Parses body as JSON and memoizes the result in @json.
# Any error raised by JSON.parse (e.g. for malformed input) propagates to
# the caller. Because of ||=, a parsed value of nil or false is not cached
# and body is parsed again on the next call.
def json
@json ||= JSON.parse body
end
# Parses a JSONP-style body and memoizes the result in @jsonp.
# The regex requires at least one non-"(" character at the start of a line
# (the callback name), then captures the text between the first "(" and the
# last ")" on that line; the capture is handed to JSON.parse.
# The "-" (0.1.6) and "+" (0.1.7) lines differ only in dropping the
# backslash escapes inside the character classes.
# NOTE(review): assuming body is a String, a body that does not match makes
# match return nil, so the [1] lookup raises NoMethodError - confirm callers
# only use this on JSONP responses.
def jsonp
- @jsonp ||= JSON.parse body.match(/^[^\(]+?\((.+)\)[^\)]*$/)[1]
+ @jsonp ||= JSON.parse body.match(/^[^(]+?\((.+)\)[^)]*$/)[1]
end
#
# Discard links, a next call of page.links will return an empty array
#
@@ -161,11 +158,11 @@
#
# Returns +true+ if the page was not found (returned 404 code),
# returns +false+ otherwise.
#
# Diff note: 0.1.6 ("-") wrote the literal first (404 == @code); 0.1.7 ("+")
# puts @code on the left. For an Integer or nil @code both give the same result.
def not_found?
- 404 == @code
+ @code == 404
end
#
# Base URI from the HTML doc head element
# http://www.w3.org/TR/html4/struct/links.html#edef-BASE
@@ -175,10 +172,11 @@
href = doc.search('//head/base/@href')
URI(href.to_s) unless href.nil? rescue nil
end unless @base
return nil if @base && @base.to_s.empty?
+
@base
end
#
# Converts relative URL *link* into an absolute URL based on the
@@ -243,9 +241,10 @@
@storable
end
# Returns true when fetched_at is older than ttl relative to the current time,
# i.e. when (Time.now.to_i - ttl) is greater than fetched_at.
# A page with no fetched_at (nil) is never considered expired.
# NOTE(review): the comparison against Time.now.to_i implies fetched_at is an
# integer epoch timestamp and ttl is in seconds - confirm against callers.
def expired?(ttl)
return false if fetched_at.nil?
+
(Time.now.to_i - ttl) > fetched_at
end
def self.from_hash(hash)
page = new(URI(hash['url']))
\ No newline at end of file