lib/digger/page.rb in digger-0.1.6 vs lib/digger/page.rb in digger-0.1.7

- old
+ new

@@ -26,20 +26,16 @@ # Response time of the request for this page in milliseconds attr_accessor :response_time # OpenStruct it holds users defined data attr_accessor :user_data - attr_accessor :aliases + attr_accessor :aliases, :domain_aliases, :fetched_at - attr_accessor :domain_aliases - # Whether the current page should be stored # Default: true attr_accessor :storable - attr_accessor :fetched_at - # # Create a new page # def initialize(url, params = {}) @url = URI(url) @@ -59,11 +55,11 @@ @storable = true @fetched_at = params[:fetched_at] end def title - doc.title if doc + doc&.title end # # Array of distinct A tag HREFs from the page # @@ -73,10 +69,11 @@ return [] unless doc doc.search('//a[@href]').each do |a| u = a['href'] next if u.nil? || u.empty? + abs = to_absolute(u) rescue next @links << abs if abs && in_domain?(abs) end end @links.to_a @@ -99,11 +96,11 @@ def json @json ||= JSON.parse body end def jsonp - @jsonp ||= JSON.parse body.match(/^[^\(]+?\((.+)\)[^\)]*$/)[1] + @jsonp ||= JSON.parse body.match(/^[^(]+?\((.+)\)[^)]*$/)[1] end # # Discard links, a next call of page.links will return an empty array # @@ -161,11 +158,11 @@ # # Returns +true+ if the page was not found (returned 404 code), # returns +false+ otherwise. # def not_found? - 404 == @code + @code == 404 end # # Base URI from the HTML doc head element # http://www.w3.org/TR/html4/struct/links.html#edef-BASE @@ -175,10 +172,11 @@ href = doc.search('//head/base/@href') URI(href.to_s) unless href.nil? rescue nil end unless @base return nil if @base && @base.to_s.empty? + @base end # # Converts relative URL *link* into an absolute URL based on the @@ -243,9 +241,10 @@ @storable end def expired?(ttl) return false if fetched_at.nil? + (Time.now.to_i - ttl) > fetched_at end def self.from_hash(hash) page = new(URI(hash['url'])) \ No newline at end of file