Sha256: 17d1eef7af2f076560b1d4ea18b14f1958de1697512786ff765b5e046407fe47

Contents?: true

Size: 1.93 KB

Versions: 4

Compression:

Stored size: 1.93 KB

Contents

# A link found while crawling: the URL to request, the HTTP method to request
# it with, the crawler that will follow it, and the referrer it was found on.
class Relevance::Tarantula::Link
  include Relevance::Tarantula

  class << self
    include ActionView::Helpers::UrlHelper

    # method_javascript_function needs this method
    def protect_against_forgery?
      false
    end
  end

  # Maps each non-GET method to a regexp built from the JavaScript that
  # method_javascript_function generates for it; used by #method to classify
  # a tag's onclick handler.
  METHOD_REGEXPS = {}
  [:put, :delete, :post].each do |m|
    # remove submit from the end so we'll match with or without forgery protection.
    # The pattern must be a literal String: written as a regexp, `()` is an
    # empty group (not a pair of parentheses), so /f.submit();/ never matched
    # and the trailing submit call was left in place.
    s = method_javascript_function(m).gsub("f.submit();", "")
    # don't just match this.href in case a different url was passed originally.
    # Regexp.escape rewrites "this.href" as "this\.href", so it is the escaped
    # form that has to be replaced by the wildcard.
    s = Regexp.escape(s).gsub(Regexp.escape("this.href"), ".*")
    METHOD_REGEXPS[m] = /#{s}/
  end

  attr_accessor :href, :crawler, :referrer

  # link     - a String URL, nil, or a tag-like object answering ['href'] and
  #            ['onclick'].
  # crawler  - object used to transform URLs, follow links and record results.
  # referrer - where the link was found; passed through to the result.
  def initialize(link, crawler, referrer)
    @crawler, @referrer = crawler, referrer

    if String === link || link.nil?
      @href = transform_url(link)
      # plain URLs carry no onclick handler, so they are always GET
      @method = :get
    else # should be a tag
      # NOTE(review): the href is downcased before transformation; confirm this
      # is intended for applications with case-sensitive paths.
      @href = link['href'] ? transform_url(link['href'].downcase) : nil
      # keep the tag so #method can inspect its onclick lazily
      @tag = link
    end
  end

  # Follows this link through the crawler, logs the response code and hands the
  # resulting result object back to the crawler.
  def crawl
    response = crawler.follow(method, href)
    log "Response #{response.code} for #{self}"
    crawler.handle_link_results(self, make_result(response))
  end

  # Builds a crawler result describing this link and the given response.
  def make_result(response)
    crawler.make_result(:method    => method,
                        :url       => href,
                        :response  => response,
                        :referrer  => referrer)
  end

  # Returns the HTTP method for this link as a Symbol. For tags it is the first
  # of :put, :delete, :post whose regexp matches the tag's onclick, otherwise
  # :get. The answer is memoized.
  def method
    @method ||= begin
      (@tag &&
       [:put, :delete, :post].detect do |m| # post should be last since it's least specific
         @tag['onclick'] =~ METHOD_REGEXPS[m]
       end) ||
      :get
    end
  end

  # Delegates URL normalisation to the crawler.
  def transform_url(link)
    crawler.transform_url(link)
  end

  # Two links are equal when their href and method have the same string form.
  def ==(obj)
    obj.respond_to?(:href) && obj.respond_to?(:method) &&
      self.href.to_s == obj.href.to_s && self.method.to_s == obj.method.to_s
  end
  alias :eql? :==

  # Hash derived from #to_s (href and method) so that links which are == land
  # in the same Hash bucket.
  def hash
    to_s.hash
  end

  def to_s
    "<Relevance::Tarantula::Link href=#{href}, method=#{method}>"
  end

end

Version data entries

4 entries across 4 versions & 3 rubygems

Version Path
relevance-tarantula-0.3.2 lib/relevance/tarantula/link.rb
relevance-tarantula-0.3.3 lib/relevance/tarantula/link.rb
panmind-tarantula-0.3.3 lib/relevance/tarantula/link.rb
tarantula-0.3.3 lib/relevance/tarantula/link.rb