# frozen_string_literal: true

module Wayfarer
  # Job behaviour shared by Wayfarer jobs. When included, it mixes in
  # {Wayfarer::Middleware::Controller}, aliases `#perform` to `#call`,
  # registers the middleware stack and adds the class method {crawl}.
  #
  # @!attribute [r] task
  #   @return [Wayfarer::Task] the current task
  # @!attribute [r] uri
  #   @return [Addressable::URI] Parsed task URL
  # @!attribute [r] user_agent
  #   @return [Object] the user agent that retrieved the page
  # @!attribute [r] action
  #   @return [Symbol, Object] action that the task URL was routed to.
  # @!attribute [r] params
  #   @return [HashWithIndifferentAccess] path parameters collected from routes
  module Base
    extend ActiveSupport::Concern

    # @!method stage(urls)
    #   Adds URLs to an internal staging set so that they get enqueued
    #   eventually, once the job executed successfully.
    #   @overload stage(urls)
    #     @param urls [Array] URLs to add to the staging set.
    #   @overload stage(url)
    #     @param url [String] URL to add to the staging set.

    # @!method fetch(url, follow: 3)
    #   Retrieves the given URL to a {Page}.
    #   @param url [String] URL to fetch using plain HTTP(S).
    #   @param follow [Integer] Number of redirects to follow.

    # @!method page(live: false)
    #   Returns the most recently retrieved page or a new page
    #   for the current task URL if the `live` keyword is passed.
    #   @param live [Boolean] whether to retrieve a new {Page}.
    #   @return [Wayfarer::Page]

    # @!scope class

    # @!attribute [r] route
    #   The job's {Wayfarer::Routing::DSL} that maps URLs to instance methods
    #   or to a {Handler}.
    #   @return [Wayfarer::Routing::DSL]
    #   @example Append a host route
    #     route.host "example.com", to: :index

    # @!method content_types(*content_types)
    #   Whitelists Content-Types. Once at least one Content-Type is set, only
    #   those Content-Types will be processed.
    #   @param content_types [Array] Content-Types to whitelist

    # @!group Callbacks

    # @!method before_fetch
    #   Registers a callback that is called before the page is fetched.
    #   If a symbol is passed, an instance method with the same name will be
    #   called.
    #   @overload before_fetch(callback)
    #     @param callback [Symbol] Instance method to call
    #   @overload before_fetch(&block)
    #     @yield [task]
    #     @yieldparam task [Wayfarer::Task]
    #   @example Accessing the user agent in {#before_fetch}
    #     before_fetch do |task|
    #       user_agent # => the user agent that will fetch the page
    #     end

    # @!method around_fetch
    #   Registers a callback that is called around the page getting fetched.
    #   If a symbol is passed, an instance method with the same name will be
    #   called.
    #   @overload around_fetch(callback)
    #     @param callback [Symbol] Instance method to call
    #   @overload around_fetch(&block)
    #     @yield [task]
    #     @yieldparam task [Wayfarer::Task]

    # @!method after_fetch
    #   Registers a callback that is called after the page was fetched.
    #   If a symbol is passed, an instance method with the same name will be
    #   called.
    #   @overload after_fetch(callback)
    #     @param callback [Symbol] Instance method to call
    #   @overload after_fetch(&block)
    #     @yield [task]
    #     @yieldparam task [Wayfarer::Task]

    # @!method before_perform
    #   Registers a callback that is called before the task is performed.
    #   If a symbol is passed, an instance method with the same name will be
    #   called.
    #   @overload before_perform(callback)
    #     @param callback [Symbol] Instance method to call
    #   @overload before_perform(&block)
    #     @yield [task]
    #     @yieldparam task [Wayfarer::Task]

    # @!method around_perform
    #   Registers a callback that is called around the task getting performed.
    #   If a symbol is passed, an instance method with the same name will be
    #   called.
    #   @overload around_perform(callback)
    #     @param callback [Symbol] Instance method to call
    #   @overload around_perform(&block)
    #     @yield [task]
    #     @yieldparam task [Wayfarer::Task]

    # @!method after_perform
    #   Registers a callback that is called after the task was performed.
    #   If a symbol is passed, an instance method with the same name will be
    #   called.
    #   @overload after_perform(callback)
    #     @param callback [Symbol] Instance method to call
    #   @overload after_perform(&block)
    #     @yield [task]
    #     @yieldparam task [Wayfarer::Task]

    # @!endgroup

    included do
      include Wayfarer::Middleware::Controller

      # Implement ActiveJob's #perform by calling into our own middleware chain
      alias_method :perform, :call

      # Middleware stack
      use Wayfarer::Middleware::Redis
      use Wayfarer::Middleware::BatchCompletion
      use Wayfarer::Middleware::UriParser
      use Wayfarer::Middleware::Normalize
      use Wayfarer::Middleware::Dedup
      use Wayfarer::Middleware::Stage
      use Wayfarer::Middleware::Router
      use Wayfarer::Middleware::UserAgent
      use Wayfarer::Middleware::ContentType
      use Wayfarer::Middleware::Dispatch
    end

    class_methods do
      # Builds a task for the given URL and batch and hands it to
      # `perform_later`.
      #
      # @param url [String] URL of the task (presumably a String; the value is
      #   passed to `Task.new` unchanged)
      # @param batch [String] batch the task belongs to; defaults to a fresh
      #   `SecureRandom.uuid` evaluated on every call
      # @return [Wayfarer::Task] the task that was passed to `perform_later`
      def crawl(url, batch: SecureRandom.uuid)
        # `tap` makes the method return the task itself rather than whatever
        # `perform_later` returns.
        Task.new(url, batch).tap do |task|
          perform_later(task)
        end
      end
    end
  end
end