Sha256: c9e42eafb82858e8f2b9680918e1d685675928515ff84e13f00e2f1698ab8bee

Contents?: true

Size: 1.47 KB

Versions: 5

Compression:

Stored size: 1.47 KB

Contents

require 'uri'

module Spidr
  #
  # {Sanitizers} is mixed into {Agent} and provides the settings and
  # helper used to clean up links before they are processed.
  #
  module Sanitizers
    #
    # Hook run when the module is included; defines the sanitization
    # accessors on the including class or module.
    #
    # @param [Module] base
    #   The class or module including {Sanitizers}.
    #
    def self.included(base)
      base.class_eval do
        # Whether URI fragments are removed by {#sanitize_url}
        attr_accessor :strip_fragments

        # Whether URI queries are removed by {#sanitize_url}
        attr_accessor :strip_query
      end
    end

    #
    # Sets up the sanitization settings from the given options.
    #
    # @param [Hash] options
    #   Additional options.
    #
    # @option options [Boolean] :strip_fragments (true)
    #   Whether the fragment component should be removed from URLs.
    #   The default applies only when the key is absent; an explicitly
    #   given value (including `nil` or `false`) is stored as-is.
    #
    # @option options [Boolean] :strip_query (false)
    #   Whether the query component should be removed from URLs.
    #   A missing, `nil` or `false` value is stored as `false`.
    #
    # @since 0.2.2
    #
    def initialize(options={})
      @strip_query = options[:strip_query] || false
      @strip_fragments = options.fetch(:strip_fragments, true)
    end

    #
    # Applies the configured sanitization settings to a URL.
    #
    # The argument is converted to a String and parsed again, so the
    # object passed in is never modified.
    #
    # @param [URI::HTTP, URI::HTTPS, String] url
    #   The URL to clean up.
    #
    # @return [URI::HTTP, URI::HTTPS]
    #   A newly parsed URL with the fragment and/or query removed,
    #   depending on the current settings.
    #
    # @since 0.2.2
    #
    def sanitize_url(url)
      URI(url.to_s).tap do |sanitized|
        sanitized.fragment = nil if @strip_fragments
        sanitized.query = nil if @strip_query
      end
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygem

Version Path
spidr-0.2.7 lib/spidr/sanitizers.rb
spidr-0.2.6 lib/spidr/sanitizers.rb
spidr-0.2.5 lib/spidr/sanitizers.rb
spidr-0.2.4 lib/spidr/sanitizers.rb
spidr-0.2.3 lib/spidr/sanitizers.rb