Sha256: 4faa493b2a571ad2eeb73444aebad52b07771a82ee13c2e1803c28ba24dee743

Contents?: true

Size: 1.39 KB

Versions: 8

Compression:

Stored size: 1.39 KB

Contents

module VoightKampff
  # Tests a user agent string against a list of known crawler patterns loaded
  # from a JSON file (an array of objects, each with a "pattern" key).
  #
  # The crawler list and the combined regexp built from it are cached in
  # class variables, so they are loaded/compiled once per process.
  class Test
    CRAWLERS_FILENAME = 'crawler-user-agents.json'

    # NOTE: #agent is memoized; reassigning user_agent_string after #agent has
    # been called does not re-run the match.
    attr_accessor :user_agent_string

    # @param user_agent_string [String, nil] the raw User-Agent value to test
    def initialize(user_agent_string)
      @user_agent_string = user_agent_string
    end

    # @return [Hash] the crawler entry whose pattern matched the user agent,
    #   or an empty hash when nothing matched
    def agent
      @agent ||= matching_crawler || {}
    end

    # @return [Boolean] true when no crawler pattern matched
    def human?
      agent.empty?
    end

    # @return [Boolean] true when a crawler pattern matched
    def bot?
      !human?
    end
    alias replicant? bot?

    private

    # Candidate locations of the crawler file, ordered by priority: the
    # Rails application (when one is available) first, then the gem itself.
    def lookup_paths
      base_paths = []
      # Rails.root is nil when Rails is loaded without an application.
      base_paths << Rails.root if defined?(Rails) && Rails.respond_to?(:root) && Rails.root
      base_paths << VoightKampff.root

      base_paths.map { |base| base.join('config', CRAWLERS_FILENAME) }
    end

    # @return [Pathname, nil] the first lookup path that exists on disk
    def preferred_path
      lookup_paths.find { |path| File.exist?(path) }
    end

    # @return [Hash, nil] the crawler entry matching the user agent, if any
    def matching_crawler
      match = crawler_regexp.match(@user_agent_string)
      return unless match

      # MatchData#names lists every named group in the regexp, not only the
      # one that participated, so pick the "matchN" group that captured text.
      group = match.names.find { |name| name =~ /\Amatch\d+\z/ && match[name] }
      crawlers[group.sub(/\Amatch/, '').to_i] if group
    end

    # One case-insensitive regexp with a named group ("match<index>") per
    # crawler, so a single scan identifies which crawler entry matched.
    def crawler_regexp
      @@crawler_regexp ||= begin
        # NOTE: This is admittedly a bit convoluted but the performance gains make it worthwhile
        alternatives = crawlers.each_with_index.map do |crawler, index|
          "(?<match#{index}>#{crawler['pattern']})"
        end
        Regexp.new("(#{alternatives.join('|')})", Regexp::IGNORECASE)
      end
    end

    # @return [Array<Hash>] the parsed crawler definitions
    # @raise [Errno::ENOENT] when no crawler file exists in any lookup path
    def crawlers
      @@crawlers ||= begin
        path = preferred_path
        raise Errno::ENOENT, CRAWLERS_FILENAME unless path

        # File.read closes the file handle once the contents are read.
        JSON.parse(File.read(path))
      end
    end
  end
end

Version data entries

8 entries across 8 versions & 1 rubygems

Version Path
voight_kampff-1.1.4 lib/voight_kampff/test.rb
voight_kampff-1.1.3 lib/voight_kampff/test.rb
voight_kampff-1.1.2 lib/voight_kampff/test.rb
voight_kampff-1.1.1 lib/voight_kampff/test.rb
voight_kampff-1.1.0 lib/voight_kampff/test.rb
voight_kampff-1.0.4 lib/voight_kampff/test.rb
voight_kampff-1.0.3 lib/voight_kampff/test.rb
voight_kampff-1.0.2 lib/voight_kampff/test.rb