Sha256: 4faa493b2a571ad2eeb73444aebad52b07771a82ee13c2e1803c28ba24dee743
Contents?: true
Size: 1.39 KB
Versions: 8
Compression:
Stored size: 1.39 KB
Contents
module VoightKampff
  # Tests a user agent string against a list of known crawler patterns
  # loaded from a JSON file named by CRAWLERS_FILENAME.
  class Test
    CRAWLERS_FILENAME = 'crawler-user-agents.json'

    attr_accessor :user_agent_string

    # @param user_agent_string [String, nil] the raw User-Agent value to test
    def initialize(user_agent_string)
      @user_agent_string = user_agent_string
    end

    # The crawler entry whose pattern matched the user agent string.
    #
    # @return [Hash] the matching entry from the crawlers file, or an empty
    #   hash when no pattern matched
    def agent
      @agent ||= matching_crawler || {}
    end

    # @return [Boolean] true when no crawler pattern matched
    def human?
      agent.empty?
    end

    # @return [Boolean] true when some crawler pattern matched
    def bot?
      !human?
    end
    alias replicant? bot?

    private

    # Candidate locations of the crawlers file.
    #
    # These paths should be ordered by priority: the Rails application root
    # (when Rails is loaded) comes before the gem's own root.
    def lookup_paths
      base_paths = []
      base_paths << Rails.root if defined? Rails
      base_paths << VoightKampff.root
      base_paths.map { |base| base.join('config', CRAWLERS_FILENAME) }
    end

    # First lookup path that exists on disk, or nil when none does.
    def preferred_path
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      lookup_paths.find { |path| File.exist?(path) }
    end

    # Finds the crawler entry whose pattern matched, if any.
    def matching_crawler
      match = crawler_regexp.match(@user_agent_string)
      return unless match

      # MatchData#names lists every named group in the regexp, so we must pick
      # the group that actually participated in the match rather than the
      # first declared one.
      matched_name = match.names.find { |name| match[name] }
      return unless matched_name

      crawlers[matched_name.sub('match', '').to_i]
    end

    # One case-insensitive regexp combining every crawler pattern, cached at
    # class level.
    def crawler_regexp
      @@crawler_regexp ||= begin
        # NOTE: This is admittedly a bit convoluted but the performance gains
        # make it worthwhile. Each pattern is wrapped in a named group
        # ("match<index>") so the matching crawler can be recovered from a
        # single regexp match.
        crawler_patterns = crawlers.each_with_index.map do |crawler, index|
          "(?<match#{index}>#{crawler['pattern']})"
        end.join('|')
        Regexp.new("(#{crawler_patterns})", Regexp::IGNORECASE)
      end
    end

    # Parsed contents of the crawlers file, cached at class level.
    def crawlers
      # File.read closes the file handle, unlike a bare File.open.
      @@crawlers ||= JSON.parse(File.read(preferred_path))
    end
  end
end
Version data entries
8 entries across 8 versions & 1 rubygems