lib/woothee/crawler.rb in woothee-0.3.0 vs lib/woothee/crawler.rb in woothee-0.3.2
- old
+ new
@@ -43,20 +43,20 @@
false
end
def self.challenge_crawlers(ua, result)
- if ua.index('Yahoo') or ua.index('listing.yahoo.co.jp/support/faq/')
- if ua.index('compatible; Yahoo! Slurp;')
+ if ua.index('Yahoo') or ua.index('help.yahoo.co.jp/help/jp/') or ua.index('listing.yahoo.co.jp/support/faq/')
+ if ua.index('compatible; Yahoo! Slurp')
update_map(result, Woothee::DataSet.get('YahooSlurp'))
return true
end
if ua.index('YahooFeedSeekerJp') or ua.index('YahooFeedSeekerBetaJp')
update_map(result, Woothee::DataSet.get('YahooJP'))
return true
end
- if ua.index('crawler (http://listing.yahoo.co.jp/support/faq/')
+ if ua.index('crawler (http://listing.yahoo.co.jp/support/faq/') || ua.index('crawler (http://help.yahoo.co.jp/help/jp/')
update_map(result, Woothee::DataSet.get('YahooJP'))
return true
end
if ua.index('Yahoo Pipes')
update_map(result, Woothee::DataSet.get('YahooPipes'))
@@ -93,29 +93,53 @@
update_map(result, Woothee::DataSet.get('facebook'))
return true
end
if ua.index('ichiro')
if ua.index('http://help.goo.ne.jp/door/crawler.html') or ua.index('compatible; ichiro/mobile goo;')
- update_map(result, Woothee::DataSet.get('gooIchiro'))
+ update_map(result, Woothee::DataSet.get('goo'))
return true
end
end
+ if ua.index('gooblogsearch/')
+ update_map(result, Woothee::DataSet.get('goo'))
+ return true
+ end
if ua.index('Apple-PubSub')
update_map(result, Woothee::DataSet.get('ApplePubSub'))
return true
end
+ if ua.index("(www.radian6.com/crawler)")
+ update_map(result, Woothee::DataSet.get("radian6"))
+ return true
+ end
+ if ua.index('Genieo/')
+ update_map(result, Woothee::DataSet.get("Genieo"))
+ return true
+ end
+ if ua.index("labs.topsy.com/butterfly/")
+ update_map(result, Woothee::DataSet.get("topsyButterfly"))
+ return true
+ end
+ if ua.index("rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot")
+ update_map(result, Woothee::DataSet.get("rogerbot"))
+ return true
+ end
+ if ua.index("compatible; AhrefsBot/")
+ update_map(result, Woothee::DataSet.get("AhrefsBot"))
+ return true
+ end
if ua.index('livedoor FeedFetcher') or ua.index('Fastladder FeedFetcher')
update_map(result, Woothee::DataSet.get('livedoorFeedFetcher'))
return true
end
if ua.index('Hatena ')
if ua.index('Hatena Antenna') or ua.index('Hatena Pagetitle Agent') or ua.index('Hatena Diary RSS')
update_map(result, Woothee::DataSet.get('Hatena'))
return true
end
end
- if ua.index('mixi-check') or ua.index('mixi-news-crawler')
+ if ua.index('mixi-check') or ua.index('mixi-crawler') or ua.index('mixi-news-crawler')
update_map(result, Woothee::DataSet.get('mixi'))
return true
end
if ua.index('Indy Library')
if ua.index('compatible; Indy Library')
@@ -126,10 +150,10 @@
false
end
def self.challenge_maybe_crawler(ua, result)
- if ua =~ /bot(?:[-_ .\/;@()]|$)/oi
+ if ua =~ /(bot|crawler|spider)(?:[-_ .\/;@()]|$)/oi
update_map(result, Woothee::DataSet.get('VariousCrawler'))
return true
end
if ua =~ /(?:Rome Client |UnwindFetchor\/|ia_archiver |Summify |PostRank\/)/o or ua.index('ASP-Ranker Feed Crawler')