lib/woothee/crawler.rb in woothee-0.3.0 vs lib/woothee/crawler.rb in woothee-0.3.2

- old
+ new

@@ -43,20 +43,20 @@ false end def self.challenge_crawlers(ua, result) - if ua.index('Yahoo') or ua.index('listing.yahoo.co.jp/support/faq/') - if ua.index('compatible; Yahoo! Slurp;') + if ua.index('Yahoo') or ua.index('help.yahoo.co.jp/help/jp/') or ua.index('listing.yahoo.co.jp/support/faq/') + if ua.index('compatible; Yahoo! Slurp') update_map(result, Woothee::DataSet.get('YahooSlurp')) return true end if ua.index('YahooFeedSeekerJp') or ua.index('YahooFeedSeekerBetaJp') update_map(result, Woothee::DataSet.get('YahooJP')) return true end - if ua.index('crawler (http://listing.yahoo.co.jp/support/faq/') + if ua.index('crawler (http://listing.yahoo.co.jp/support/faq/') || ua.index('crawler (http://help.yahoo.co.jp/help/jp/') update_map(result, Woothee::DataSet.get('YahooJP')) return true end if ua.index('Yahoo Pipes') update_map(result, Woothee::DataSet.get('YahooPipes')) @@ -93,29 +93,53 @@ update_map(result, Woothee::DataSet.get('facebook')) return true end if ua.index('ichiro') if ua.index('http://help.goo.ne.jp/door/crawler.html') or ua.index('compatible; ichiro/mobile goo;') - update_map(result, Woothee::DataSet.get('gooIchiro')) + update_map(result, Woothee::DataSet.get('goo')) return true end end + if ua.index('gooblogsearch/') + update_map(result, Woothee::DataSet.get('goo')) + return true + end if ua.index('Apple-PubSub') update_map(result, Woothee::DataSet.get('ApplePubSub')) return true end + if ua.index("(www.radian6.com/crawler)") + update_map(result, Woothee::DataSet.get("radian6")) + return true + end + if ua.index('Genieo/') + update_map(result, Woothee::DataSet.get("Genieo")) + return true + end + if ua.index("labs.topsy.com/butterfly/") + update_map(result, Woothee::DataSet.get("topsyButterfly")) + return true + end + if ua.index("rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot") + update_map(result, Woothee::DataSet.get("rogerbot")) + return true + end + if ua.index("compatible; AhrefsBot/") + update_map(result, Woothee::DataSet.get("AhrefsBot")) + return true + end if ua.index('livedoor FeedFetcher') or ua.index('Fastladder FeedFetcher') update_map(result, Woothee::DataSet.get('livedoorFeedFetcher')) return true end if ua.index('Hatena ') if ua.index('Hatena Antenna') or ua.index('Hatena Pagetitle Agent') or ua.index('Hatena Diary RSS') update_map(result, Woothee::DataSet.get('Hatena')) return true end end - if ua.index('mixi-check') or ua.index('mixi-news-crawler') + if ua.index('mixi-check') or ua.index('mixi-crawler') or ua.index('mixi-news-crawler') update_map(result, Woothee::DataSet.get('mixi')) return true end if ua.index('Indy Library') if ua.index('compatible; Indy Library') @@ -126,10 +150,10 @@ false end def self.challenge_maybe_crawler(ua, result) - if ua =~ /bot(?:[-_ .\/;@()]|$)/oi + if ua =~ /(bot|crawler|spider)(?:[-_ .\/;@()]|$)/oi update_map(result, Woothee::DataSet.get('VariousCrawler')) return true end if ua =~ /(?:Rome Client |UnwindFetchor\/|ia_archiver |Summify |PostRank\/)/o or ua.index('ASP-Ranker Feed Crawler')