lib/ids_please/grabbers/twitter.rb in ids_please-2.1.2 vs lib/ids_please/grabbers/twitter.rb in ids_please-2.2.0
- old
+ new
@@ -1,23 +1,29 @@
class IdsPlease
module Grabbers
class Twitter < IdsPlease::Grabbers::Base
# Diff hunk for Twitter#grab_link. Lines starting with "-" are the
# ids_please-2.1.2 version, lines starting with "+" are the 2.2.0 version,
# and unmarked lines are common to both.
#
# Extracts profile fields from the page HTML with regular expressions and
# stores them in @network_id, @avatar, @display_name, @username, @data and
# (2.2.0 only) @counts.
#
# Returns self, both on success and after an exception has been rescued.
def grab_link
# 2.1.2 fetched the page here and memoized it in @page_source. 2.2.0 drops
# that line and calls page_source instead; page_source is not defined in
# this hunk -- presumably inherited from IdsPlease::Grabbers::Base (confirm).
- @page_source ||= open(link).read
- @network_id = @page_source.scan(/data-user-id="(\d+)"/).flatten.first
- @avatar = @page_source.scan(/ProfileAvatar-image " src="([^"]+)"/).flatten.first
- @display_name = @page_source.scan(/ProfileHeaderCard-nameLink[^>]+>([^<]+)</).flatten.first
- @username = @page_source.scan(/<title>[^\(]+\(@([^\)]+)\)/).flatten.first
# Each scan(...).flatten.first yields the first captured group, or nil when
# the pattern does not occur in the page.
+ @network_id = page_source.scan(/data-user-id="(\d+)"/).flatten.first
+ @avatar = page_source.scan(/ProfileAvatar-image " src="([^"]+)"/).flatten.first
+ @display_name = page_source.scan(/ProfileHeaderCard-nameLink[^>]+>([^<]+)</).flatten.first
+ @username = page_source.scan(/<title>[^\(]+\(@([^\)]+)\)/).flatten.first
@data = {}
{
# 2.1.2 called encode directly on the first match, which raises
# NoMethodError on nil when a field is absent from the page. 2.2.0 inserts
# to_s so an absent field becomes '' instead.
- description: @page_source.scan(/ProfileHeaderCard-bio[^>]+>([^<]+)</).flatten.first.encode('utf-8'),
- location: @page_source.scan(/ProfileHeaderCard-locationText[^>]+>([^<]+)</).flatten.first.encode('utf-8'),
- join_date: @page_source.scan(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</).flatten.first.encode('utf-8'),
+ description: page_source.scan(/ProfileHeaderCard-bio[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
+ location: page_source.scan(/ProfileHeaderCard-locationText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
+ join_date: page_source.scan(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
}.each do |k, v|
# Skip fields that were not found (empty string).
# NOTE(review): in 2.2.0 v can no longer be nil because of to_s, and this
# check runs before strip, so a whitespace-only value is still stored as ''.
next if v.nil? || v == ''
# Decode HTML entities; 2.2.0 additionally trims surrounding whitespace.
- @data[k] = CGI.unescapeHTML(v)
+ @data[k] = CGI.unescapeHTML(v).strip
end
# Added in 2.2.0: counters read from `<name>_count":<digits>,"` fragments in
# the page source. to_i turns a missing match (nil) into 0.
+ @counts = {
+ tweets: page_source.scan(/statuses_count":(\d+),"/).flatten.first.to_i,
+ following: page_source.scan(/friends_count":(\d+),"/).flatten.first.to_i,
+ followers: page_source.scan(/followers_count":(\d+),"/).flatten.first.to_i,
+ favorites: page_source.scan(/favourites_count":(\d+),"/).flatten.first.to_i,
+ lists: page_source.scan(/listed_count":(\d+),"/).flatten.first.to_i,
+ }
self
# Any StandardError raised above is printed with p and then swallowed: the
# caller still receives self, holding whatever was assigned before the
# failure.
rescue => e
p e
return self
end