lib/twitterscraper/tweet.rb in twitterscraper-ruby-0.16.0 vs lib/twitterscraper/tweet.rb in twitterscraper-ruby-0.17.0

- old
+ new

@@ -4,10 +4,11 @@
    class Tweet
      KEYS = [
          :screen_name,
          :name,
          :user_id,
+         :profile_image_url,
          :tweet_id,
          :text,
          :links,
          :hashtags,
          :image_urls,
@@ -49,10 +50,15 @@
            tweet['created_at'] = Time.parse(tweet['created_at'])
            new(tweet)
          end
        end
+       # .js-stream-item
+       # .js-stream-tweet{data: {screen-name:, tweet-id:}}
+       # .stream-item-header
+       # .js-tweet-text-container
+       # .stream-item-footer
        def from_html(text)
          html = Nokogiri::HTML(text)
          from_tweets_html(html.xpath("//li[@class[contains(., 'js-stream-item')]]/div[@class[contains(., 'js-stream-tweet')]]"))
        end
@@ -70,10 +76,12 @@
            Twitterscraper.logger.warn "html doesn't include div.js-tweet-text-container url=https://twitter.com/#{screen_name}/status/#{tweet_id}"
            return nil
          end
          inner_html = Nokogiri::HTML(html.inner_html)
+
+         profile_image_url = inner_html.xpath("//img[@class[contains(., 'js-action-profile-avatar')]]").first.attr('src').gsub(/_bigger/, '')
          text = inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text
          links = inner_html.xpath("//a[@class[contains(., 'twitter-timeline-link')]]").map { |elem| elem.attr('data-expanded-url') }.select { |link| link && !link.include?('pic.twitter') }
          image_urls = inner_html.xpath("//div[@class[contains(., 'AdaptiveMedia-photoContainer')]]").map { |elem| elem.attr('data-image-url') }
          video_url = inner_html.xpath("//div[@class[contains(., 'PlayableMedia-container')]]/a").map { |elem| elem.attr('href') }[0]
          has_media = !image_urls.empty? || (video_url && !video_url.empty?)
@@ -97,9 +105,10 @@
          timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i
          new(
              screen_name: screen_name,
              name: html.attr('data-name'),
              user_id: html.attr('data-user-id').to_i,
+             profile_image_url: profile_image_url,
              tweet_id: tweet_id,
              text: text,
              links: links,
              hashtags: text.scan(/#\w+/).map { |tag| tag.delete_prefix('#') },
              image_urls: image_urls,