lib/twitterscraper/tweet.rb in twitterscraper-ruby-0.11.0 vs lib/twitterscraper/tweet.rb in twitterscraper-ruby-0.12.0
- old
+ new
@@ -57,16 +57,23 @@
end
# Maps every element of +html+ through from_tweet_html and returns the results.
# The added (+) line compacts the mapped results, dropping the nil that the
# new from_tweet_html returns for an element whose markup does not include
# 'js-tweet-text-container'; the removed (-) line returned them unfiltered.
def from_tweets_html(html)
html.map do |tweet|
from_tweet_html(tweet)
- end
+ end.compact
end
def from_tweet_html(html)
+ screen_name = html.attr('data-screen-name')
+ tweet_id = html.attr('data-tweet-id')&.to_i
+
+ unless html.to_s.include?('js-tweet-text-container')
+ Twitterscraper.logger.warn "html doesn't include div.js-tweet-text-container url=https://twitter.com/#{screen_name}/status/#{tweet_id}"
+ return nil
+ end
+
inner_html = Nokogiri::HTML(html.inner_html)
- tweet_id = html.attr('data-tweet-id').to_i
text = inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text
links = inner_html.xpath("//a[@class[contains(., 'twitter-timeline-link')]]").map { |elem| elem.attr('data-expanded-url') }.select { |link| link && !link.include?('pic.twitter') }
image_urls = inner_html.xpath("//div[@class[contains(., 'AdaptiveMedia-photoContainer')]]").map { |elem| elem.attr('data-image-url') }
video_url = inner_html.xpath("//div[@class[contains(., 'PlayableMedia-container')]]/a").map { |elem| elem.attr('href') }[0]
has_media = !image_urls.empty? || (video_url && !video_url.empty?)
@@ -87,10 +94,10 @@
reply_to_users = inner_html.xpath("//div[@class[contains(., 'ReplyingToContextBelowAuthor')]]/a").map { |user| {screen_name: user.text.delete_prefix('@'), user_id: user.attr('data-user-id')} }
end
timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i
new(
- screen_name: html.attr('data-screen-name'),
+ screen_name: screen_name,
name: html.attr('data-name'),
user_id: html.attr('data-user-id').to_i,
tweet_id: tweet_id,
text: text,
links: links,