lib/feed2gram/parses_entries.rb in feed2gram-1.0.0 vs lib/feed2gram/parses_entries.rb in feed2gram-1.1.0
- old
+ new
@@ -1,21 +1,57 @@
require "nokogiri"
require "open-uri"
module Feed2Gram
- Post = Struct.new(:url, :images, :caption, keyword_init: true)
# One media attachment of a post: its upper-cased media type (e.g. "IMAGE"
# or "VIDEO") and the URL it is served from.
Media = Struct.new(:media_type, :url, keyword_init: true) {
  # Whether this attachment's media_type is exactly the string "VIDEO".
  def video?
    "VIDEO" == media_type
  end
}
# A feed entry prepared for publishing, built with keyword arguments.
Post = Struct.new(
  :media_type, # post-level type chosen by ParsesEntries ("STORIES", "REELS", "CAROUSEL" or "IMAGE")
  :url,        # text of the entry's <id> element
  :medias,     # Array of Media structs, one per <img> found in the entry's figure
  :caption,    # stripped text of the figure's <figcaption>
  keyword_init: true
)
# Reads an Atom feed and turns each entry into a Post that can be published.
class ParsesEntries
  # Post types that accept exactly one media item.
  SINGLE_MEDIA_POST_TYPES = %w[STORIES REELS].freeze
  private_constant :SINGLE_MEDIA_POST_TYPES

  # Downloads the feed at +feed_url+ and builds one Post per entry.
  #
  # Each entry's content is parsed as HTML; the <img> tags matched by
  # "//figure[1]/img" become the post's medias and the matching <figcaption>
  # becomes its caption. Entries that cannot be published (no <img>, or a
  # story/reel with more than one <img>) are skipped with a warning on stderr.
  #
  # @param feed_url [String] URL of the feed to fetch
  # @return [Array<Post>] the publishable posts, in feed order
  def parse(feed_url)
    # The block form lets open-uri close the downloaded body as soon as
    # Nokogiri has read it, instead of leaving the handle open.
    feed = URI.parse(feed_url).open { |io| Nokogiri::XML(io) }
    feed.xpath("//*:entry").map { |entry|
      html = Nokogiri::HTML(entry.xpath("*:content[1]").text)
      medias = html.xpath("//figure[1]/img").map { |img|
        Media.new(
          media_type: (img["data-media-type"] || "image").upcase,
          url: img["src"]
        )
      }
      Post.new(
        media_type: determine_post_media_type(html, medias),
        url: entry.xpath("*:id[1]").text,
        medias: medias,
        caption: html.xpath("//figure[1]/figcaption").text.strip
      )
    }.select { |post| publishable?(post) }
  end

  private

  # Decides whether +post+ can be published, warning about the reason when
  # it cannot.
  #
  # @return [Boolean]
  def publishable?(post)
    if post.medias.empty?
      warn "Skipping post with no <img> tag: #{post.url}"
      false
    elsif SINGLE_MEDIA_POST_TYPES.include?(post.media_type) && post.medias.size > 1
      warn "Skipping #{post.media_type.downcase} with more than one <img> tag (only one allowed): #{post.url}"
      false
    else
      true
    end
  end

  # Picks the post-level media type for an entry.
  #
  # An explicit data-post-type of "stories" or "reels" on the figure (any
  # letter case) wins. Otherwise several medias make a "CAROUSEL", a single
  # video a "REELS", and everything else an "IMAGE".
  #
  # This runs before unpublishable entries are filtered out, so it must cope
  # with entries that have no <figure> or no <img>: those fall through to
  # "IMAGE" and are discarded later by #parse.
  #
  # @param html [#at] parsed entry content
  # @param medias [Array<Media>] medias extracted from the entry
  # @return [String] "STORIES", "REELS", "CAROUSEL" or "IMAGE"
  def determine_post_media_type(html, medias)
    figure = html.at("//figure[1]")
    post_type = figure && figure["data-post-type"]&.upcase

    if SINGLE_MEDIA_POST_TYPES.include?(post_type)
      post_type
    elsif medias.size > 1
      "CAROUSEL"
    elsif medias.first&.video?
      # Instagram deprecated the VIDEO media_type outside carousel items;
      # a lone video has to be published as REELS instead. See
      # https://developers.facebook.com/docs/instagram-api/reference/ig-user/media#creating
      "REELS"
    else
      "IMAGE"
    end
  end
end
end