require "nokogiri"
require "open-uri"
module Feed2Gram
# A single media item (image or video) attached to a post.
#
# Members:
#   media_type - type label such as "IMAGE" or "VIDEO"
#   url        - location of the media file
#   cover_url  - optional cover image location
Media = Struct.new(:media_type, :url, :cover_url, keyword_init: true)

class Media
  # @return [Boolean] true when this item's media_type is exactly "VIDEO"
  def video?
    media_type == "VIDEO"
  end
end
# A post assembled from one feed entry.
#
# Members:
#   media_type - overall post type label (e.g. "IMAGE", "CAROUSEL", "REELS")
#   url        - identifier of the originating feed entry
#   medias     - list of Media items belonging to the post
#   caption    - caption text for the post
Post = Struct.new(
  :media_type,
  :url,
  :medias,
  :caption,
  keyword_init: true
)
# Reads a feed made of `entry` elements and turns each entry into a Post.
class ParsesEntries
  # Post types that are only valid with a single media item.
  SINGLE_MEDIA_POST_TYPES = %w[STORIES REELS].freeze
  private_constant :SINGLE_MEDIA_POST_TYPES

  # Loads the feed at +feed_url+ and returns the posts that can be published.
  #
  # Entries with no image, and stories/reels carrying more than one image,
  # are dropped after a warning is emitted via Kernel#warn.
  #
  # @param feed_url [String] location of the feed to read
  # @return [Array<Post>] publishable posts, in feed order
  def parse(feed_url)
    # Block form so the IO handed back by open-uri is closed as soon as
    # Nokogiri has finished reading it.
    feed = URI.parse(feed_url).open { |io| Nokogiri::XML(io) }
    feed.xpath("//*:entry")
      .map { |entry| build_post(entry) }
      .select { |post| publishable?(post) }
  end

  private

  # Builds a Post from one feed entry by parsing the HTML held in the
  # entry's first content element.
  #
  # NOTE(review): in XPath 1.0, "//figure[1]" selects every figure that is
  # the first figure child of its parent, not only the first figure in the
  # document. Content with figures under different parents would contribute
  # images from each of them; confirm whether "(//figure)[1]" is intended.
  def build_post(entry)
    html = Nokogiri::HTML(entry.xpath("*:content[1]").text)
    medias = html.xpath("//figure[1]/img").map { |img|
      Media.new(
        # Images without an explicit type attribute are treated as "IMAGE".
        media_type: (img["data-media-type"] || "image").upcase,
        url: img["src"],
        cover_url: img["data-cover-url"]
      )
    }
    Post.new(
      media_type: determine_post_media_type(html, medias),
      url: entry.xpath("*:id[1]").text,
      medias: medias,
      caption: html.xpath("//figure[1]/figcaption").text.strip
    )
  end

  # Decides whether +post+ should be kept, warning about the ones that are
  # rejected.
  #
  # @return [Boolean]
  def publishable?(post)
    if post.medias.empty?
      warn "Skipping post with no  tag: #{post.url}"
      false
    elsif SINGLE_MEDIA_POST_TYPES.include?(post.media_type) && post.medias.size > 1
      warn "Skipping #{post.media_type.downcase} with more than one  tag (only one allowed): #{post.url}"
      false
    else
      true
    end
  end

  # Works out the post type from the figure's data-post-type attribute and
  # the media items found in it.
  #
  # @param html [#at] parsed entry content
  # @param medias [Array<Media>] media items extracted from the content
  # @return [String, nil] "STORIES" or "REELS" when requested explicitly,
  #   otherwise "CAROUSEL", "REELS" or "IMAGE" based on the media items;
  #   nil when there are no media items and no explicit type
  def determine_post_media_type(html, medias)
    figure = html.at("//figure[1]")
    # Content without any figure has no attribute to read.
    post_type = figure["data-post-type"]&.upcase if figure

    if SINGLE_MEDIA_POST_TYPES.include?(post_type)
      post_type
    elsif medias.empty?
      # Nothing to classify; the caller filters such posts out with a
      # warning instead of crashing here.
      nil
    elsif medias.size > 1
      "CAROUSEL"
    elsif medias.first.video?
      # The VIDEO media type is deprecated outside carousel items; a single
      # video must be published to the feed as REELS. See
      # https://developers.facebook.com/docs/instagram-api/reference/ig-user/media#creating
      "REELS"
    else
      "IMAGE"
    end
  end
end
end