lib/feed_tools/feed_item.rb in feedtools-0.2.26 vs lib/feed_tools/feed_item.rb in feedtools-0.2.27

- old
+ new

@@ -35,11 +35,25 @@ @xml_document = nil @root_node = nil @title = nil @id = nil @time = Time.now.gmtime + @version = FeedTools::FEED_TOOLS_VERSION::STRING end + + # Breaks any references that the feed entry may be keeping around, thus + # making the job of the garbage collector much, much easier. Call this + # method prior to feed entries going out of scope to prevent memory leaks. + def dispose() + @feed_data = nil + @feed_data_type = nil + @xml_document = nil + @root_node = nil + @title = nil + @id = nil + @time = nil + end # Returns the parent feed of this feed item # Warning, this method may be slow if you have a # large number of FeedTools::Feed objects. Can't # use a direct reference to the parent because it plays @@ -67,10 +81,62 @@ end end return parent_feed end + # Does a full parse of the feed item. + def full_parse + self.configurations + + self.encoding + self.xml_document + self.root_node + + self.feed_type + self.feed_version + + self.id + self.title + self.content + self.summary + self.links + self.link + self.comments + self.time + self.updated + self.published + self.source + self.categories + self.tags + self.images + self.rights + self.author + self.publisher + + self.itunes_summary + self.itunes_subtitle + self.itunes_image_link + self.itunes_author + self.itunes_duration + + self.media_text + self.media_thumbnail_link + + self.explicit? + end + + # Returns a duplicate object suitable for serialization + def serializable + self.full_parse() + feed_item_to_dump = self.dup + feed_item_to_dump.author + feed_item_to_dump.publisher + feed_item_to_dump.instance_variable_set("@xml_document", nil) + feed_item_to_dump.instance_variable_set("@root_node", nil) + return feed_item_to_dump + end + # Returns the load options for this feed. def configurations if @configurations.blank? parent_feed = self.feed if parent_feed != nil @@ -88,11 +154,16 @@ end # Returns the feed item's encoding. def encoding if @encoding.nil? - @encoding = self.feed.encoding + parent_feed = self.feed + if parent_feed != nil + @encoding = parent_feed.encoding + else + @encoding = nil + end end return @encoding end # Returns the feed item's raw data. @@ -112,39 +183,44 @@ end # Sets the feed item's data type. def feed_data_type=(new_feed_data_type) @feed_data_type = new_feed_data_type + if self.feed_data_type != :xml + @xml_document = nil + end end # Returns a REXML Document of the feed_data def xml_document - if self.feed_data_type != :xml - @xml_document = nil - else - if @xml_document.nil? + if @xml_document.nil? + return nil if self.feed_data.blank? + if self.feed_data_type != :xml + @xml_document = nil + else # TODO: :ignore_whitespace_nodes => :all # Add that? # ====================================== @xml_document = REXML::Document.new(self.feed_data) end end return @xml_document end - # Returns the first node within the root_node that matches the xpath query. + # Returns the first node within the root_node that matches the xpath + # query. def find_node(xpath, select_result_value=false) - if feed.feed_data_type != :xml + if self.feed_data_type != :xml raise "The feed data type is not xml." end return FeedTools::XmlHelper.try_xpaths(self.root_node, [xpath], :select_result_value => select_result_value) end # Returns all nodes within the root_node that match the xpath query. def find_all_nodes(xpath, select_result_value=false) - if feed.feed_data_type != :xml + if self.feed_data_type != :xml raise "The feed data type is not xml." end return FeedTools::XmlHelper.try_xpaths_all(self.root_node, [xpath], :select_result_value => select_result_value) end @@ -189,10 +265,14 @@ # Returns the feed items's unique id def id if @id.nil? @id = FeedTools::XmlHelper.try_xpaths(self.root_node, [ + "atom10:id/@gr:original-id", + "atom03:id/@gr:original-id", + "atom:id/@gr:original-id", + "id/@gr:original-id", "atom10:id/text()", "atom03:id/text()", "atom:id/text()", "id/text()", "guid/text()" @@ -213,14 +293,15 @@ title_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [ "atom10:title", "atom03:title", "atom:title", "title", - "dc:title" + "dc:title", + "headline" ]) @title = FeedTools::HtmlHelper.process_text_construct(title_node, - self.feed_type, self.feed_version) + self.feed_type, self.feed_version, [self.base_uri]) if self.feed_type == "atom" || self.configurations[:always_strip_wrapper_elements] @title = FeedTools::HtmlHelper.strip_wrapper_element(@title) end if !@title.blank? && self.configurations[:strip_comment_count] @@ -249,10 +330,11 @@ repair_entities = false content_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [ "atom10:content", "atom03:content", "atom:content", + "body/datacontent", "xhtml:body", "body", "xhtml:div", "div", "p:payload", @@ -271,22 +353,22 @@ "abstract", "blurb", "info" ]) @content = FeedTools::HtmlHelper.process_text_construct(content_node, - self.feed_type, self.feed_version) + self.feed_type, self.feed_version, [self.base_uri]) if self.feed_type == "atom" || self.configurations[:always_strip_wrapper_elements] @content = FeedTools::HtmlHelper.strip_wrapper_element(@content) end - if @content.blank? + if @content.nil? @content = self.media_text end - if @content.blank? + if @content.nil? @content = self.itunes_summary end - if @content.blank? + if @content.nil? @content = self.itunes_subtitle end end return @content end @@ -321,14 +403,15 @@ "encoded", "atom10:content", "atom03:content", "atom:content", "content", - "info" + "info", + "body/datacontent" ]) @summary = FeedTools::HtmlHelper.process_text_construct(summary_node, - self.feed_type, self.feed_version) + self.feed_type, self.feed_version, [self.base_uri]) if self.feed_type == "atom" || self.configurations[:always_strip_wrapper_elements] @summary = FeedTools::HtmlHelper.strip_wrapper_element(@summary) end if @summary.blank? @@ -347,147 +430,10 @@ # Sets the feed item summary def summary=(new_summary) @summary = new_summary end - # Returns the contents of the itunes:summary element - def itunes_summary - if @itunes_summary.nil? - @itunes_summary = FeedTools::XmlHelper.try_xpaths(self.root_node, [ - "itunes:summary/text()" - ], :select_result_value => true) - unless @itunes_summary.blank? - @itunes_summary = FeedTools::HtmlHelper.unescape_entities(@itunes_summary) - @itunes_summary = FeedTools::HtmlHelper.sanitize_html(@itunes_summary) - @itunes_summary.strip! - else - @itunes_summary = nil - end - end - return @itunes_summary - end - - # Sets the contents of the itunes:summary element - def itunes_summary=(new_itunes_summary) - @itunes_summary = new_itunes_summary - end - - # Returns the contents of the itunes:subtitle element - def itunes_subtitle - if @itunes_subtitle.nil? - @itunes_subtitle = FeedTools::XmlHelper.try_xpaths(self.root_node, [ - "itunes:subtitle/text()" - ], :select_result_value => true) - unless @itunes_subtitle.blank? - @itunes_subtitle = FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle) - @itunes_subtitle = FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle) - @itunes_subtitle.strip! - else - @itunes_subtitle = nil - end - end - return @itunes_subtitle - end - - # Sets the contents of the itunes:subtitle element - def itunes_subtitle=(new_itunes_subtitle) - @itunes_subtitle = new_itunes_subtitle - end - - # Returns the contents of the media:text element - def media_text - if @media_text.nil? - @media_text = FeedTools::XmlHelper.try_xpaths(self.root_node, [ - "media:text/text()" - ], :select_result_value => true) - unless @media_text.blank? - @media_text = FeedTools::HtmlHelper.unescape_entities(@media_text) - @media_text = FeedTools::HtmlHelper.sanitize_html(@media_text) - @media_text.strip! - else - @media_text = nil - end - end - return @media_text - end - - # Sets the contents of the media:text element - def media_text=(new_media_text) - @media_text = new_media_text - end - - # Returns the feed item link - def link - if @link.nil? - max_score = 0 - for link_object in self.links.reverse - score = 0 - if FeedTools::HtmlHelper.html_type?(link_object.type) - score = score + 2 - elsif link_object.type != nil - score = score - 1 - end - if FeedTools::HtmlHelper.xml_type?(link_object.type) - score = score + 1 - end - if link_object.rel == "alternate" - score = score + 1 - end - if link_object.rel == "self" - score = score - 1 - end - if score >= max_score - max_score = score - @link = link_object.href - end - end - if @link.blank? - @link = FeedTools::XmlHelper.try_xpaths(self.root_node, [ - "@href", - "@rdf:about", - "@about" - ], :select_result_value => true) - end - if @link.blank? - if FeedTools::UriHelper.is_uri?(self.id) && - (self.id =~ /^http/) - @link = self.id - end - end - if !@link.blank? - @link = FeedTools::HtmlHelper.unescape_entities(@link) - end - @link = self.comments if @link.blank? - @link = nil if @link.blank? - begin - if !(@link =~ /^file:/) && - !FeedTools::UriHelper.is_uri?(@link) - stored_base_uri = - FeedTools::GenericHelper.recursion_trap(:feed_link) do - self.feed.base_uri if self.feed != nil - end - root_base_uri = nil - unless self.root_node.nil? - root_base_uri = self.root_node.base_uri - end - @link = FeedTools::UriHelper.resolve_relative_uri( - @link, [root_base_uri,stored_base_uri]) - end - rescue - end - if self.configurations[:url_normalization_enabled] - @link = FeedTools::UriHelper.normalize_url(@link) - end - end - return @link - end - - # Sets the feed item link - def link=(new_link) - @link = new_link - end - # Returns the links collection def links if @links.nil? @links = [] link_nodes = @@ -505,21 +451,22 @@ link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [ "@atom10:href", "@atom03:href", "@atom:href", "@href", + "@url", "text()" ], :select_result_value => true) if link_object.href.nil? && link_node.base_uri != nil link_object.href = "" end begin if !(link_object.href =~ /^file:/) && !FeedTools::UriHelper.is_uri?(link_object.href) stored_base_uri = FeedTools::GenericHelper.recursion_trap(:feed_link) do - self.feed.base_uri if self.feed != nil + self.base_uri if self.feed != nil end link_object.href = FeedTools::UriHelper.resolve_relative_uri( link_object.href, [link_node.base_uri, stored_base_uri]) end @@ -586,39 +533,237 @@ link_object.length = nil end end @links << link_object end + if @links.empty? && self.enclosures.size > 0 + # If there's seriously nothing to link to, but there's enclosures + # available, then add a link to the first one. + enclosure_link = self.enclosures[0] + link_object = FeedTools::Link.new + link_object.href = enclosure_link.url + link_object.type = enclosure_link.type + @links << link_object + end end return @links end # Sets the links collection def links=(new_links) @links = new_links end - + + # Returns the feed item link + def link + if @link.nil? + max_score = 0 + for link_object in self.links.reverse + score = 0 + if FeedTools::HtmlHelper.html_type?(link_object.type) + score = score + 2 + elsif link_object.type != nil + score = score - 1 + end + if FeedTools::HtmlHelper.xml_type?(link_object.type) + score = score + 1 + end + if link_object.type =~ /^video/ && self.links.size == 1 + score = score + 1 + elsif link_object.type =~ /^audio/ && self.links.size == 1 + score = score + 1 + end + if link_object.rel == "alternate" + score = score + 1 + end + if link_object.rel == "self" + score = score - 1 + end + if score >= max_score + max_score = score + @link = link_object.href + end + end + if @link.blank? + @link = FeedTools::XmlHelper.try_xpaths(self.root_node, [ + "@href", + "@rdf:about", + "@about" + ], :select_result_value => true) + end + if @link.blank? + if FeedTools::UriHelper.is_uri?(self.id) && + (self.id =~ /^http/) + @link = self.id + end + end + if !@link.blank? + @link = FeedTools::HtmlHelper.unescape_entities(@link) + end + @link = self.comments if @link.blank? + @link = nil if @link.blank? + begin + if !(@link =~ /^file:/) && + !FeedTools::UriHelper.is_uri?(@link) + stored_base_uri = + FeedTools::GenericHelper.recursion_trap(:feed_link) do + self.base_uri if self.feed != nil + end + root_base_uri = nil + unless self.root_node.nil? + root_base_uri = self.root_node.base_uri + end + @link = FeedTools::UriHelper.resolve_relative_uri( + @link, [root_base_uri,stored_base_uri]) + end + rescue + end + if self.configurations[:url_normalization_enabled] + @link = FeedTools::UriHelper.normalize_url(@link) + end + end + return @link + end + + # Sets the feed item link + def link=(new_link) + @link = new_link + end + + # Returns the parent feed's base_uri if any. + def base_uri + parent_feed = self.feed + if parent_feed != nil + return parent_feed.base_uri + else + return nil + end + end + + # Returns the url for posting comments + def comments + if @comments.nil? + @comments = FeedTools::XmlHelper.try_xpaths( + self.root_node, ["comments/text()"], + :select_result_value => true) + begin + if !(@comments =~ /^file:/) && + !FeedTools::UriHelper.is_uri?(@comments) + root_base_uri = nil + unless self.root_node.nil? + root_base_uri = self.root_node.base_uri + end + @comments = FeedTools::UriHelper.resolve_relative_uri( + @comments, [root_base_uri, self.base_uri]) + end + rescue + end + if self.configurations[:url_normalization_enabled] + @comments = FeedTools::UriHelper.normalize_url(@comments) + end + end + return @comments + end + + # Sets the url for posting comments + def comments=(new_comments) + @comments = new_comments + end + + # Returns the contents of the itunes:summary element + def itunes_summary + if @itunes_summary.nil? + @itunes_summary = FeedTools::XmlHelper.try_xpaths(self.root_node, [ + "itunes:summary/text()" + ], :select_result_value => true) + unless @itunes_summary.blank? + @itunes_summary = + FeedTools::HtmlHelper.unescape_entities(@itunes_summary) + @itunes_summary = + FeedTools::HtmlHelper.sanitize_html(@itunes_summary) + @itunes_summary.strip! + else + @itunes_summary = nil + end + end + return @itunes_summary + end + + # Sets the contents of the itunes:summary element + def itunes_summary=(new_itunes_summary) + @itunes_summary = new_itunes_summary + end + + # Returns the contents of the itunes:subtitle element + def itunes_subtitle + if @itunes_subtitle.nil? + @itunes_subtitle = FeedTools::XmlHelper.try_xpaths(self.root_node, [ + "itunes:subtitle/text()" + ], :select_result_value => true) + unless @itunes_subtitle.blank? + @itunes_subtitle = + FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle) + @itunes_subtitle = + FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle) + @itunes_subtitle.strip! + else + @itunes_subtitle = nil + end + end + return @itunes_subtitle + end + + # Sets the contents of the itunes:subtitle element + def itunes_subtitle=(new_itunes_subtitle) + @itunes_subtitle = new_itunes_subtitle + end + + # Returns the contents of the media:text element + def media_text + if @media_text.nil? + @media_text = FeedTools::XmlHelper.try_xpaths(self.root_node, [ + "media:text/text()" + ], :select_result_value => true) + unless @media_text.blank? + @media_text = FeedTools::HtmlHelper.unescape_entities(@media_text) + @media_text = FeedTools::HtmlHelper.sanitize_html(@media_text) + @media_text.strip! + else + @media_text = nil + end + end + return @media_text + end + + # Sets the contents of the media:text element + def media_text=(new_media_text) + @media_text = new_media_text + end + # Returns a list of the feed item's categories def categories if @categories.nil? @categories = [] category_nodes = FeedTools::XmlHelper.try_xpaths_all(self.root_node, [ "category", "dc:subject" ]) for category_node in category_nodes category = FeedTools::Category.new - category.term = FeedTools::XmlHelper.try_xpaths(category_node, ["@term", "text()"], + category.term = FeedTools::XmlHelper.try_xpaths( + category_node, ["@term", "text()"], :select_result_value => true) category.term.strip! unless category.term.nil? - category.label = FeedTools::XmlHelper.try_xpaths(category_node, ["@label"], + category.label = FeedTools::XmlHelper.try_xpaths( + category_node, ["@label"], :select_result_value => true) category.label.strip! unless category.label.nil? - category.scheme = FeedTools::XmlHelper.try_xpaths(category_node, [ - "@scheme", - "@domain" - ], :select_result_value => true) + category.scheme = FeedTools::XmlHelper.try_xpaths( + category_node, [ + "@scheme", + "@domain" + ], :select_result_value => true) category.scheme.strip! unless category.scheme.nil? @categories << category end end return @categories @@ -639,21 +784,22 @@ image = FeedTools::Image.new image.href = FeedTools::XmlHelper.try_xpaths(image_node, [ "url/text()", "@rdf:resource", "@href", + "@url", "text()" ], :select_result_value => true) if image.href.nil? && image_node.base_uri != nil image.href = "" end begin if !(image.href =~ /^file:/) && !FeedTools::UriHelper.is_uri?(image.href) stored_base_uri = FeedTools::GenericHelper.recursion_trap(:feed_link) do - self.feed.base_uri if self.feed != nil + self.base_uri if self.feed != nil end image.href = FeedTools::UriHelper.resolve_relative_uri( image.href, [image_node.base_uri, stored_base_uri]) end rescue @@ -705,11 +851,12 @@ @itunes_image_link = FeedTools::XmlHelper.try_xpaths(self.root_node, [ "itunes:image/@href", "itunes:link[@rel='image']/@href" ], :select_result_value => true) if self.configurations[:url_normalization_enabled] - @itunes_image_link = FeedTools::UriHelper.normalize_url(@itunes_image_link) + @itunes_image_link = + FeedTools::UriHelper.normalize_url(@itunes_image_link) end end return @itunes_image_link end @@ -719,15 +866,17 @@ end # Returns the feed item media thumbnail link def media_thumbnail_link if @media_thumbnail_link.nil? - @media_thumbnail_link = FeedTools::XmlHelper.try_xpaths(self.root_node, [ - "media:thumbnail/@url" - ], :select_result_value => true) + @media_thumbnail_link = FeedTools::XmlHelper.try_xpaths( + self.root_node, [ + "media:thumbnail/@url" + ], :select_result_value => true) if self.configurations[:url_normalization_enabled] - @media_thumbnail_link = FeedTools::UriHelper.normalize_url(@media_thumbnail_link) + @media_thumbnail_link = + FeedTools::UriHelper.normalize_url(@media_thumbnail_link) end end return @media_thumbnail_link end @@ -748,11 +897,11 @@ "copyrights", "dc:rights", "rights" ]) @rights = FeedTools::HtmlHelper.process_text_construct(rights_node, - self.feed_type, self.feed_version) + self.feed_type, self.feed_version, [self.base_uri]) if self.feed_type == "atom" || self.configurations[:always_strip_wrapper_elements] @rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights) end end @@ -762,16 +911,28 @@ # Sets the feed item's rights information def rights=(new_rights) @rights = new_rights end - def license #:nodoc: - raise "Not implemented yet." + # Returns the first license link for the feed item. + def license + return self.licenses.first end + + # Returns all licenses linked from this feed item. + def licenses + if @licenses.nil? + @licenses = self.links.select do |link| + link.rel == "license" + end + end + return @licenses + end - def license=(new_license) #:nodoc: - raise "Not implemented yet." + # Sets the feed item's licenses. + def licenses=(new_licenses) + @licenses = new_licenses end # Returns all feed item enclosures def enclosures if @enclosures.nil? @@ -791,10 +952,13 @@ FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["media:content"]) media_group_enclosures = FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["media:group"]) + bogus_enclosures = + FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["video"]) + # TODO: Implement this bittorrent_enclosures = FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["bitTorrent:torrent"]) @@ -837,10 +1001,37 @@ enclosure.explicit = false if new_enclosure @enclosures << enclosure end end + + # Parse atom-type enclosures. If there are repeats of the same + # enclosure object, we merge the two together. + for enclosure_node in bogus_enclosures + enclosure_url = FeedTools::HtmlHelper.unescape_entities( + enclosure_node.attributes["url"].to_s) + enclosure = nil + new_enclosure = false + for existing_enclosure in @enclosures + if existing_enclosure.url == enclosure_url + enclosure = existing_enclosure + break + end + end + if enclosure.nil? + new_enclosure = true + enclosure = FeedTools::Enclosure.new + end + enclosure.url = enclosure_url + if File.extname(enclosure_url) == ".wmv" + enclosure.type = "video/x-ms-wmv" + end + enclosure.explicit = false + if new_enclosure + @enclosures << enclosure + end + end # Creates an anonymous method to parse content objects from the media # module. We do this to avoid excessive duplication of code since we # have to do identical processing for content objects within group # objects. @@ -871,25 +1062,30 @@ enclosure.framerate = enclosure_node.attributes["framerate"].to_i enclosure.expression = enclosure_node.attributes["expression"].to_s enclosure.is_default = (enclosure_node.attributes["isDefault"].to_s.downcase == "true") - enclosure_thumbnail_url = FeedTools::XmlHelper.try_xpaths(enclosure_node, - ["media:thumbnail/@url"], :select_result_value => true) + enclosure_thumbnail_url = + FeedTools::XmlHelper.try_xpaths(enclosure_node, + ["media:thumbnail/@url"], :select_result_value => true) if !enclosure_thumbnail_url.blank? enclosure.thumbnail = FeedTools::EnclosureThumbnail.new( - FeedTools::HtmlHelper.unescape_entities(enclosure_thumbnail_url), FeedTools::HtmlHelper.unescape_entities( - FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:thumbnail/@height"], + enclosure_thumbnail_url), + FeedTools::HtmlHelper.unescape_entities( + FeedTools::XmlHelper.try_xpaths(enclosure_node, + ["media:thumbnail/@height"], :select_result_value => true)), FeedTools::HtmlHelper.unescape_entities( - FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:thumbnail/@width"], + FeedTools::XmlHelper.try_xpaths(enclosure_node, + ["media:thumbnail/@width"], :select_result_value => true)) ) end enclosure.categories = [] - for category in FeedTools::XmlHelper.try_xpaths_all(enclosure_node, ["media:category"]) + for category in FeedTools::XmlHelper.try_xpaths_all( + enclosure_node, ["media:category"]) enclosure.categories << FeedTools::Category.new enclosure.categories.last.term = FeedTools::HtmlHelper.unescape_entities(category.inner_xml) enclosure.categories.last.scheme = FeedTools::HtmlHelper.unescape_entities( @@ -902,50 +1098,58 @@ end if enclosure.categories.last.label.blank? enclosure.categories.last.label = nil end end - enclosure_media_hash = FeedTools::XmlHelper.try_xpaths(enclosure_node, - ["media:hash/text()"], :select_result_value => true) + enclosure_media_hash = + FeedTools::XmlHelper.try_xpaths(enclosure_node, + ["media:hash/text()"], :select_result_value => true) if !enclosure_media_hash.nil? enclosure.hash = FeedTools::EnclosureHash.new( - FeedTools::HtmlHelper.sanitize_html(FeedTools::HtmlHelper.unescape_entities( - enclosure_media_hash), :strip), + FeedTools::HtmlHelper.sanitize_html( + FeedTools::HtmlHelper.unescape_entities( + enclosure_media_hash), :strip), "md5" ) end - enclosure_media_player_url = FeedTools::XmlHelper.try_xpaths(enclosure_node, - ["media:player/@url"], :select_result_value => true) + enclosure_media_player_url = + FeedTools::XmlHelper.try_xpaths(enclosure_node, + ["media:player/@url"], :select_result_value => true) if !enclosure_media_player_url.blank? enclosure.player = FeedTools::EnclosurePlayer.new( - FeedTools::HtmlHelper.unescape_entities(enclosure_media_player_url), FeedTools::HtmlHelper.unescape_entities( + enclosure_media_player_url), + FeedTools::HtmlHelper.unescape_entities( FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:player/@height"], :select_result_value => true)), FeedTools::HtmlHelper.unescape_entities( FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:player/@width"], :select_result_value => true)) ) end enclosure.credits = [] - for credit in FeedTools::XmlHelper.try_xpaths_all(enclosure_node, ["media:credit"]) + for credit in FeedTools::XmlHelper.try_xpaths_all( + enclosure_node, ["media:credit"]) enclosure.credits << FeedTools::EnclosureCredit.new( - FeedTools::HtmlHelper.unescape_entities(credit.inner_xml.to_s.strip), FeedTools::HtmlHelper.unescape_entities( + credit.inner_xml.to_s.strip), + FeedTools::HtmlHelper.unescape_entities( credit.attributes["role"].to_s.downcase) ) if enclosure.credits.last.name.blank? enclosure.credits.last.name = nil end if enclosure.credits.last.role.blank? enclosure.credits.last.role = nil end end - enclosure.explicit = (FeedTools::XmlHelper.try_xpaths(enclosure_node, - ["media:adult/text()"]).to_s.downcase == "true") + enclosure.explicit = + (FeedTools::XmlHelper.try_xpaths(enclosure_node, + ["media:adult/text()"]).to_s.downcase == "true") enclosure_media_text = - FeedTools::XmlHelper.try_xpaths(enclosure_node, ["media:text/text()"]) + FeedTools::XmlHelper.try_xpaths(enclosure_node, + ["media:text/text()"]) if !enclosure_media_text.blank? enclosure.text = FeedTools::HtmlHelper.unescape_entities( enclosure_media_text) end affected_enclosures << enclosure @@ -962,36 +1166,41 @@ media_groups = [] # Parse the group objects. for media_group in media_group_enclosures group_media_content_enclosures = - FeedTools::XmlHelper.try_xpaths_all(media_group, ["media:content"]) + FeedTools::XmlHelper.try_xpaths_all(media_group, + ["media:content"]) # Parse the content objects within the group objects. affected_enclosures = parse_media_content.call(group_media_content_enclosures) # Now make sure that content objects inherit certain properties from # the group objects. for enclosure in affected_enclosures - media_group_thumbnail = FeedTools::XmlHelper.try_xpaths(media_group, - ["media:thumbnail/@url"], :select_result_value => true) + media_group_thumbnail = + FeedTools::XmlHelper.try_xpaths(media_group, + ["media:thumbnail/@url"], :select_result_value => true) if enclosure.thumbnail.nil? && !media_group_thumbnail.blank? enclosure.thumbnail = FeedTools::EnclosureThumbnail.new( FeedTools::HtmlHelper.unescape_entities( media_group_thumbnail), FeedTools::HtmlHelper.unescape_entities( - FeedTools::XmlHelper.try_xpaths(media_group, ["media:thumbnail/@height"], + FeedTools::XmlHelper.try_xpaths(media_group, + ["media:thumbnail/@height"], :select_result_value => true)), FeedTools::HtmlHelper.unescape_entities( - FeedTools::XmlHelper.try_xpaths(media_group, ["media:thumbnail/@width"], + FeedTools::XmlHelper.try_xpaths(media_group, + ["media:thumbnail/@width"], :select_result_value => true)) ) end if (enclosure.categories.blank?) enclosure.categories = [] - for category in FeedTools::XmlHelper.try_xpaths_all(media_group, ["media:category"]) + for category in FeedTools::XmlHelper.try_xpaths_all( + media_group, ["media:category"]) enclosure.categories << FeedTools::Category.new enclosure.categories.last.term = FeedTools::HtmlHelper.unescape_entities(category.inner_xml) enclosure.categories.last.scheme = FeedTools::HtmlHelper.unescape_entities( @@ -1005,35 +1214,46 @@ if enclosure.categories.last.label.blank? enclosure.categories.last.label = nil end end end - enclosure_media_group_hash = FeedTools::XmlHelper.try_xpaths(enclosure_node, - ["media:hash/text()"], :select_result_value => true) + enclosure_media_group_hash = + FeedTools::XmlHelper.try_xpaths(enclosure_node, + ["media:hash/text()"], :select_result_value => true) if enclosure.hash.nil? && !enclosure_media_group_hash.blank? enclosure.hash = FeedTools::EnclosureHash.new( - FeedTools::HtmlHelper.sanitize_html(FeedTools::HtmlHelper.unescape_entities( - enclosure_media_group_hash), :strip), + FeedTools::HtmlHelper.sanitize_html( + FeedTools::HtmlHelper.unescape_entities( + enclosure_media_group_hash), :strip), "md5" ) end - enclosure_media_group_url = FeedTools::XmlHelper.try_xpaths(media_group, - "media:player/@url", :select_result_value => true) + enclosure_media_group_url = FeedTools::XmlHelper.try_xpaths( + media_group, + "media:player/@url", + :select_result_value => true + ) if enclosure.player.nil? && !enclosure_media_group_url.blank? enclosure.player = FeedTools::EnclosurePlayer.new( - FeedTools::HtmlHelper.unescape_entities(enclosure_media_group_url), FeedTools::HtmlHelper.unescape_entities( - FeedTools::XmlHelper.try_xpaths(media_group, ["media:player/@height"], + enclosure_media_group_url), + FeedTools::HtmlHelper.unescape_entities( + FeedTools::XmlHelper.try_xpaths(media_group, + ["media:player/@height"], :select_result_value => true)), FeedTools::HtmlHelper.unescape_entities( - FeedTools::XmlHelper.try_xpaths(media_group, ["media:player/@width"], - :select_result_value => true)) + FeedTools::XmlHelper.try_xpaths(media_group, + ["media:player/@width"], + :select_result_value => true + ) + ) ) end if enclosure.credits.nil? || enclosure.credits.size == 0 enclosure.credits = [] - for credit in FeedTools::XmlHelper.try_xpaths_all(media_group, ["media:credit"]) + for credit in FeedTools::XmlHelper.try_xpaths_all( + media_group, ["media:credit"]) enclosure.credits << FeedTools::EnclosureCredit.new( FeedTools::HtmlHelper.unescape_entities(credit.inner_xml), FeedTools::HtmlHelper.unescape_entities( credit.attributes["role"].to_s.downcase) ) @@ -1041,17 +1261,19 @@ enclosure.credits.last.role = nil end end end if enclosure.explicit?.nil? - enclosure.explicit = ((FeedTools::XmlHelper.try_xpaths(media_group, [ - "media:adult/text()" - ], :select_result_value => true).downcase == "true") ? - true : false) + enclosure.explicit = + ((FeedTools::XmlHelper.try_xpaths(media_group, [ + "media:adult/text()" + ], :select_result_value => true).downcase == "true") ? + true : false) end - enclosure_media_group_text = FeedTools::XmlHelper.try_xpaths(media_group, - ["media:text/text()"], :select_result_value => true) + enclosure_media_group_text = + FeedTools::XmlHelper.try_xpaths(media_group, + ["media:text/text()"], :select_result_value => true) if enclosure.text.nil? && !enclosure_media_group_text.blank? enclosure.text = FeedTools::HtmlHelper.sanitize_html( FeedTools::HtmlHelper.unescape_entities( enclosure_media_group_text), :strip) end @@ -1068,16 +1290,18 @@ end end # Add all the itunes categories itunes_categories = - FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["itunes:category"]) + FeedTools::XmlHelper.try_xpaths_all(self.root_node, + ["itunes:category"]) for itunes_category in itunes_categories genre = "Podcasts" category = itunes_category.attributes["text"].to_s subcategory = - FeedTools::XmlHelper.try_xpaths(itunes_category, ["itunes:category/@text"], + FeedTools::XmlHelper.try_xpaths(itunes_category, + ["itunes:category/@text"], :select_result_value => true) category_path = genre if !category.blank? category_path << "/" + category end @@ -1134,10 +1358,18 @@ # Make sure we don't have duplicate categories unless enclosure.categories.nil? enclosure.categories.uniq! end + + # Normalize enclosure URIs + if !enclosure.href.blank? + enclosure.href = + FeedTools::UriHelper.normalize_url(enclosure.href) + else + enclosure.href = nil + end end # And finally, now things get complicated. This is where we make # sure that the enclosures method only returns either default # enclosures or enclosures with only one version. Any enclosures @@ -1300,17 +1532,30 @@ @author.url, [author_node.base_uri, self.base_uri]) end rescue end end + if FeedTools::XmlHelper.try_xpaths(author_node, + ["@gr:unknown-author"], :select_result_value => true) == "true" + if @author.name == "(author unknown)" + @author.name = nil + end + end end # Fallback on the itunes module if we didn't find an author name begin @author.name = self.itunes_author if @author.name.nil? rescue @author.name = nil end + if @author.name.blank? && @author.email.blank? && + @author.href.blank? + parent_feed = self.feed + if parent_feed != nil + @author = parent_feed.author.dup + end + end end return @author end # Sets the feed item author @@ -1390,10 +1635,17 @@ @publisher.url, [root_base_uri, self.base_uri]) end rescue end end + if @publisher.name.blank? && @publisher.email.blank? && + @publisher.href.blank? + parent_feed = self.feed + if parent_feed != nil + @publisher = parent_feed.publisher.dup + end + end end return @publisher end # Sets the feed publisher @@ -1420,11 +1672,16 @@ def itunes_author if @itunes_author.nil? @itunes_author = FeedTools::HtmlHelper.unescape_entities( FeedTools::XmlHelper.try_xpaths(self.root_node, ["itunes:author/text()"], :select_result_value => true)) - @itunes_author = feed.itunes_author if @itunes_author.blank? + if @itunes_author.blank? + parent_feed = self.feed + if parent_feed != nil + @itunes_author = parent_feed.itunes_author + end + end end return @itunes_author end # Sets the contents of the itunes:author element @@ -1482,11 +1739,12 @@ "atom03:published/text()", "atom:published/text()", "published/text()", "dc:date/text()", "pubDate/text()", - "date/text()" + "date/text()", + "lastupdated/text()" ], :select_result_value => true) begin if !time_string.blank? @time = Time.parse(time_string).gmtime elsif self.configurations[:timestamp_estimation_enabled] && @@ -1583,11 +1841,12 @@ "updated/text()", "atom10:modified/text()", "atom03:modified/text()", "atom:modified/text()", "modified/text()", - "lastBuildDate/text()" + "lastBuildDate/text()", + "lastupdated/text()" ], :select_result_value => true) if !updated_string.blank? @updated = Time.parse(updated_string).gmtime rescue nil else @updated = nil @@ -1628,40 +1887,12 @@ # Sets the feed item published time def published=(new_published) @published = new_published end - - # Returns the url for posting comments - def comments - if @comments.nil? - @comments = FeedTools::XmlHelper.try_xpaths(self.root_node, ["comments/text()"], - :select_result_value => true) - begin - if !(@comments =~ /^file:/) && - !FeedTools::UriHelper.is_uri?(@comments) - root_base_uri = nil - unless self.root_node.nil? - root_base_uri = self.root_node.base_uri - end - @comments = FeedTools::UriHelper.resolve_relative_uri( - @comments, [root_base_uri, self.base_uri]) - end - rescue - end - if self.configurations[:url_normalization_enabled] - @comments = FeedTools::UriHelper.normalize_url(@comments) - end - end - return @comments - end - # Sets the url for posting comments - def comments=(new_comments) - @comments = new_comments - end - + # TODO: FIX ME! This code is completely wrong. # The source that this post was based on def source if @source.nil? @source = FeedTools::Link.new @source.href = FeedTools::XmlHelper.try_xpaths( @@ -1700,11 +1931,12 @@ rdf_bag = FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["taxo:topics/rdf:Bag/rdf:li"]) if rdf_bag != nil && rdf_bag.size > 0 for tag_node in rdf_bag begin - tag_url = FeedTools::XmlHelper.try_xpaths(tag_node, ["@resource"], + tag_url = FeedTools::XmlHelper.try_xpaths(tag_node, + ["@resource"], :select_result_value => true) tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)$/) if tag_match.size > 0 @tags << tag_match.first.last.downcase.strip end @@ -1713,29 +1945,32 @@ end end end if @tags.nil? || @tags.size == 0 @tags = [] - tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["category/text()"], + tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node, + ["category/text()"], :select_result_value => true) for tag in tag_list @tags << tag.to_s.downcase.strip end end if @tags.nil? || @tags.size == 0 @tags = [] - tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["dc:subject/text()"], + tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node, + ["dc:subject/text()"], :select_result_value => true) for tag in tag_list @tags << tag.to_s.downcase.strip end end if @tags.blank? begin - itunes_keywords_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [ - "itunes:keywords/text()" - ], :select_result_value => true) + itunes_keywords_string = + FeedTools::XmlHelper.try_xpaths(self.root_node, [ + "itunes:keywords/text()" + ], :select_result_value => true) unless itunes_keywords_string.blank? @tags = itunes_keywords_string.downcase.split(",") if @tags.size == 1 @tags = itunes_keywords_string.downcase.split(" ") @tags = @tags.map { |tag| tag.chomp(",") } @@ -1769,13 +2004,15 @@ if @explicit.nil? explicit_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [ "media:adult/text()", "itunes:explicit/text()" ], :select_result_value => true) - if explicit_string == "true" || explicit_string == "yes" || - feed.explicit? + parent_feed = self.feed + if explicit_string == "true" || explicit_string == "yes" @explicit = true + elsif parent_feed != nil && parent_feed.explicit? + @explicit = true else @explicit = false end end return @explicit @@ -1784,34 +2021,56 @@ # Sets whether or not the feed contains explicit material def explicit=(new_explicit) @explicit = (new_explicit ? true : false) end - # A hook method that is called during the feed generation process. Overriding this method - # will enable additional content to be inserted into the feed. + # A hook method that is called during the feed generation process. + # Overriding this method will enable additional content to be inserted + # into the feed. def build_xml_hook(feed_type, version, xml_builder) return nil end # Generates xml based on the content of the feed item def build_xml(feed_type=(self.feed.feed_type or "atom"), version=nil, xml_builder=Builder::XmlMarkup.new( :indent => 2, :escape_attrs => false)) + + parent_feed = self.feed + if parent_feed.find_node( + "access:restriction/@relationship").to_s == "deny" + raise StandardError, + "Operation not permitted. This feed denies redistribution." + elsif parent_feed.find_node("@indexing:index").to_s == "no" + raise StandardError, + "Operation not permitted. This feed denies redistribution." + end + if self.find_node( + "access:restriction/@relationship").to_s == "deny" + raise StandardError, + "Operation not permitted. This feed item denies redistribution." + end + + self.full_parse() + if feed_type == "rss" && (version == nil || version == 0.0) version = 1.0 elsif feed_type == "atom" && (version == nil || version == 0.0) version = 1.0 end - if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1) + if feed_type == "rss" && + (version == 0.9 || version == 1.0 || version == 1.1) # RDF-based rss format if link.nil? - raise "Cannot generate an rdf-based feed item with a nil link field." + raise "Cannot generate an rdf-based feed item with a " + + "nil link field." end return xml_builder.item("rdf:about" => FeedTools::HtmlHelper.escape_entities(link)) do unless self.title.blank? - xml_builder.title(FeedTools::HtmlHelper.strip_html_tags(self.title)) + xml_builder.title( + FeedTools::HtmlHelper.strip_html_tags(self.title)) else xml_builder.title end unless self.link.blank? xml_builder.link(self.link) @@ -1849,11 +2108,12 @@ end elsif feed_type == "rss" # normal rss format return xml_builder.item do unless self.title.blank? - xml_builder.title(FeedTools::HtmlHelper.strip_html_tags(self.title)) + xml_builder.title( + FeedTools::HtmlHelper.strip_html_tags(self.title)) end unless self.link.blank? xml_builder.link(self.link) end unless self.author.nil? || self.author.name.nil? @@ -1874,15 +2134,16 @@ if !self.published.nil? xml_builder.pubDate(self.published.rfc822) elsif !self.time.nil? xml_builder.pubDate(self.time.rfc822) end - unless self.copyright.blank? - xml_builder.tag!("dc:rights", self.copyright) + unless self.rights.blank? + xml_builder.tag!("dc:rights", self.rights) end unless self.guid.blank? - if FeedTools::UriHelper.is_uri?(self.guid) && (self.guid =~ /^http/) + if FeedTools::UriHelper.is_uri?(self.guid) && + (self.guid =~ /^http/) xml_builder.guid(self.guid, "isPermaLink" => "true") else xml_builder.guid(self.guid, "isPermaLink" => "false") end else @@ -1917,11 +2178,12 @@ end end end rescue end - attribute_hash["url"] = FeedTools::UriHelper.normalize_url(enclosure.url) + attribute_hash["url"] = + FeedTools::UriHelper.normalize_url(enclosure.url) if enclosure.type != nil attribute_hash["type"] = enclosure.type end if enclosure.file_size != nil && enclosure.file_size.to_i > 0 attribute_hash["length"] = enclosure.file_size.to_s @@ -1991,22 +2253,24 @@ xml_builder.rights(self.rights) end if self.id != nil unless FeedTools::UriHelper.is_uri? self.id if self.time != nil && self.link != nil - xml_builder.id(FeedTools::UriHelper.build_tag_uri(self.link, self.time)) + xml_builder.id(FeedTools::UriHelper.build_tag_uri( + self.link, self.time)) elsif self.link != nil xml_builder.id(FeedTools.build_urn_uuid_uri(self.link)) else raise "The unique id must be a URI. " + "(Attempted to generate id, but failed.)" end else xml_builder.id(self.id) end elsif self.time != nil && self.link != nil - xml_builder.id(FeedTools::UriHelper.build_tag_uri(self.link, self.time)) + xml_builder.id(FeedTools::UriHelper.build_tag_uri( + self.link, self.time)) else raise "Cannot build feed, missing feed unique id." end unless self.tags.nil? || self.tags.size == 0 for tag in self.tags @@ -2016,10 +2280,11 @@ unless self.enclosures.blank? || self.enclosures.size == 0 for enclosure in self.enclosures attribute_hash = {} next if enclosure.url.blank? attribute_hash["rel"] = "enclosure" - attribute_hash["href"] = FeedTools::UriHelper.normalize_url(enclosure.url) + attribute_hash["href"] = + FeedTools::UriHelper.normalize_url(enclosure.url) if enclosure.type != nil attribute_hash["type"] = enclosure.type end if enclosure.file_size != nil && enclosure.file_size.to_i > 0 attribute_hash["length"] = enclosure.file_size.to_s