lib/feed_tools/feed_item.rb in feedtools-0.2.18 vs lib/feed_tools/feed_item.rb in feedtools-0.2.19

- old
+ new

@@ -163,18 +163,22 @@ # Returns the parent feed of this feed item # Warning, this method may be slow if you have a # large number of FeedTools::Feed objects. Can't # use a direct reference to the parent because it plays - # havoc with the garbage collector. + # havoc with the garbage collector. Could've used + # a WeakRef object, but really, if there are multiple + # parent feeds, something is going to go wrong, and the + # programmer needs to be notified. A WeakRef + # implementation can't detect this condition. def feed parent_feed = nil ObjectSpace.each_object(FeedTools::Feed) do |feed| - if feed.instance_variable_get("@items").nil? + if feed.instance_variable_get("@entries").nil? feed.items end - unsorted_items = feed.instance_variable_get("@items") + unsorted_items = feed.instance_variable_get("@entries") for item in unsorted_items if item.object_id == self.object_id if parent_feed.nil? parent_feed = feed break @@ -222,17 +226,25 @@ end return @xml_doc end # Returns the first node within the root_node that matches the xpath query. - def find_node(xpath) - return XPath.first(root_node, xpath) + def find_node(xpath, select_result_value=false) + if feed.feed_data_type != :xml + raise "The feed data type is not xml." + end + return try_xpaths(self.root_node, [xpath], + :select_result_value => select_result_value) end # Returns all nodes within the root_node that match the xpath query. - def find_all_nodes(xpath) - return XPath.match(root_node, xpath) + def find_all_nodes(xpath, select_result_value=false) + if feed.feed_data_type != :xml + raise "The feed data type is not xml." + end + return try_xpaths_all(self.root_node, [xpath], + :select_result_value => select_result_value) end # Returns the root node of the feed item. def root_node if @root_node.nil? @@ -245,17 +257,17 @@ end # Returns the feed items's unique id def id if @id.nil? - unless root_node.nil? - @id = XPath.first(root_node, "id/text()").to_s - if @id == "" - @id = XPath.first(root_node, "guid/text()").to_s - end - end - @id = nil if @id == "" + @id = try_xpaths(self.root_node, [ + "atom10:id/text()", + "atom03:id/text()", + "atom:id/text()", + "id/text()", + "guid/text()" + ], :select_result_value => true) end return @id end # Sets the feed item's unique id @@ -264,41 +276,27 @@ end # Returns the feed item title def title if @title.nil? - unless root_node.nil? - repair_entities = false - title_node = XPath.first(root_node, "atom10:title", - FEED_TOOLS_NAMESPACES) - if title_node.nil? - title_node = XPath.first(root_node, "title") - end - if title_node.nil? - title_node = XPath.first(root_node, "atom03:title", - FEED_TOOLS_NAMESPACES) - end - if title_node.nil? - title_node = XPath.first(root_node, "atom:title") - end - if title_node.nil? - title_node = XPath.first(root_node, "dc:title", - FEED_TOOLS_NAMESPACES) - end - if title_node.nil? - title_node = XPath.first(root_node, "dc:title") - end - if title_node.nil? - title_node = XPath.first(root_node, "TITLE") - end - end + repair_entities = false + title_node = try_xpaths(self.root_node, [ + "atom10:title", + "atom03:title", + "atom:title", + "title", + "dc:title" + ]) if title_node.nil? return nil end - title_type = XPath.first(title_node, "@type").to_s - title_mode = XPath.first(title_node, "@mode").to_s - title_encoding = XPath.first(title_node, "@encoding").to_s + title_type = try_xpaths(title_node, "@type", + :select_result_value => true) + title_mode = try_xpaths(title_node, "@mode", + :select_result_value => true) + title_encoding = try_xpaths(title_node, "@encoding", + :select_result_value => true) # Note that we're checking for misuse of type, mode and encoding here if title_type == "base64" || title_mode == "base64" || title_encoding == "base64" @title = Base64.decode64(title_node.inner_xml.strip) @@ -316,11 +314,11 @@ unless @title.nil? @title = FeedTools.sanitize_html(@title, :strip) @title = FeedTools.unescape_entities(@title) if repair_entities @title = FeedTools.tidy_html(@title) unless repair_entities end - if @title != "" + if !@title.blank? && FeedTools.configurations[:strip_comment_count] # Some blogging tools include the number of comments in a post # in the title... this is supremely ugly, and breaks any # applications which expect the title to be static, so we're # gonna strip them out. # @@ -329,138 +327,105 @@ @title = @title.strip.gsub(/\[\d*\]$/, "").strip end @title.gsub!(/>\n</, "><") @title.gsub!(/\n/, " ") @title.strip! - @title = nil if @title == "" + @title = nil if @title.blank? end return @title end # Sets the feed item title def title=(new_title) @title = new_title end - # Returns the feed item description - def description - if @description.nil? - unless root_node.nil? - repair_entities = false - description_node = XPath.first(root_node, "content:encoded") - if description_node.nil? - description_node = XPath.first(root_node, "content:encoded", - FEED_TOOLS_NAMESPACES) - end - if description_node.nil? - description_node = XPath.first(root_node, "encoded") - end - if description_node.nil? - description_node = XPath.first(root_node, "content") - end - if description_node.nil? - description_node = XPath.first(root_node, "fullitem") - end - if description_node.nil? - description_node = XPath.first(root_node, "xhtml:body") - end - if description_node.nil? - description_node = XPath.first(root_node, "xhtml:body", - FEED_TOOLS_NAMESPACES) - end - if description_node.nil? - description_node = XPath.first(root_node, "body") - end - if description_node.nil? - description_node = XPath.first(root_node, "description") - end - if description_node.nil? - description_node = XPath.first(root_node, "tagline") - end - if description_node.nil? - description_node = XPath.first(root_node, "subtitle") - end - if description_node.nil? - description_node = XPath.first(root_node, "summary") - end - if description_node.nil? - description_node = XPath.first(root_node, "abstract") - end - if description_node.nil? - description_node = XPath.first(root_node, "ABSTRACT") - end - if description_node.nil? - description_node = XPath.first(root_node, "blurb") - end - if description_node.nil? - description_node = XPath.first(root_node, "info") - end - end - if description_node.nil? + # Returns the feed item content + def content + if @content.nil? + repair_entities = false + content_node = try_xpaths(self.root_node, [ + "content:encoded", + "content", + "fullitem", + "xhtml:body", + "body", + "encoded", + "description", + "tagline", + "subtitle", + "summary", + "abstract", + "blurb", + "info" + ]) + if content_node.nil? return nil end - description_type = XPath.first(description_node, "@type").to_s - description_mode = XPath.first(description_node, "@mode").to_s - description_encoding = XPath.first(description_node, "@encoding").to_s + content_type = try_xpaths(content_node, "@type", + :select_result_value => true) + content_mode = try_xpaths(content_node, "@mode", + :select_result_value => true) + content_encoding = try_xpaths(content_node, "@encoding", + :select_result_value => true) # Note that we're checking for misuse of type, mode and encoding here - if description_encoding != "" - @description = + if !content_encoding.blank? + @content = "[Embedded data objects are not currently supported.]" - elsif description_node.cdatas.size > 0 - @description = description_node.cdatas.first.value - elsif description_type == "base64" || description_mode == "base64" || - description_encoding == "base64" - @description = Base64.decode64(description_node.inner_xml.strip) - elsif description_type == "xhtml" || description_mode == "xhtml" || - description_type == "xml" || description_mode == "xml" || - description_type == "application/xhtml+xml" - @description = description_node.inner_xml - elsif description_type == "escaped" || description_mode == "escaped" - @description = FeedTools.unescape_entities( - description_node.inner_xml) + elsif content_node.cdatas.size > 0 + @content = content_node.cdatas.first.value + elsif content_type == "base64" || content_mode == "base64" || + content_encoding == "base64" + @content = Base64.decode64(content_node.inner_xml.strip) + elsif content_type == "xhtml" || content_mode == "xhtml" || + content_type == "xml" || content_mode == "xml" || + content_type == "application/xhtml+xml" + @content = content_node.inner_xml + elsif content_type == "escaped" || content_mode == "escaped" + @content = FeedTools.unescape_entities( + content_node.inner_xml) else - @description = description_node.inner_xml + @content = content_node.inner_xml repair_entities = true end - if @description == "" - @description = self.itunes_summary - @description = "" if @description.nil? + if @content.blank? + @content = self.itunes_summary end - if @description == "" - @description = self.itunes_subtitle - @description = "" if @description.nil? + if @content.blank? + @content = self.itunes_subtitle end - unless @description.nil? - @description = FeedTools.sanitize_html(@description, :strip) - @description = FeedTools.unescape_entities(@description) if repair_entities - @description = FeedTools.tidy_html(@description) + unless @content.blank? + @content = FeedTools.sanitize_html(@content, :strip) + @content = FeedTools.unescape_entities(@content) if repair_entities + @content = FeedTools.tidy_html(@content) end - @description = @description.strip unless @description.nil? - @description = nil if @description == "" + @content = @content.strip unless @content.nil? + @content = nil if @content.blank? end - return @description + return @content end - # Sets the feed item description - def description=(new_description) - @description = new_description + # Sets the feed item content + def content=(new_content) + @content = new_content end # Returns the contents of the itunes:summary element def itunes_summary if @itunes_summary.nil? - @itunes_summary = FeedTools.unescape_entities(XPath.first(root_node, - "itunes:summary/text()").to_s) - if @itunes_summary == "" + @itunes_summary = try_xpaths(self.root_node, [ + "itunes:summary/text()" + ]) + unless @itunes_summary.blank? + @itunes_summary = FeedTools.unescape_entities(@itunes_summary) + @itunes_summary = FeedTools.sanitize_html(@itunes_summary) + else @itunes_summary = nil end - unless @itunes_summary.nil? - @itunes_summary = FeedTools.sanitize_html(@itunes_summary) - end end return @itunes_summary end # Sets the contents of the itunes:summary element @@ -469,18 +434,19 @@ end # Returns the contents of the itunes:subtitle element def itunes_subtitle if @itunes_subtitle.nil? - @itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node, - "itunes:subtitle/text()").to_s) - if @itunes_subtitle == "" + @itunes_subtitle = try_xpaths(self.root_node, [ + "itunes:subtitle/text()" + ]) + unless @itunes_subtitle.blank? + @itunes_subtitle = FeedTools.unescape_entities(@itunes_subtitle) + @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle) + else @itunes_subtitle = nil end - unless @itunes_subtitle.nil? - @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle) - end end return @itunes_subtitle end # Sets the contents of the itunes:subtitle element @@ -509,43 +475,35 @@ end # Returns the feed item link def link if @link.nil? - unless root_node.nil? - @link = XPath.first(root_node, "link[@rel='alternate']/@href").to_s - if @link == "" - @link = XPath.first(root_node, "link/@href").to_s - end - if @link == "" - @link = XPath.first(root_node, "link/text()").to_s - end - if @link == "" - @link = XPath.first(root_node, "@rdf:about").to_s - end - if @link == "" - @link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s - end - if @link == "" - @link = XPath.first(root_node, "@href").to_s - end - if @link == "" - @link = XPath.first(root_node, "a/@href").to_s - end - if @link == "" - @link = XPath.first(root_node, "@HREF").to_s - end - if @link == "" - @link = XPath.first(root_node, "A/@HREF").to_s - end - end - if @link == "" || @link.nil? + @link = try_xpaths(self.root_node, [ + "atom10:link[@type='application/xhtml+xml']/@href", + "atom10:link[@type='text/html']/@href", + "atom10:link[@rel='alternate']/@href", + "atom03:link[@type='application/xhtml+xml']/@href", + "atom03:link[@type='text/html']/@href", + "atom03:link[@rel='alternate']/@href", + "atom:link[@type='application/xhtml+xml']/@href", + "atom:link[@type='text/html']/@href", + "atom:link[@rel='alternate']/@href", + "link[@type='application/xhtml+xml']/@href", + "link[@type='text/html']/@href", + "link[@rel='alternate']/@href", + "link/text()", + "@rdf:about", + "guid[@isPermaLink='true']/text()", + "@href", + "a/@href" + ], :select_result_value => true) + if @link.blank? if FeedTools.is_uri? self.guid @link = self.guid end end - if @link != "" + if !@link.blank? @link = FeedTools.unescape_entities(@link) end # TODO: Actually implement proper relative url resolving instead of this crap # =========================================================================== # @@ -554,11 +512,54 @@ # @link = @link[1..-1] # end # # prepend the base to the link since they seem to have used a relative path # @link = feed.base + @link # end - @link = FeedTools.normalize_url(@link) + if @link.blank? + link_node = try_xpaths(self.root_node, [ + "atom10:link", + "atom03:link", + "atom:link", + "link" + ]) + if link_node != nil + if link_node.attributes['type'].to_s =~ /^image/ || + link_node.attributes['type'].to_s =~ /^application/ || + link_node.attributes['type'].to_s =~ /xml/ || + link_node.attributes['rel'].to_s =~ /self/ + for child in self.root_node + if child.class == REXML::Element + if child.name.downcase == "link" + if child.attributes['type'].to_s =~ /^image/ || + child.attributes['type'].to_s =~ /^application/ || + child.attributes['type'].to_s =~ /xml/ || + child.attributes['rel'].to_s =~ /self/ + @link = nil + next + else + @link = child.attributes['href'].to_s + if @link.blank? + @link = child.inner_xml + end + if @link.blank? + next + end + break + end + end + end + end + else + @link = link_node.attributes['href'].to_s + end + end + end + @link = self.comments if @link.blank? + @link = nil if @link.blank? + if FeedTools.configurations[:url_normalization_enabled] + @link = FeedTools.normalize_url(@link) + end end return @link end # Sets the feed item link @@ -568,127 +569,126 @@ # Returns a list of the feed item's categories def categories if @categories.nil? @categories = [] - category_nodes = XPath.match(root_node, "category") - if category_nodes.nil? || category_nodes.empty? - category_nodes = XPath.match(root_node, "dc:subject") + category_nodes = try_xpaths_all(self.root_node, [ + "category", + "dc:subject" + ]) + for category_node in category_nodes + category = FeedTools::Feed::Category.new + category.term = try_xpaths(category_node, ["@term", "text()"], + :select_result_value => true) + category.term.strip! unless category.term.nil? + category.label = try_xpaths(category_node, ["@label"], + :select_result_value => true) + category.label.strip! unless category.label.nil? + category.scheme = try_xpaths(category_node, [ + "@scheme", + "@domain" + ], :select_result_value => true) + category.scheme.strip! unless category.scheme.nil? + @categories << category end - unless category_nodes.nil? - for category_node in category_nodes - category = FeedTools::Feed::Category.new - category.term = XPath.first(category_node, "@term").to_s - if category.term == "" - category.term = XPath.first(category_node, "text()").to_s - end - category.term.strip! unless category.term.nil? - category.term = nil if category.term == "" - category.label = XPath.first(category_node, "@label").to_s - category.label.strip! unless category.label.nil? - category.label = nil if category.label == "" - category.scheme = XPath.first(category_node, "@scheme").to_s - if category.scheme == "" - category.scheme = XPath.first(category_node, "@domain").to_s - end - category.scheme.strip! unless category.scheme.nil? - category.scheme = nil if category.scheme == "" - @categories << category - end - end end return @categories end # Returns a list of the feed items's images def images if @images.nil? @images = [] - image_nodes = XPath.match(root_node, "link") - if image_nodes.nil? || image_nodes.empty? - image_nodes = XPath.match(root_node, "logo") - end - if image_nodes.nil? || image_nodes.empty? - image_nodes = XPath.match(root_node, "LOGO") - end - if image_nodes.nil? || image_nodes.empty? - image_nodes = XPath.match(root_node, "image") - end - unless image_nodes.nil? + image_nodes = try_xpaths_all(self.root_node, [ + "image", + "logo", + "atom10:link", + "atom03:link", + "atom:link", + "link" + ]) + unless image_nodes.blank? for image_node in image_nodes image = FeedTools::Feed::Image.new - image.url = XPath.first(image_node, "url/text()").to_s - if image.url == "" - image.url = XPath.first(image_node, "@rdf:resource").to_s + image.url = try_xpaths(image_node, [ + "url/text()", + "@rdf:resource" + ], :select_result_value => true) + if image.url.blank? && (image_node.name == "logo" || + (image_node.attributes['type'].to_s =~ /^image/) == 0) + image.url = try_xpaths(image_node, [ + "@atom10:href", + "@atom03:href", + "@atom:href", + "@href" + ], :select_result_value => true) + if image.url == self.link && image.url != nil + image.url = nil + end end - if image.url == "" && (image_node.name == "logo" || - (image_node.attributes['type'] =~ /^image/) == 0) - image.url = XPath.first(image_node, "@href").to_s + if image.url.blank? && image_node.name == "LOGO" + image.url = try_xpaths(image_node, [ + "@href" + ], :select_result_value => true) end - if image.url == "" && image_node.name == "LOGO" - image.url = XPath.first(image_node, "@HREF").to_s - end image.url.strip! unless image.url.nil? - image.url = nil if image.url == "" - image.title = XPath.first(image_node, "title/text()").to_s + image.title = try_xpaths(image_node, + ["title/text()"], :select_result_value => true) image.title.strip! unless image.title.nil? - image.title = nil if image.title == "" - image.description = - XPath.first(image_node, "description/text()").to_s + image.description = try_xpaths(image_node, + ["description/text()"], :select_result_value => true) image.description.strip! unless image.description.nil? - image.description = nil if image.description == "" - image.link = XPath.first(image_node, "link/text()").to_s + image.link = try_xpaths(image_node, + ["link/text()"], :select_result_value => true) image.link.strip! unless image.link.nil? - image.link = nil if image.link == "" - image.height = XPath.first(image_node, "height/text()").to_s.to_i + image.height = try_xpaths(image_node, + ["height/text()"], :select_result_value => true).to_i image.height = nil if image.height <= 0 - image.width = XPath.first(image_node, "width/text()").to_s.to_i + image.width = try_xpaths(image_node, + ["width/text()"], :select_result_value => true).to_i image.width = nil if image.width <= 0 - image.style = XPath.first(image_node, "@style").to_s.downcase - if image.style == "" - image.style = XPath.first(image_node, "@STYLE").to_s.downcase - end + image.style = try_xpaths(image_node, [ + "style/text()", + "@style" + ], :select_result_value => true) image.style.strip! unless image.style.nil? - image.style = nil if image.style == "" - @images << image + image.style.downcase! unless image.style.nil? + @images << image unless image.url.nil? end end end return @images end # Returns the feed item itunes image link - # - # If it's not present, falls back to the normal image link. - # Technically, the itunes spec says that the image needs to be - # square and larger than 300x300, but hey, if there's an image - # to be had, it's better than none at all. def itunes_image_link if @itunes_image_link.nil? - # get the feed item itunes image link from the xml document - @itunes_image_link = XPath.first(root_node, "itunes:image/@href").to_s - if @itunes_image_link == "" - @itunes_image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s + @itunes_image_link = try_xpaths(self.root_node, [ + "itunes:image/@href", + "itunes:link[@rel='image']/@href" + ], :select_result_value => true) + if FeedTools.configurations[:url_normalization_enabled] + @itunes_image_link = FeedTools.normalize_url(@itunes_image_link) end - @itunes_image_link = FeedTools.normalize_url(@itunes_image_link) end return @itunes_image_link end # Sets the feed item itunes image link def itunes_image_link=(new_itunes_image_link) @itunes_image_link = new_itunes_image_link end # Returns the feed item media thumbnail link - # - # If it's not present, falls back to the normal image link. def media_thumbnail_link if @media_thumbnail_link.nil? - # get the feed item itunes image link from the xml document - @media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s - @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link) + @media_thumbnail_link = try_xpaths(self.root_node, [ + "media:thumbnail/@url" + ], :select_result_value => true) + if FeedTools.configurations[:url_normalization_enabled] + @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link) + end end return @media_thumbnail_link end # Sets the feed item media thumbnail url @@ -697,48 +697,32 @@ end # Returns the feed item's copyright information def copyright if @copyright.nil? - unless root_node.nil? - repair_entities = false - - copyright_node = XPath.first(root_node, "dc:rights") - if copyright_node.nil? - copyright_node = XPath.first(root_node, "dc:rights", - FEED_TOOLS_NAMESPACES) - end - if copyright_node.nil? - copyright_node = XPath.first(root_node, "rights", - FEED_TOOLS_NAMESPACES) - end - if copyright_node.nil? - copyright_node = XPath.first(root_node, "copyright", - FEED_TOOLS_NAMESPACES) - end - if copyright_node.nil? - copyright_node = XPath.first(root_node, "atom03:copyright", - FEED_TOOLS_NAMESPACES) - end - if copyright_node.nil? - copyright_node = XPath.first(root_node, "atom10:copyright", - FEED_TOOLS_NAMESPACES) - end - if copyright_node.nil? - copyright_node = XPath.first(root_node, "copyrights", - FEED_TOOLS_NAMESPACES) - end - end + repair_entities = false + copyright_node = try_xpaths(self.root_node, [ + "atom10:copyright", + "atom03:copyright", + "atom:copyright", + "copyright", + "copyrights", + "dc:rights", + "rights" + ]) if copyright_node.nil? return nil end - copyright_type = XPath.first(copyright_node, "@type").to_s - copyright_mode = XPath.first(copyright_node, "@mode").to_s - copyright_encoding = XPath.first(copyright_node, "@encoding").to_s + copyright_type = try_xpaths(copyright_node, "@type", + :select_result_value => true) + copyright_mode = try_xpaths(copyright_node, "@mode", + :select_result_value => true) + copyright_encoding = try_xpaths(copyright_node, "@encoding", + :select_result_value => true) # Note that we're checking for misuse of type, mode and encoding here - if copyright_encoding != "" + if !copyright_encoding.blank? @copyright = "[Embedded data objects are not currently supported.]" elsif copyright_node.cdatas.size > 0 @copyright = copyright_node.cdatas.first.value elsif copyright_type == "base64" || copyright_mode == "base64" || @@ -761,11 +745,11 @@ @copyright = FeedTools.unescape_entities(@copyright) if repair_entities @copyright = FeedTools.tidy_html(@copyright) end @copyright = @copyright.strip unless @copyright.nil? - @copyright = nil if @copyright == "" + @copyright = nil if @copyright.blank? end return @copyright end # Sets the feed item's copyright information @@ -777,31 +761,43 @@ def enclosures if @enclosures.nil? @enclosures = [] # First, load up all the different possible sources of enclosures - rss_enclosures = XPath.match(root_node, "enclosure") - atom_enclosures = XPath.match(root_node, "link[@rel='enclosure']") - media_content_enclosures = XPath.match(root_node, "media:content") - media_group_enclosures = XPath.match(root_node, "media:group") - - # Parse RSS-type enclosures. Thanks to a few buggy enclosures implementations, - # sometimes these also manage to show up in atom files. + rss_enclosures = + try_xpaths_all(self.root_node, ["enclosure"]) + atom_enclosures = + try_xpaths_all(self.root_node, [ + "atom10:link[@rel='enclosure']", + "atom03:link[@rel='enclosure']", + "atom:link[@rel='enclosure']", + "link[@rel='enclosure']" + ]) + media_content_enclosures = + try_xpaths_all(self.root_node, ["media:content"]) + media_group_enclosures = + try_xpaths_all(self.root_node, ["media:group"]) + + # Parse RSS-type enclosures. Thanks to a few buggy enclosures + # implementations, sometimes these also manage to show up in atom + # files. for enclosure_node in rss_enclosures enclosure = Enclosure.new - enclosure.url = FeedTools.unescape_entities(enclosure_node.attributes["url"].to_s) + enclosure.url = FeedTools.unescape_entities( + enclosure_node.attributes["url"].to_s) enclosure.type = enclosure_node.attributes["type"].to_s enclosure.file_size = enclosure_node.attributes["length"].to_i enclosure.credits = [] enclosure.explicit = false @enclosures << enclosure end - # Parse atom-type enclosures. If there are repeats of the same enclosure object, - # we merge the two together. + # Parse atom-type enclosures. If there are repeats of the same + # enclosure object, we merge the two together. for enclosure_node in atom_enclosures - enclosure_url = FeedTools.unescape_entities(enclosure_node.attributes["href"].to_s) + enclosure_url = FeedTools.unescape_entities( + enclosure_node.attributes["href"].to_s) enclosure = nil new_enclosure = false for existing_enclosure in @enclosures if existing_enclosure.url == enclosure_url enclosure = existing_enclosure @@ -820,17 +816,19 @@ if new_enclosure @enclosures << enclosure end end - # Creates an anonymous method to parse content objects from the media module. We - # do this to avoid excessive duplication of code since we have to do identical - # processing for content objects within group objects. + # Creates an anonymous method to parse content objects from the media + # module. We do this to avoid excessive duplication of code since we + # have to do identical processing for content objects within group + # objects. parse_media_content = lambda do |media_content_nodes| affected_enclosures = [] for enclosure_node in media_content_nodes - enclosure_url = FeedTools.unescape_entities(enclosure_node.attributes["url"].to_s) + enclosure_url = FeedTools.unescape_entities( + enclosure_node.attributes["url"].to_s) enclosure = nil new_enclosure = false for existing_enclosure in @enclosures if existing_enclosure.url == enclosure_url enclosure = existing_enclosure @@ -847,77 +845,88 @@ enclosure.duration = enclosure_node.attributes["duration"].to_s enclosure.height = enclosure_node.attributes["height"].to_i enclosure.width = enclosure_node.attributes["width"].to_i enclosure.bitrate = enclosure_node.attributes["bitrate"].to_i enclosure.framerate = enclosure_node.attributes["framerate"].to_i - enclosure.expression = enclosure_node.attributes["expression"].to_s + enclosure.expression = + enclosure_node.attributes["expression"].to_s enclosure.is_default = (enclosure_node.attributes["isDefault"].to_s.downcase == "true") - if XPath.first(enclosure_node, "media:thumbnail/@url").to_s != "" + enclosure_thumbnail_url = try_xpaths(enclosure_node, + ["media:thumbnail/@url"], :select_result_value => true) + if !enclosure_thumbnail_url.blank? enclosure.thumbnail = EnclosureThumbnail.new( - FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@url").to_s), - FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@height").to_s), - FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@width").to_s) + FeedTools.unescape_entities(enclosure_thumbnail_url), + FeedTools.unescape_entities( + try_xpaths(enclosure_node, ["media:thumbnail/@height"], + :select_result_value => true)), + FeedTools.unescape_entities( + try_xpaths(enclosure_node, ["media:thumbnail/@width"], + :select_result_value => true)) ) - if enclosure.thumbnail.height == "" - enclosure.thumbnail.height = nil - end - if enclosure.thumbnail.width == "" - enclosure.thumbnail.width = nil - end end enclosure.categories = [] - for category in XPath.match(enclosure_node, "media:category") + for category in try_xpaths_all(enclosure_node, ["media:category"]) enclosure.categories << FeedTools::Feed::Category.new enclosure.categories.last.term = - FeedTools.unescape_entities(category.text) + FeedTools.unescape_entities(category.inner_xml) enclosure.categories.last.scheme = - FeedTools.unescape_entities(category.attributes["scheme"].to_s) + FeedTools.unescape_entities( + category.attributes["scheme"].to_s) enclosure.categories.last.label = - FeedTools.unescape_entities(category.attributes["label"].to_s) - if enclosure.categories.last.scheme == "" + FeedTools.unescape_entities( + category.attributes["label"].to_s) + if enclosure.categories.last.scheme.blank? enclosure.categories.last.scheme = nil end - if enclosure.categories.last.label == "" + if enclosure.categories.last.label.blank? enclosure.categories.last.label = nil end end - if XPath.first(enclosure_node, "media:hash/text()").to_s != "" + enclosure_media_hash = try_xpaths(enclosure_node, + ["media:hash/text()"], :select_result_value => true) + if !enclosure_media_hash.nil? enclosure.hash = EnclosureHash.new( - FeedTools.sanitize_html(FeedTools.unescape_entities(XPath.first( - enclosure_node, "media:hash/text()").to_s), :strip), + FeedTools.sanitize_html(FeedTools.unescape_entities( + enclosure_media_hash), :strip), "md5" ) end - if XPath.first(enclosure_node, "media:player/@url").to_s != "" + enclosure_media_player_url = try_xpaths(enclosure_node, + ["media:player/@url"], :select_result_value => true) + if !enclosure_media_player_url.blank? enclosure.player = EnclosurePlayer.new( - FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@url").to_s), - FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@height").to_s), - FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@width").to_s) + FeedTools.unescape_entities(enclosure_media_player_url), + FeedTools.unescape_entities( + try_xpaths(enclosure_node, + ["media:player/@height"], :select_result_value => true)), + FeedTools.unescape_entities( + try_xpaths(enclosure_node, + ["media:player/@width"], :select_result_value => true)) ) - if enclosure.player.height == "" - enclosure.player.height = nil - end - if enclosure.player.width == "" - enclosure.player.width = nil - end end enclosure.credits = [] - for credit in XPath.match(enclosure_node, "media:credit") + for credit in try_xpaths_all(enclosure_node, ["media:credit"]) enclosure.credits << EnclosureCredit.new( - FeedTools.unescape_entities(credit.text), - FeedTools.unescape_entities(credit.attributes["role"].to_s.downcase) + FeedTools.unescape_entities(credit.inner_xml.to_s.strip), + FeedTools.unescape_entities( + credit.attributes["role"].to_s.downcase) ) - if enclosure.credits.last.role == "" + if enclosure.credits.last.name.blank? + enclosure.credits.last.name = nil + end + if enclosure.credits.last.role.blank? enclosure.credits.last.role = nil end end - enclosure.explicit = (XPath.first(enclosure_node, - "media:adult/text()").to_s.downcase == "true") - if XPath.first(enclosure_node, "media:text/text()").to_s != "" - enclosure.text = FeedTools.unescape_entities(XPath.first(enclosure_node, - "media:text/text()").to_s) + enclosure.explicit = (try_xpaths(enclosure_node, + ["media:adult/text()"]).to_s.downcase == "true") + enclosure_media_text = + try_xpaths(enclosure_node, ["media:text/text()"]) + if !enclosure_media_text.blank? + enclosure.text = FeedTools.unescape_entities( + enclosure_media_text) end affected_enclosures << enclosure if new_enclosure @enclosures << enclosure end @@ -931,95 +940,100 @@ media_groups = [] # Parse the group objects. for media_group in media_group_enclosures group_media_content_enclosures = - XPath.match(media_group, "media:content") + try_xpaths_all(media_group, ["media:content"]) # Parse the content objects within the group objects. affected_enclosures = parse_media_content.call(group_media_content_enclosures) # Now make sure that content objects inherit certain properties from # the group objects. for enclosure in affected_enclosures - if enclosure.thumbnail.nil? && - XPath.first(media_group, "media:thumbnail/@url").to_s != "" + media_group_thumbnail = try_xpaths(media_group, + ["media:thumbnail/@url"], :select_result_value => true) + if enclosure.thumbnail.nil? && !media_group_thumbnail.blank? enclosure.thumbnail = EnclosureThumbnail.new( FeedTools.unescape_entities( - XPath.first(media_group, "media:thumbnail/@url").to_s), + media_group_thumbnail), FeedTools.unescape_entities( - XPath.first(media_group, "media:thumbnail/@height").to_s), + try_xpaths(media_group, ["media:thumbnail/@height"], + :select_result_value => true)), FeedTools.unescape_entities( - XPath.first(media_group, "media:thumbnail/@width").to_s) + try_xpaths(media_group, ["media:thumbnail/@width"], + :select_result_value => true)) ) - if enclosure.thumbnail.height == "" - enclosure.thumbnail.height = nil - end - if enclosure.thumbnail.width == "" - enclosure.thumbnail.width = nil - end end - if (enclosure.categories.nil? || enclosure.categories.size == 0) + if (enclosure.categories.blank?) enclosure.categories = [] - for category in XPath.match(media_group, "media:category") + for category in try_xpaths_all(media_group, ["media:category"]) enclosure.categories << FeedTools::Feed::Category.new enclosure.categories.last.term = - FeedTools.unescape_entities(category.text) + FeedTools.unescape_entities(category.inner_xml) enclosure.categories.last.scheme = - FeedTools.unescape_entities(category.attributes["scheme"].to_s) + FeedTools.unescape_entities( + category.attributes["scheme"].to_s) enclosure.categories.last.label = - FeedTools.unescape_entities(category.attributes["label"].to_s) - if enclosure.categories.last.scheme == "" + FeedTools.unescape_entities( + category.attributes["label"].to_s) + if enclosure.categories.last.scheme.blank? enclosure.categories.last.scheme = nil end - if enclosure.categories.last.label == "" + if enclosure.categories.last.label.blank? enclosure.categories.last.label = nil end end end - if enclosure.hash.nil? && - XPath.first(media_group, "media:hash/text()").to_s != "" + enclosure_media_group_hash = try_xpaths(enclosure_node, + ["media:hash/text()"], :select_result_value => true) + if enclosure.hash.nil? && !enclosure_media_group_hash.blank? enclosure.hash = EnclosureHash.new( - FeedTools.unescape_entities(XPath.first(media_group, "media:hash/text()").to_s), + FeedTools.sanitize_html(FeedTools.unescape_entities( + enclosure_media_group_hash), :strip), "md5" ) end - if enclosure.player.nil? && - XPath.first(media_group, "media:player/@url").to_s != "" + enclosure_media_group_url = try_xpaths(media_group, + "media:player/@url", :select_result_value => true) + if enclosure.player.nil? && !enclosure_media_group_url.blank? enclosure.player = EnclosurePlayer.new( - FeedTools.unescape_entities(XPath.first(media_group, "media:player/@url").to_s), - FeedTools.unescape_entities(XPath.first(media_group, "media:player/@height").to_s), - FeedTools.unescape_entities(XPath.first(media_group, "media:player/@width").to_s) + FeedTools.unescape_entities(enclosure_media_group_url), + FeedTools.unescape_entities( + try_xpaths(media_group, ["media:player/@height"], + :select_result_value => true)), + FeedTools.unescape_entities( + try_xpaths(media_group, ["media:player/@width"], + :select_result_value => true)) ) - if enclosure.player.height == "" - enclosure.player.height = nil - end - if enclosure.player.width == "" - enclosure.player.width = nil - end end if enclosure.credits.nil? || enclosure.credits.size == 0 enclosure.credits = [] - for credit in XPath.match(media_group, "media:credit") + for credit in try_xpaths_all(media_group, ["media:credit"]) enclosure.credits << EnclosureCredit.new( - FeedTools.unescape_entities(credit.text), - FeedTools.unescape_entities(credit.attributes["role"].to_s.downcase) + FeedTools.unescape_entities(credit.inner_xml), + FeedTools.unescape_entities( + credit.attributes["role"].to_s.downcase) ) - if enclosure.credits.last.role == "" + if enclosure.credits.last.role.blank? enclosure.credits.last.role = nil end end end if enclosure.explicit?.nil? - enclosure.explicit = (XPath.first(media_group, - "media:adult/text()").to_s.downcase == "true") ? true : false + enclosure.explicit = ((try_xpaths(media_group, [ + "media:adult/text()" + ], :select_result_value => true).downcase == "true") ? + true : false) end - if enclosure.text.nil? && - XPath.first(media_group, "media:text/text()").to_s != "" - enclosure.text = FeedTools.sanitize_html(FeedTools.unescape_entities( - XPath.first(media_group, "media:text/text()").to_s), :strip) + enclosure_media_group_text = try_xpaths(media_group, + ["media:text/text()"], :select_result_value => true) + if enclosure.text.nil? && !enclosure_media_group_text.blank? + enclosure.text = FeedTools.sanitize_html( + FeedTools.unescape_entities( + enclosure_media_group_text), :strip) end end # Keep track of the media groups media_groups << affected_enclosures @@ -1031,14 +1045,18 @@ enclosure.explicit = true end end # Add all the itunes categories - for itunes_category in XPath.match(root_node, "itunes:category") + itunes_categories = + try_xpaths_all(self.root_node, ["itunes:category"]) + for itunes_category in itunes_categories genre = "Podcasts" category = itunes_category.attributes["text"].to_s - subcategory = XPath.first(itunes_category, "itunes:category/@text").to_s + subcategory = + try_xpaths(itunes_category, ["itunes:category/@text"], + :select_result_value => true) category_path = genre if category != "" category_path << "/" + category end if subcategory != "" @@ -1059,11 +1077,11 @@ end for enclosure in @enclosures # Clean up any of those attributes that incorrectly have "" # or 0 as their values - if enclosure.type == "" + if enclosure.type.blank? enclosure.type = nil end if enclosure.file_size == 0 enclosure.file_size = nil end @@ -1080,16 +1098,17 @@ enclosure.bitrate = nil end if enclosure.framerate == 0 enclosure.framerate = nil end - if enclosure.expression == "" || enclosure.expression.nil? + if enclosure.expression.blank? enclosure.expression = "full" end - # If an enclosure is missing the text field, fall back on the itunes:summary field - if enclosure.text.nil? || enclosure.text = "" + # If an enclosure is missing the text field, fall back on the + # itunes:summary field + if enclosure.text.blank? enclosure.text = self.itunes_summary end # Make sure we don't have duplicate categories unless enclosure.categories.nil? @@ -1126,12 +1145,12 @@ end @enclosures << default_enclosure end end - # If we have a single enclosure, it's safe to inherit the itunes:duration field - # if it's missing. + # If we have a single enclosure, it's safe to inherit the + # itunes:duration field if it's missing. if @enclosures.size == 1 if @enclosures.first.duration.nil? || @enclosures.first.duration == 0 @enclosures.first.duration = self.itunes_duration end end @@ -1145,52 +1164,33 @@ # Returns the feed item author def author if @author.nil? @author = FeedTools::Feed::Author.new - unless root_node.nil? - author_node = XPath.first(root_node, "atom10:author", - FEED_TOOLS_NAMESPACES) - if author_node.nil? - author_node = XPath.first(root_node, "atom03:author", - FEED_TOOLS_NAMESPACES) - end - if author_node.nil? - author_node = XPath.first(root_node, "atom:author") - end - if author_node.nil? - author_node = XPath.first(root_node, "author") - end - if author_node.nil? - author_node = XPath.first(root_node, "managingEditor") - end - if author_node.nil? - author_node = XPath.first(root_node, "dc:author", - FEED_TOOLS_NAMESPACES) - end - if author_node.nil? - author_node = XPath.first(root_node, "dc:author") - end - if author_node.nil? - author_node = XPath.first(root_node, "dc:creator", - FEED_TOOLS_NAMESPACES) - end - if author_node.nil? - author_node = XPath.first(root_node, "dc:creator") - end - end + author_node = try_xpaths(self.root_node, [ + "atom10:author", + "atom03:author", + "atom:author", + "author", + "managingEditor", + "dc:author", + "dc:creator", + "creator" + ]) unless author_node.nil? @author.raw = FeedTools.unescape_entities( - XPath.first(author_node, "text()").to_s) - @author.raw = nil if @author.raw == "" + XPath.first(author_node, "text()").to_s).strip + @author.raw = nil if @author.raw.blank? unless @author.raw.nil? raw_scan = @author.raw.scan( /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i) if raw_scan.nil? || raw_scan.size == 0 raw_scan = @author.raw.scan( /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i) - author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0 + unless raw_scan.size == 0 + author_raw_pair = raw_scan.first.reverse + end else author_raw_pair = raw_scan.first end if raw_scan.nil? || raw_scan.size == 0 email_scan = @author.raw.scan( @@ -1203,46 +1203,48 @@ @author.name = author_raw_pair.first.strip @author.email = author_raw_pair.last.strip else unless @author.raw.include?("@") # We can be reasonably sure we are looking at something - # that the creator didn't intend to contain an email address if - # it got through the preceeding regexes and it doesn't + # that the creator didn't intend to contain an email address + # if it got through the preceeding regexes and it doesn't # contain the tell-tale '@' symbol. @author.name = @author.raw end end end - @author.name = "" if @author.name.nil? - if @author.name == "" + if @author.name.blank? @author.name = FeedTools.unescape_entities( - XPath.first(author_node, "name/text()").to_s) + try_xpaths(author_node, [ + "name/text()", + "@name" + ], :select_result_value => true) + ) end - if @author.name == "" - @author.name = FeedTools.unescape_entities( - XPath.first(author_node, "@name").to_s) - end - if @author.email == "" + if @author.email.blank? @author.email = FeedTools.unescape_entities( - XPath.first(author_node, "email/text()").to_s) + try_xpaths(author_node, [ + "email/text()", + "@email" + ], :select_result_value => true) + ) end - if @author.email == "" - @author.email = FeedTools.unescape_entities( - XPath.first(author_node, "@email").to_s) - end - if @author.url == "" + if @author.url.blank? @author.url = FeedTools.unescape_entities( - XPath.first(author_node, "url/text()").to_s) + try_xpaths(author_node, [ + "url/text()", + "uri/text()", + "@url", + "@uri", + "@href" + ], :select_result_value => true) + ) end - if @author.url == "" - @author.url = FeedTools.unescape_entities( - XPath.first(author_node, "@url").to_s) - end - @author.name = nil if @author.name == "" - @author.raw = nil if @author.raw == "" - @author.email = nil if @author.email == "" - @author.url = nil if @author.url == "" + @author.name = nil if @author.name.blank? + @author.raw = nil if @author.raw.blank? + @author.email = nil if @author.email.blank? + @author.url = nil if @author.url.blank? end # Fallback on the itunes module if we didn't find an author name begin @author.name = self.itunes_author if @author.name.nil? rescue @@ -1274,16 +1276,15 @@ if @publisher.nil? @publisher = FeedTools::Feed::Author.new # Set the author name @publisher.raw = FeedTools.unescape_entities( - XPath.first(root_node, "dc:publisher/text()").to_s) - if @publisher.raw == "" - @publisher.raw = FeedTools.unescape_entities( - XPath.first(root_node, "webMaster/text()").to_s) - end - unless @publisher.raw == "" + try_xpaths(self.root_node, [ + "dc:publisher/text()", + "webMaster/text()" + ], :select_result_value => true)) + unless @publisher.raw.blank? raw_scan = @publisher.raw.scan( /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i) if raw_scan.nil? || raw_scan.size == 0 raw_scan = @publisher.raw.scan( /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i) @@ -1312,14 +1313,14 @@ @publisher.name = @publisher.raw end end end - @publisher.name = nil if @publisher.name == "" - @publisher.raw = nil if @publisher.raw == "" - @publisher.email = nil if @publisher.email == "" - @publisher.url = nil if @publisher.url == "" + @publisher.name = nil if @publisher.name.blank? + @publisher.raw = nil if @publisher.raw.blank? + @publisher.email = nil if @publisher.email.blank? + @publisher.url = nil if @publisher.url.blank? end return @publisher end # Sets the feed publisher @@ -1343,14 +1344,14 @@ # # This inherits from any incorrectly placed channel-level itunes:author # elements. They're actually amazingly common. People don't read specs. def itunes_author if @itunes_author.nil? - @itunes_author = FeedTools.unescape_entities(XPath.first(root_node, - "itunes:author/text()").to_s) - @itunes_author = feed.itunes_author if @itunes_author == "" - @itunes_author = nil if @itunes_author == "" + @itunes_author = FeedTools.unescape_entities( + try_xpaths(self.root_node, + ["itunes:author/text()"], :select_result_value => true)) + @itunes_author = feed.itunes_author if @itunes_author.blank? end return @itunes_author end # Sets the contents of the itunes:author element @@ -1359,18 +1360,19 @@ end # Returns the number of seconds that the associated media runs for def itunes_duration if @itunes_duration.nil? - raw_duration = FeedTools.unescape_entities(XPath.first(root_node, - "itunes:duration/text()").to_s) - if raw_duration != "" + raw_duration = FeedTools.unescape_entities( + try_xpaths(self.root_node, + ["itunes:duration/text()"], :select_result_value => true)) + if !raw_duration.blank? hms = raw_duration.split(":").map { |x| x.to_i } if hms.size == 3 - @itunes_duration = hms[0].hour + hms[1].minute + hms[2] + @itunes_duration = hms[0].hours + hms[1].minutes + hms[2] elsif hms.size == 2 - @itunes_duration = hms[0].minute + hms[1] + @itunes_duration = hms[0].minutes + hms[1] elsif hms.size == 1 @itunes_duration = hms[0] end end end @@ -1386,44 +1388,56 @@ def time(options = {}) validate_options([ :estimate_timestamp ], options.keys) options = { :estimate_timestamp => true }.merge(options) if @time.nil? - unless root_node.nil? - time_string = XPath.first(root_node, "pubDate/text()").to_s - if time_string == "" - time_string = XPath.first(root_node, "dc:date/text()").to_s - end - if time_string == "" - time_string = XPath.first(root_node, "issued/text()").to_s - end - if time_string == "" - time_string = XPath.first(root_node, "updated/text()").to_s - end - if time_string == "" - time_string = XPath.first(root_node, "time/text()").to_s - end - end + time_string = try_xpaths(self.root_node, [ + "atom10:updated/text()", + "atom03:updated/text()", + "atom:updated/text()", + "updated/text()", + "atom10:modified/text()", + "atom03:modified/text()", + "atom:modified/text()", + "modified/text()", + "time/text()", + "atom10:issued/text()", + "atom03:issued/text()", + "atom:issued/text()", + "issued/text()", + "atom10:published/text()", + "atom03:published/text()", + "atom:published/text()", + "published/text()", + "pubDate/text()", + "dc:date/text()", + "date/text()" + ], :select_result_value => true) begin - time_string = "" if time_string.nil? - if time_string != "" + if !time_string.blank? @time = Time.parse(time_string).gmtime + elsif FeedTools.configurations[:timestamp_estimation_enabled] && + !self.title.nil? && + (Time.parse(self.title) - Time.now).abs > 100 + @time = Time.parse(self.title).gmtime end rescue end - if options[:estimate_timestamp] - if @time.nil? - begin - @time = succ_time + if FeedTools.configurations[:timestamp_estimation_enabled] + if options[:estimate_timestamp] + if @time.nil? + begin + @time = succ_time + if @time.nil? + @time = prev_time + end + rescue + end if @time.nil? - @time = prev_time + @time = Time.now.gmtime end - rescue end - if @time.nil? - @time = Time.now.gmtime - end end end end return @time end @@ -1438,14 +1452,14 @@ begin parent_feed = self.feed if parent_feed.nil? return nil end - if parent_feed.instance_variable_get("@items").nil? + if parent_feed.instance_variable_get("@entries").nil? parent_feed.items end - unsorted_items = parent_feed.instance_variable_get("@items") + unsorted_items = parent_feed.instance_variable_get("@entries") item_index = unsorted_items.index(self) if item_index.nil? return nil end if item_index <= 0 @@ -1455,23 +1469,23 @@ return (previous_item.time(:estimate_timestamp => false) + 1) rescue return nil end end - #private :succ_time + private :succ_time # Returns 1 second before the succeeding item's time. def prev_time #:nodoc: begin parent_feed = self.feed if parent_feed.nil? return nil end - if parent_feed.instance_variable_get("@items").nil? + if parent_feed.instance_variable_get("@entries").nil? parent_feed.items end - unsorted_items = parent_feed.instance_variable_get("@items") + unsorted_items = parent_feed.instance_variable_get("@entries") item_index = unsorted_items.index(self) if item_index.nil? return nil end if item_index >= (unsorted_items.size - 1) @@ -1481,22 +1495,26 @@ return (succeeding_item.time(:estimate_timestamp => false) - 1) rescue return nil end end - #private :prev_time - + private :prev_time + # Returns the feed item updated time def updated if @updated.nil? - unless root_node.nil? - updated_string = XPath.first(root_node, "updated/text()").to_s - if updated_string == "" - updated_string = XPath.first(root_node, "modified/text()").to_s - end - end - if updated_string != nil && updated_string != "" + updated_string = try_xpaths(self.root_node, [ + "atom10:updated/text()", + "atom03:updated/text()", + "atom:updated/text()", + "updated/text()", + "atom10:modified/text()", + "atom03:modified/text()", + "atom:modified/text()", + "modified/text()" + ], :select_result_value => true) + if !updated_string.blank? @updated = Time.parse(updated_string).gmtime rescue nil else @updated = nil end end @@ -1506,45 +1524,48 @@ # Sets the feed item updated time def updated=(new_updated) @updated = new_updated end - # Returns the feed item issued time - def issued - if @issued.nil? - unless root_node.nil? - issued_string = XPath.first(root_node, "issued/text()").to_s - if issued_string == "" - issued_string = XPath.first(root_node, "published/text()").to_s - end - if issued_string == "" - issued_string = XPath.first(root_node, "pubDate/text()").to_s - end - if issued_string == "" - issued_string = XPath.first(root_node, "dc:date/text()").to_s - end - end - if issued_string != nil && issued_string != "" - @issued = Time.parse(issued_string).gmtime rescue nil + # Returns the feed item published time + def published + if @published.nil? + published_string = try_xpaths(self.root_node, [ + "atom10:issued/text()", + "atom03:issued/text()", + "atom:issued/text()", + "issued/text()", + "atom10:published/text()", + "atom03:published/text()", + "atom:published/text()", + "published/text()", + "pubDate/text()", + "dc:date/text()", + "date/text()" + ], :select_result_value => true) + if !published_string.blank? + @issued = Time.parse(published_string).gmtime rescue nil else @issued = nil end end return @issued end - # Sets the feed item issued time - def issued=(new_issued) - @issued = new_issued + # Sets the feed item published time + def published=(new_published) + @published = new_published end # Returns the url for posting comments def comments if @comments.nil? - @comments = FeedTools.normalize_url( - XPath.first(root_node, "comments/text()").to_s) - @comments = nil if @comments == "" + @comments = try_xpaths(self.root_node, ["comments/text()"], + :select_result_value => true) + if FeedTools.configurations[:url_normalization_enabled] + @comments = FeedTools.normalize_url(@comments) + end end return @comments end # Sets the url for posting comments @@ -1554,14 +1575,14 @@ # The source that this post was based on def source if @source.nil? @source = FeedTools::Feed::Link.new - @source.url = XPath.first(root_node, "source/@url").to_s - @source.url = nil if @source.url == "" - @source.value = XPath.first(root_node, "source/text()").to_s - @source.value = nil if @source.value == "" + @source.url = try_xpaths(self.root_node, ["source/@url"], + :select_result_value => true) + @source.value = try_xpaths(self.root_node, ["source/text()"], + :select_result_value => true) end return @source end # Returns the feed item tags @@ -1573,55 +1594,70 @@ if root_node.nil? return @tags end if @tags.nil? || @tags.size == 0 @tags = [] - tag_list = XPath.match(root_node, "dc:subject/rdf:Bag/rdf:li/text()") - if tag_list.nil? || tag_list.size == 0 - tag_list = XPath.match(root_node, - "dc:subject/rdf:Bag/rdf:li/text()", FEED_TOOLS_NAMESPACES) - end - if tag_list != nil && tag_list.size > 1 + tag_list = try_xpaths_all(self.root_node, + ["dc:subject/rdf:Bag/rdf:li/text()"], + :select_result_value => true) + if tag_list != nil && tag_list.size > 0 for tag in tag_list - @tags << tag.to_s.downcase.strip + @tags << tag.downcase.strip end end end if @tags.nil? || @tags.size == 0 # messy effort to find ourselves some tags, mainly for del.icio.us @tags = [] - rdf_bag = XPath.match(root_node, "taxo:topics/rdf:Bag/rdf:li") + rdf_bag = try_xpaths_all(self.root_node, + ["taxo:topics/rdf:Bag/rdf:li"]) if rdf_bag != nil && rdf_bag.size > 0 for tag_node in rdf_bag begin - tag_url = XPath.first(root_node, "@resource").to_s - tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/) + tag_url = try_xpaths(tag_node, ["@resource"], + :select_result_value => true) + tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)$/) if tag_match.size > 0 @tags << tag_match.first.last.downcase.strip end rescue end end end end if @tags.nil? || @tags.size == 0 @tags = [] - tag_list = XPath.match(root_node, "category/text()") + tag_list = try_xpaths_all(self.root_node, ["category/text()"], + :select_result_value => true) for tag in tag_list @tags << tag.to_s.downcase.strip end end if @tags.nil? || @tags.size == 0 @tags = [] - tag_list = XPath.match(root_node, "dc:subject/text()") + tag_list = try_xpaths_all(self.root_node, ["dc:subject/text()"], + :select_result_value => true) for tag in tag_list @tags << tag.to_s.downcase.strip end end - if @tags.nil? || @tags.size == 0 + if @tags.blank? begin - @tags = XPath.first(root_node, "itunes:keywords/text()").to_s.downcase.split(" ") + itunes_keywords_string = try_xpaths(self.root_node, [ + "itunes:keywords/text()" + ], :select_result_value => true) + unless itunes_keywords_string.blank? + @tags = itunes_keywords_string.downcase.split(",") + if @tags.size == 1 + @tags = itunes_keywords_string.downcase.split(" ") + @tags = @tags.map { |tag| tag.chomp(",") } + end + if @tags.size == 1 + @tags = itunes_keywords_string.downcase.split(",") + end + @tags = @tags.map { |tag| tag.strip } + end rescue @tags = [] end end if @tags.nil? @@ -1640,16 +1676,15 @@ # Returns true if this feed item contains explicit material. If the whole # feed has been marked as explicit, this will return true even if the item # isn't explicitly marked as explicit. def explicit? if @explicit.nil? - if XPath.first(root_node, - "media:adult/text()").to_s.downcase == "true" || - XPath.first(root_node, - "itunes:explicit/text()").to_s.downcase == "yes" || - XPath.first(root_node, - "itunes:explicit/text()").to_s.downcase == "true" || + explicit_string = try_xpaths(self.root_node, [ + "media:adult/text()", + "itunes:explicit/text()" + ], :select_result_value => true) + if explicit_string == "true" || explicit_string == "yes" || feed.explicit? @explicit = true else @explicit = false end @@ -1667,23 +1702,25 @@ def build_xml_hook(feed_type, version, xml_builder) return nil end # Generates xml based on the content of the feed item - def build_xml(feed_type=(self.feed.feed_type or "rss"), version=nil, - xml_builder=Builder::XmlMarkup.new(:indent => 2)) + def build_xml(feed_type=(self.feed.feed_type or "atom"), version=nil, + xml_builder=Builder::XmlMarkup.new( + :indent => 2, :escape_attrs => false)) if feed_type == "rss" && (version == nil || version == 0.0) version = 1.0 elsif feed_type == "atom" && (version == nil || version == 0.0) version = 1.0 end if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1) # RDF-based rss format if link.nil? raise "Cannot generate an rdf-based feed item with a nil link field." end - return xml_builder.item("rdf:about" => CGI.escapeHTML(link)) do + return xml_builder.item("rdf:about" => + FeedTools.escape_entities(link)) do unless title.nil? || title == "" xml_builder.title(title) else xml_builder.title end @@ -1706,84 +1743,95 @@ for tag in tags xml_builder.tag!("rdf:li", tag) end end end - xml_builder.tag!("itunes:keywords", tags.join(" ")) + if self.feed.podcast? + xml_builder.tag!("itunes:keywords", tags.join(", ")) + end end build_xml_hook(feed_type, version, xml_builder) end elsif feed_type == "rss" # normal rss format return xml_builder.item do - unless title.nil? || title == "" - xml_builder.title(title) + unless self.title.blank? + xml_builder.title(self.title) end - unless link.nil? || link == "" + unless self.link.blank? xml_builder.link(link) end - unless description.nil? || description == "" - xml_builder.description(description) + unless self.description.blank? + xml_builder.description(self.description) end - unless time.nil? - xml_builder.pubDate(time.rfc822) + unless self.time.nil? + xml_builder.pubDate(self.time.rfc822) end + unless self.guid.blank? + if FeedTools.is_uri?(self.guid) + xml_builder.guid(self.guid, "isPermaLink" => "true") + else + xml_builder.guid(self.guid, "isPermaLink" => "false") + end + else + unless self.link.blank? + xml_builder.guid(self.link, "isPermaLink" => "true") + end + end unless tags.nil? || tags.size == 0 xml_builder.tag!("taxo:topics") do xml_builder.tag!("rdf:Bag") do for tag in tags xml_builder.tag!("rdf:li", tag) end end end - xml_builder.tag!("itunes:keywords", tags.join(" ")) - end - build_xml_hook(feed_type, version, xml_builder) - end - elsif feed_type == "atom" && version == 0.3 - # normal atom format - return xml_builder.entry("xmlns" => - FEED_TOOLS_NAMESPACES['atom03']) do - unless title.nil? || title == "" - xml_builder.title(title, - "mode" => "escaped", - "type" => "text/html") - end - xml_builder.author do - unless self.author.nil? || self.author.name.nil? - xml_builder.name(self.author.name) - else - xml_builder.name("n/a") + if self.feed.podcast? + xml_builder.tag!("itunes:keywords", tags.join(", ")) end - unless self.author.nil? || self.author.email.nil? - xml_builder.email(self.author.email) - end - unless self.author.nil? || self.author.url.nil? - xml_builder.url(self.author.url) - end end - unless link.nil? || link == "" - xml_builder.link("href" => link, - "rel" => "alternate", - "type" => "text/html", - "title" => title) - end - unless description.nil? || description == "" - xml_builder.content(description, - "mode" => "escaped", - "type" => "text/html") - end - unless time.nil? - xml_builder.issued(time.iso8601) - end - unless tags.nil? || tags.size == 0 - for tag in tags - xml_builder.category(tag) + unless self.enclosures.blank? || self.enclosures.size == 0 + for enclosure in self.enclosures + attribute_hash = {} + next if enclosure.url.blank? + begin + if enclosure.file_size.blank? || enclosure.file_size.to_i == 0 + # We can't use this enclosure because it's missing the + # required file size. Check alternate versions for + # file_size. + if !enclosure.versions.blank? && enclosure.versions.size > 0 + for alternate in enclosure.versions + if alternate.file_size != nil && + alternate.file_size.to_i > 0 + enclosure = alternate + break + end + end + end + end + rescue + end + attribute_hash["url"] = FeedTools.normalize_url(enclosure.url) + if enclosure.type != nil + attribute_hash["type"] = enclosure.type + end + if enclosure.file_size != nil && enclosure.file_size.to_i > 0 + attribute_hash["length"] = enclosure.file_size.to_s + else + # We couldn't find an alternate and the problem is still + # there. Give up and go on. + xml_builder.comment!( + "*** Enclosure failed to include file size. Ignoring. ***") + next + end + xml_builder.enclosure(attribute_hash) end end build_xml_hook(feed_type, version, xml_builder) end + elsif feed_type == "atom" && version == 0.3 + raise "Atom 0.3 is obsolete." elsif feed_type == "atom" && version == 1.0 # normal atom format return xml_builder.entry("xmlns" => FEED_TOOLS_NAMESPACES['atom10']) do unless title.nil? || title == "" @@ -1798,24 +1846,24 @@ end unless self.author.nil? || self.author.email.nil? xml_builder.email(self.author.email) end unless self.author.nil? || self.author.url.nil? - xml_builder.url(self.author.url) + xml_builder.uri(self.author.url) end end unless link.nil? || link == "" - xml_builder.link("href" => link, + xml_builder.link("href" => FeedTools.escape_entities(self.link), "rel" => "alternate", "type" => "text/html", - "title" => title) + "title" => FeedTools.escape_entities(title)) end - unless description.nil? || description == "" + if !description.blank? xml_builder.content(description, "type" => "html") - else - xml_builder.content(FeedTools.no_content_string, + elsif !FeedTools.configurations[:no_content_string].blank? + xml_builder.content(FeedTools.configurations[:no_content_string], "type" => "html") end if self.updated != nil xml_builder.updated(self.updated.iso8601) elsif self.time != nil @@ -1849,28 +1897,39 @@ unless self.tags.nil? || self.tags.size == 0 for tag in self.tags xml_builder.category("term" => tag) end end + unless self.enclosures.blank? || self.enclosures.size == 0 + for enclosure in self.enclosures + attribute_hash = {} + next if enclosure.url.blank? + attribute_hash["rel"] = "enclosure" + attribute_hash["href"] = FeedTools.normalize_url(enclosure.url) + if enclosure.type != nil + attribute_hash["type"] = enclosure.type + end + if enclosure.file_size != nil && enclosure.file_size.to_i > 0 + attribute_hash["length"] = enclosure.file_size.to_s + end + xml_builder.link(attribute_hash) + end + end build_xml_hook(feed_type, version, xml_builder) end + else + raise "Unsupported feed format/version." end end - alias_method :tagline, :description - alias_method :tagline=, :description= - alias_method :subtitle, :description - alias_method :subtitle=, :description= - alias_method :summary, :description - alias_method :summary=, :description= - alias_method :abstract, :description - alias_method :abstract=, :description= - alias_method :content, :description - alias_method :content=, :description= + alias_method :summary, :content + alias_method :summary=, :content= + alias_method :abstract, :content + alias_method :abstract=, :content= + alias_method :description, :content + alias_method :description=, :content= alias_method :guid, :id alias_method :guid=, :id= - alias_method :published, :issued - alias_method :published=, :issued= # Returns a simple representation of the feed item object's state. def inspect return "#<FeedTools::FeedItem:0x#{self.object_id.to_s(16)} " + "LINK:#{self.link}>"