# Copyright (c) 2008 The Kaphan Foundation # # For licensing information see LICENSE.txt. =begin License.txt Copyright (c) 2008 Peerworks Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. =end require 'forwardable' require 'delegate' require 'rubygems' require 'xml/libxml' require 'atom/xml/parser.rb' module Atom # :nodoc: NAMESPACE = 'http://www.w3.org/2005/Atom' unless defined?(NAMESPACE) module Pub NAMESPACE = 'http://www.w3.org/2007/app' end # Raised when a Parsing Error occurs. class ParseError < StandardError; end # Raised when a Serialization Error occurs. class SerializationError < StandardError; end # Provides support for reading and writing simple extensions as defined by the Atom Syndication Format. # # A Simple extension is an element from a non-atom namespace that has no attributes and only contains # text content. It is interpreted as a key-value pair when the namespace and the localname of the # extension make up the key. 
# Since in XML you can have many instances of an element, the values are
# represented as an array of strings, so to manipulate the values manipulate the array returned by
# +[ns, localname]+.
#
module SimpleExtensions
  attr_reader :simple_extensions

  # Looks up the values of a simple extension, creating an empty value list on first access.
  #
  # +ns+:: The namespace.
  # +localname+:: The local name of the extension element.
  #
  # Returns the ValueProxy stored under the key "{ns,localname}".
  #
  def [](ns, localname)
    # The backing hash is built lazily, on the first lookup.
    @simple_extensions = {} unless defined?(@simple_extensions) && !@simple_extensions.nil?
    @simple_extensions["{#{ns},#{localname}}"] ||= ValueProxy.new
  end

  # An Array delegate that additionally carries an +as_attribute+ flag (false by default).
  class ValueProxy < DelegateClass(Array)
    attr_accessor :as_attribute

    # Wraps a fresh empty Array and clears the +as_attribute+ flag.
    def initialize
      super([])
      @as_attribute = false
    end
  end
end

# Represents a Generator as defined by the Atom Syndication Format specification.
#
# The generator identifies an agent or engine used to produce a feed.
#
# See also http://www.atomenabled.org/developers/syndication/atom-format-spec.php#element.generator
class Generator
  include Xml::Parseable

  attr_accessor :name
  attribute :uri, :version

  # Initialize a new Generator.
  #
  # +o+:: An XML::Reader object or a Hash of attribute values. Anything else is ignored.
  #
  # Yields the new generator when a block is given.
  #
  def initialize(o = nil)
    if o.is_a?(XML::Reader)
      # The element text becomes the name; the attributes are then parsed from the same node.
      @name = o.read_string.strip
      parse(o, :once => true)
    elsif o.is_a?(Hash)
      o.each_pair { |key, value| send("#{key}=", value) }
    end

    yield(self) if block_given?
  end
end

# Represents a Category as defined by the Atom Syndication Format specification.
#
#
class Category
  include Atom::Xml::Parseable
  include SimpleExtensions
  attribute :label, :scheme, :term

  # Initialize a new Category.
  #
  # +o+:: An XML::Reader object or a Hash of attribute values. Anything else is ignored.
  #
  # Yields the new category when a block is given.
  #
  def initialize(o = nil)
    if o.is_a?(XML::Reader)
      parse(o, :once => true)
    elsif o.is_a?(Hash)
      o.each_pair { |key, value| send("#{key}=", value) }
    end

    yield(self) if block_given?
  end
end

# Represents a Person as defined by the Atom Syndication Format specification.
#
# A Person is used for all author and contributor attributes.
# # See also http://www.atomenabled.org/developers/syndication/atom-format-spec.php#atomPersonConstruct # class Person include Xml::Parseable element :name, :uri, :email # Initialize a new person. # # +o+:: An XML::Reader object or a hash. Valid hash keys are +:name+, +:uri+ and +:email+. def initialize(o = {}) case o when XML::Reader o.read parse(o) when Hash o.each do |k, v| self.send("#{k.to_s}=", v) end end end def inspect " true) end def to_xml(nodeonly = true, name = 'content', namespace = nil, namespace_map = Atom::Xml::NamespaceMap.new) node = XML::Node.new("#{namespace_map.get(Atom::NAMESPACE)}:#{name}") node << self.to_s node end end # Html content within an Atom document. class Html < Base attribute :type, :'xml:lang' # Creates a new Content::Html. # # +o+:: An XML::Reader or a HTML string. # def initialize(o) case o when XML::Reader super(o.read_string.gsub(/\s+/, ' ').strip) parse(o, :once => true) when String super(o) @type = 'html' end end def to_xml(nodeonly = true, name = 'content', namespace = nil, namespace_map = Atom::Xml::NamespaceMap.new) # :nodoc: require 'iconv' # Convert from utf-8 to utf-8 as a way of making sure the content is UTF-8. # # This is a pretty crappy way to do it but if we don't check libxml just # fails silently and outputs the content element without any content. At # least checking here and raising an exception gives the caller a chance # to try and recitfy the situation. # begin node = XML::Node.new("#{namespace_map.get(Atom::NAMESPACE)}:#{name}") node << Iconv.iconv('utf-8', 'utf-8', self.to_s, namespace_map = nil) node['type'] = 'html' node['xml:lang'] = self.xml_lang node rescue Iconv::IllegalSequence => e raise SerializationError, "Content must be converted to UTF-8 before attempting to serialize to XML: #{e.message}." end end end # XHTML content within an Atom document. 
class Xhtml < Base XHTML = 'http://www.w3.org/1999/xhtml' attribute :type, :'xml:lang' def initialize(xml) super("") parse(xml, :once => true) starting_depth = xml.depth # Get the next element - should be a div according to the atom spec while xml.read == 1 && xml.node_type != XML::Reader::TYPE_ELEMENT; end if xml.local_name == 'div' && xml.namespace_uri == XHTML set_content(xml.read_inner_xml.strip.gsub(/\s+/, ' ')) else set_content(xml.read_outer_xml) end # get back to the end of the element we were created with while xml.read == 1 && xml.depth > starting_depth; end end def to_xml(nodeonly = true, name = 'content', namespace = nil, namespace_map = Atom::Xml::NamespaceMap.new) node = XML::Node.new("#{namespace_map.get(Atom::NAMESPACE)}:#{name}") node['type'] = 'xhtml' node['xml:lang'] = self.xml_lang div = XML::Node.new('div') div['xmlns'] = XHTML p = XML::Parser.string(to_s) content = p.parse.root.copy(true) div << content node << div node end end end # Represents a Source as defined by the Atom Syndication Format specification. # # See also http://www.atomenabled.org/developers/syndication/atom-format-spec.php#element.source class Source extend Forwardable def_delegators :@links, :alternate, :self, :alternates, :enclosures include Xml::Parseable element :id element :updated, :class => Time, :content_only => true element :title, :subtitle, :class => Content elements :authors, :contributors, :class => Person elements :links def initialize(o = nil) @authors, @contributors, @links = [], [], Links.new case o when XML::Reader unless current_node_is?(o, 'source', NAMESPACE) raise ArgumentError, "Invalid node for atom:source - #{o.name}(#{o.namespace})" end o.read parse(o) when Hash o.each do |k, v| self.send("#{k.to_s}=", v) end end yield(self) if block_given? end end # Represents a Feed as defined by the Atom Syndication Format specification. # # A feed is the top level element in an atom document. 
# It is a container for feed level
# metadata and for each entry in the feed.
#
# This supports pagination as defined in RFC 5005, see http://www.ietf.org/rfc/rfc5005.txt
#
# == Parsing
#
# A feed can be parsed using the Feed.load_feed method. This method accepts a String containing
# a valid atom document, an IO object, or an URI to a valid atom document.  For example:
#
#   # Using a File
#   feed = Feed.load_feed(File.open("/path/to/myfeed.atom"))
#
#   # Using a URL
#   feed = Feed.load_feed(URI.parse("http://example.org/afeed.atom"))
#
# == Encoding
#
# A feed can be converted to XML using the to_xml method that returns a valid atom document in a String.
#
# == Attributes
#
# A feed has the following attributes:
#
# +id+:: A unique id for the feed.
# +updated+:: The Time the feed was updated.
# +title+:: The title of the feed.
# +subtitle+:: The subtitle of the feed.
# +authors+:: An array of Atom::Person objects that are authors of this feed.
# +contributors+:: An array of Atom::Person objects that are contributors to this feed.
# +generator+:: An Atom::Generator.
# +rights+:: A string describing the rights associated with this feed.
# +entries+:: An array of Atom::Entry objects.
# +links+:: An array of Atom::Link objects. (This is actually an Atom::Links array which is an Array with some sugar).
#
# == References
# See also http://www.atomenabled.org/developers/syndication/atom-format-spec.php#element.feed
#
class Feed
  include Xml::Parseable
  include SimpleExtensions
  extend Forwardable
  def_delegators :@links, :alternate, :self, :via, :first_page, :last_page, :next_page, :prev_page

  loadable!

  namespace Atom::NAMESPACE
  element :id, :rights
  element :generator, :class => Generator
  element :title, :subtitle, :class => Content
  element :updated, :class => Time, :content_only => true
  elements :links
  elements :authors, :contributors, :class => Person
  elements :entries

  # Initialize a Feed.
  #
  # This will also yield itself, so a feed can be constructed like this:
  #
  #   feed = Feed.new do |feed|
  #     feed.title = "My Cool feed"
  #   end
  #
  # +o+:: An XML Reader or a Hash of attributes.
  #
  # Raises ArgumentError when the reader's next node is not atom:feed.
  #
  def initialize(o = {})
    @links = Links.new
    @entries = []
    @authors = []
    @contributors = []

    case o
    when XML::Reader
      unless next_node_is?(o, 'feed', Atom::NAMESPACE)
        raise ArgumentError, "XML document was missing atom:feed: #{o.read_outer_xml}"
      end

      o.read
      parse(o)
    when Hash
      o.each_pair { |key, value| send("#{key}=", value) }
    end

    yield(self) if block_given?
  end

  # Return true if this is the first feed in a paginated set,
  # i.e. the self link equals the 'first' link.
  def first?
    links.self == links.first_page
  end

  # Returns true if this is the last feed in a paginated set,
  # i.e. the self link equals the 'last' link.
  def last?
    links.self == links.last_page
  end

  # Reloads the feed by fetching the self uri.
  #
  # Returns the freshly loaded Feed (the receiver is not modified), or nil
  # when the feed has no self link.
  def reload!
    Feed.load_feed(URI.parse(links.self.href)) if links.self
  end

  # Iterates over each entry in the feed.
  #
  # ==== Options
  #
  # +paginate+:: If true and the feed supports pagination this will fetch each page of the feed.
  # +since+:: If a Time object is provided, iteration stops at the first entry that was updated
  #           before that time. This option is only honoured together with +paginate+.
  #
  # Without a block an Enumerator over the same entries is returned.
  #
  def each_entry(options = {}, &block)
    # Previously a paginated call without a block failed on nil.call.
    return enum_for(:each_entry, options) unless block
    return entries.each(&block) unless options[:paginate]

    since = options[:since]
    page = self

    loop do
      since_reached = false

      page.entries.each do |entry|
        if since && entry.updated && since > entry.updated
          since_reached = true
          break
        end

        block.call(entry)
      end

      next_link = page.next_page
      break if since_reached || next_link.nil?

      page = next_link.fetch
      # Link#fetch hands back the raw response body when the target is not a
      # feed; there is nothing more to iterate in that case.
      break unless page.is_a?(Feed)
    end
  end
end

# Represents an Entry as defined by the Atom Syndication Format specification.
#
# An Entry represents an individual entry within a Feed.
#
# == Parsing
#
# An Entry can be parsed using the Entry.load_entry method. This method accepts a String containing
# a valid atom entry document, an IO object, or an URI to a valid atom entry document.
For example: # # # Using a File # entry = Entry.load_entry(File.open("/path/to/myfeedentry.atom")) # # # Using a URL # Entry = Entry.load_entry(URI.parse("http://example.org/afeedentry.atom")) # # The document must contain a stand alone entry element as described in the Atom Syndication Format. # # == Encoding # # A Entry can be converted to XML using, the to_xml method that returns a valid atom entry document in a String. # # == Attributes # # An entry has the following attributes: # # +id+:: A unique id for the entry. # +updated+:: The Time the entry was updated. # +published+:: The Time the entry was published. # +title+:: The title of the entry. # +summary+:: A short textual summary of the item. # +authors+:: An array of Atom::Person objects that are authors of this entry. # +contributors+:: An array of Atom::Person objects that are contributors to this entry. # +rights+:: A string describing the rights associated with this entry. # +links+:: An array of Atom:Link objects. (This is actually an Atom::Links array which is an Array with some sugar). # +source+:: Metadata of a feed that was the source of this item, for feed aggregators, etc. # +categories+:: Array of Atom::Categories. # +content+:: The content of the entry. This will be one of Atom::Content::Text, Atom::Content:Html or Atom::Content::Xhtml. # # == References # See also http://www.atomenabled.org/developers/syndication/atom-format-spec.php#element.entry for more detailed # definitions of the attributes. # class Entry include Xml::Parseable include SimpleExtensions extend Forwardable def_delegators :@links, :alternate, :self, :alternates, :enclosures, :edit_link, :via loadable! namespace Atom::NAMESPACE element :title, :id, :summary element :updated, :published, :class => Time, :content_only => true element :content, :class => Content element :source, :class => Source elements :links elements :authors, :contributors, :class => Person elements :categories, :class => Category # Initialize an Entry. 
# # This will also yield itself, so an Entry can be constructed like this: # # entry = Entry.new do |entry| # entry.title = "My Cool entry" # end # # +o+:: An XML Reader or a Hash of attributes. # def initialize(o = {}) @links = Links.new @authors = [] @contributors = [] @categories = [] case o when XML::Reader if current_node_is?(o, 'entry', Atom::NAMESPACE) || next_node_is?(o, 'entry', Atom::NAMESPACE) o.read parse(o) else raise ArgumentError, "Entry created with node other than atom:entry: #{o.name}" end when Hash o.each do |k,v| send("#{k.to_s}=", v) end end yield(self) if block_given? end # Reload the Entry by fetching the self link. def reload! if links.self Entry.load_entry(URI.parse(links.self.href)) end end end # Links provides an Array of Link objects belonging to either a Feed or an Entry. # # Some additional methods to get specific types of links are provided. # # == References # # See also http://www.atomenabled.org/developers/syndication/atom-format-spec.php#element.link # for details on link selection and link attributes. # class Links < DelegateClass(Array) include Enumerable # Initialize an empty Links array. def initialize super([]) end # Get the alternate. # # Returns the first link with rel == 'alternate' that matches the given type. def alternate(type = nil) detect { |link| (link.rel.nil? || link.rel == Link::Rel::ALTERNATE) && (type.nil? || type == link.type) } end # Get all alternates. def alternates select { |link| link.rel.nil? || link.rel == Link::Rel::ALTERNATE } end # Gets the self link. def self detect { |link| link.rel == Link::Rel::SELF } end # Gets the via link. def via detect { |link| link.rel == Link::Rel::VIA } end # Gets all links with rel == 'enclosure' def enclosures select { |link| link.rel == Link::Rel::ENCLOSURE } end # Gets the link with rel == 'first'. # # This is defined as the first page in a pagination set. def first_page detect { |link| link.rel == Link::Rel::FIRST } end # Gets the link with rel == 'last'. 
# # This is defined as the last page in a pagination set. def last_page detect { |link| link.rel == Link::Rel::LAST } end # Gets the link with rel == 'next'. # # This is defined as the next page in a pagination set. def next_page detect { |link| link.rel == Link::Rel::NEXT } end # Gets the link with rel == 'prev'. # # This is defined as the previous page in a pagination set. def prev_page detect { |link| link.rel == Link::Rel::PREVIOUS } end # Gets the edit link. # # This is the link which can be used for posting updates to an item using the Atom Publishing Protocol. # def edit_link detect { |link| link.rel == 'edit' } end end # Represents a link in an Atom document. # # A link defines a reference from an Atom document to a web resource. # # == References # See http://www.atomenabled.org/developers/syndication/atom-format-spec.php#element.link for # a description of the different types of links. # class Link module Rel # :nodoc: ALTERNATE = 'alternate' SELF = 'self' VIA = 'via' ENCLOSURE = 'enclosure' FIRST = 'first' LAST = 'last' PREVIOUS = 'prev' NEXT = 'next' end include Xml::Parseable attribute :href, :rel, :type, :length # Create a link. # # +o+:: An XML::Reader containing a link element or a Hash of attributes. # def initialize(o) case o when XML::Reader if current_node_is?(o, 'link') parse(o, :once => true) else raise ArgumentError, "Link created with node other than atom:link: #{o.name}" end when Hash [:href, :rel, :type, :length].each do |attr| self.send("#{attr}=", o[attr]) end else raise ArgumentError, "Don't know how to handle #{o}" end end remove_method :length= def length=(v) @length = v.to_i end def to_s self.href end def ==(o) o.respond_to?(:href) && o.href == self.href end # This will fetch the URL referenced by the link. # # If the URL contains a valid feed, a Feed will be returned, otherwise, # the body of the response will be returned. # # TODO: Handle redirects. 
# def fetch content = Net::HTTP.get_response(URI.parse(self.href)).body begin Atom::Feed.load_feed(content) rescue ArgumentError, ParseError => ae content end end def inspect "" end end end