require "atom/element"
module XHTML
  # Namespace URI of XHTML. Text constructs of type "xhtml" keep their
  # content inside a <div> element that carries this namespace.
  NS = "http://www.w3.org/1999/xhtml"
end
module Atom
# An Atom::Element representing a text construct.
# It has a single attribute, "type", which specifies how to interpret
# the element's content. Different types are:
#
# text:: a plain string, without any markup (default)
# html:: a chunk of HTML
# xhtml:: a chunk of *well-formed* XHTML
#
# You should set this attribute appropriately after you set a Text
# element (entry.content, entry.title or entry.summary).
#
# The content of this element can be retrieved in different formats; see #html and #xml.
class Text < Atom::Element
  attrb :type

  # Stores +value+ as the content (nil or false becomes the empty string),
  # sets the type to "text" and hands +name+ to the superclass initializer.
  def initialize(value, name) # :nodoc:
    @content = value || "" # in case of nil
    self["type"] = "text"
    super(name)
  end

  # convenient, but not overly useful. see #html instead.
  #
  # For "xhtml" content this is the serialized children of the wrapping
  # div; for every other type it is the content converted with #to_s.
  def to_s
    if self["type"] == "xhtml"
      # Array#to_s only concatenates on Ruby 1.8; later versions return the
      # inspect form, so each child is serialized explicitly and joined.
      @content.children.map { |child| child.to_s }.join
    else
      @content.to_s
    end
  end

  # returns a string suitable for dumping into an HTML document.
  # (or nil if that's impossible)
  #
  # if you're storing the content of a Text construct, you probably
  # want this representation.
  def html
    case self["type"]
    when "xhtml", "html"
      to_s
    when "text"
      # escape the plain text so it can be embedded in markup
      REXML::Text.new(to_s).to_s
    end
  end

  # attempts to parse the content of this element as XML and return it
  # as an array of REXML nodes ("text" content is returned as a
  # one-element array holding the string).
  #
  # If self["type"] is "html", Hpricot is used to convert the content to
  # XHTML first; a RuntimeError is raised when Hpricot can't be loaded.
  # Any other type raises a RuntimeError as well.
  def xml
    case self["type"]
    when "xhtml"
      @content.children
    when "text"
      [to_s]
    when "html"
      begin
        require "hpricot"
      rescue LoadError
        # LoadError is not a StandardError, so a bare rescue would miss it
        raise "Turning HTML content into XML requires Hpricot."
      end

      fixed = Hpricot(to_s, :xhtml_strict => true)
      # wrap the fragment in a single root element so that fragments with
      # several top-level nodes (or only text) parse, and so that
      # root.children is the fragment itself
      REXML::Document.new("<div>#{fixed}</div>").root.children
    else
      # XXX check that @type is an XML mimetype and parse it
      raise "I haven't implemented this yet"
    end
  end

  def inspect # :nodoc:
    "'#{to_s}'##{self['type']}"
  end

  # Sets an attribute. Assigning "type" is validated with valid_type?;
  # switching to "xhtml" additionally parses the current content into a
  # div element. Raises a RuntimeError for a rejected type or for content
  # that REXML can't parse.
  def []=(key, value) # :nodoc:
    if key == "type"
      unless valid_type? value
        raise "atomTextConstruct type '#{value}' is meaningless"
      end

      if value == "xhtml"
        begin
          parse_xhtml_content
        rescue REXML::ParseException
          raise "#{@content.inspect} can't be parsed as XML"
        end
      end
    end

    super(key, value)
  end

  # Returns the element built by the superclass with this construct's
  # content added to it. The "type" attribute is dropped when it is the
  # default "text".
  def to_element # :nodoc:
    e = super

    e.attributes.delete "type" if self["type"] == "text"

    # this should be done via inheritance
    # (an Atom::Content with a "src" attribute gets no inline content)
    return e if self.class == Atom::Content && self["src"]

    c = convert_contents e

    if c.is_a? String
      e.text = c
    elsif c.is_a? REXML::Element
      e << c.dup
    else
      raise RuntimeError, "atom:#{local_name} can't contain type #{@content.class}"
    end

    e
  end

  private

  # converts @content based on the value of self["type"]
  # (returns nil for any type not handled here)
  def convert_contents e
    if self["type"] == "xhtml"
      @content
    elsif self["type"] == "text" || self["type"].nil?
      REXML::Text.normalize(@content.to_s)
    elsif self["type"] == "html"
      # NOTE(review): this replaces "&" with itself, so the string is
      # returned unchanged. It may have been meant to escape ampersands;
      # confirm how REXML's Element#text= escapes before changing it.
      @content.to_s.gsub(/&/, "&")
    end
  end

  # true if +type+ is one of the three text construct types
  def valid_type? type
    ["text", "xhtml", "html"].include? type
  end

  # Replaces @content with an XHTML div element built from +xhtml+
  # (or from the current @content when no argument is given):
  #
  # * a REXML::Document is reduced to its root element first
  # * a div element in the XHTML namespace is copied as is
  # * any other element is placed inside a new XHTML div
  # * anything else is interpolated into a div and parsed
  #   (REXML::ParseException propagates to the caller)
  def parse_xhtml_content(xhtml = nil)
    xhtml ||= @content

    # REXML::Document is a subclass of REXML::Element, so a document has to
    # be unwrapped before the Element test or it would be treated as an
    # ordinary element. An empty document has a nil root and falls through
    # to the string branch, producing an empty div.
    xhtml = xhtml.root if xhtml.is_a? REXML::Document

    @content =
      if xhtml.is_a? REXML::Element
        if xhtml.name == "div" && xhtml.namespace == XHTML::NS
          xhtml.dup
        else
          wrapper = REXML::Element.new("div")
          wrapper.add_namespace(XHTML::NS)
          wrapper << xhtml.dup
          wrapper
        end
      else
        # the wrapping div gives the parser a single root element, so plain
        # text and fragments with several top-level nodes are accepted
        doc = REXML::Document.new("<div>#{xhtml}</div>")
        doc.root.add_namespace(XHTML::NS)
        doc.root
      end
  end
end
# Atom::Content behaves the same as an Atom::Text, but for two things:
#
# * the "type" attribute can be an arbitrary media type
# * there is a "src" attribute which is an IRI that points to the content of the entry (in which case the content element will be empty)
class Content < Atom::Text
  attrb :src

  # Same as Atom::Text#html, except that an element with a "src"
  # attribute has no inline content and yields the empty string.
  def html
    return "" if self["src"]

    super
  end

  private

  # Accepts everything the superclass accepts, plus any type that
  # contains a "/" (a media type).
  def valid_type? type
    super || type.match(/\//)
  end

  # Uses the superclass conversion when it produces a value; otherwise
  # falls back on the content itself: the root of a document, an element
  # unchanged, or the normalized string form of anything else.
  def convert_contents e
    converted = super
    return converted if converted

    case @content
    when REXML::Document then @content.root
    when REXML::Element then @content
    else REXML::Text.normalize(@content.to_s)
    end
  end
end
end