# xml-mapping -- bidirectional Ruby-XML mapper
#  Copyright (C) 2004,2005 Olaf Klischat

require 'rexml/document'
require "xml/xxpath"

module XML

  # Raised when an object cannot be mapped to or from XML (e.g. a
  # required value is missing and no default value was declared, or
  # no mapping class is known for a root element name).
  class MappingError < RuntimeError
  end

  # This is the central interface module of the xml-mapping library.
  #
  # Including this module in your classes adds XML mapping
  # capabilities to them.
  #
  # == Example
  #
  # === Input document:
  #
  #   :include: company.xml
  #
  # === mapping class declaration:
  #
  #   :include: company.rb
  #
  # === usage:
  #
  #   :include: company_usage.intout
  #
  # So you have to include XML::Mapping into your class to turn it
  # into a "mapping class", that is, to add XML mapping capabilities
  # to it. An instance of the mapping classes is then bidirectionally
  # mapped to an XML node (i.e. an element), where the state (simple
  # attributes, sub-objects, arrays, hashes etc.) of that instance is
  # mapped to sub-nodes of that node. In addition to the class and
  # instance methods defined in XML::Mapping, your mapping class will
  # get class methods like 'text_node', 'array_node' and so on; I call
  # them "node factory methods". More precisely, there is one node
  # factory method for each registered <em>node type</em>. Node types
  # are classes derived from XML::Mapping::Node; they're registered
  # with the xml-mapping library via XML::Mapping.add_node_class. The
  # node types TextNode, BooleanNode, NumericNode, ObjectNode,
  # ArrayNode, and HashNode are automatically registered by
  # xml/mapping.rb; you can easily write your own ones. The name of a
  # node factory method is inferred by 'underscoring' the name of the
  # corresponding node type; e.g. 'TextNode' becomes 'text_node'. Each
  # node factory method creates an instance of the corresponding node
  # type and adds it to the mapping class (not its instances). The
  # arguments to a node factory method are automatically turned into
  # arguments to the corresponding node type's initializer. So, in
  # order to learn more about the meaning of a node factory method's
  # parameters, you read the documentation of the corresponding node
  # type. All predefined node types expect as their first argument a
  # symbol that names an r/w attribute which will be added to the
  # mapping class. The mapping class is a normal Ruby class; you can
  # add constructors, methods and attributes to it, derive from it,
  # derive it from another class, include additional modules etc.
  #
  # Including XML::Mapping also adds all methods of
  # XML::Mapping::ClassMethods to your class (as class methods).
  #
  # As you may have noticed from the example, the node factory methods
  # generally use XPath expressions to specify locations in the mapped
  # XML document. To make this work, XML::Mapping relies on
  # XML::XXPath, which implements a subset of XPath, but also provides
  # write access, which is needed by the node types to support writing
  # data back to XML. Both XML::Mapping and XML::XXPath use REXML
  # (http://www.germane-software.com/software/rexml/) to represent XML
  # elements/documents in memory.
  module Mapping

    # can't really use class variables for these because they must be
    # shared by all class methods mixed into classes by including
    # Mapping. See
    # http://user.cs.tu-berlin.de/~klischat/mydocs/ruby/mixin_class_methods_global_state.txt.html
    # for a more detailed discussion.
    Classes_w_default_rootelt_names = {}  #:nodoc:
    Classes_w_nondefault_rootelt_names = {}  #:nodoc:

    # Hook run when XML::Mapping is included into _base_: extends
    # _base_ with ClassMethods and registers it under its default
    # root element name.
    def self.append_features(base) #:nodoc:
      super
      base.extend(ClassMethods)
      Classes_w_default_rootelt_names[base.default_root_element_name] = base
    end

    # Finds the mapping class corresponding to the given XML root
    # element name. This is the inverse operation to
    # <i>class</i>.root_element_name (see
    # XML::Mapping::ClassMethods.root_element_name).
    #
    # Explicitly assigned (non-default) root element names take
    # precedence over default ones. Returns nil if no class is
    # registered under _name_.
    def self.class_for_root_elt_name(name)
      # TODO: implement Hash read-only instead of this
      # interface
      Classes_w_nondefault_rootelt_names[name] ||
        Classes_w_default_rootelt_names[name]
    end

    # Calls obj_initializing(self) on every node in effect for this
    # object's class.
    def initialize_xml_mapping  #:nodoc:
      self.class.all_xml_mapping_nodes.each do |node|
        node.obj_initializing(self)
      end
    end

    # private :initialize_xml_mapping

    # Initializer. Calls obj_initializing(self) on all nodes. You
    # should call this using +super+ in your mapping classes to
    # inherit this behaviour.
    def initialize(*args)
      initialize_xml_mapping
    end

    # "fill" the contents of _xml_ into _self_. _xml_ is a
    # REXML::Element.
    #
    # First, pre_load(_xml_) is called, then all the nodes for this
    # object's class are processed (i.e. have their
    # #xml_to_obj method called) in the order of their definition
    # inside the class, then #post_load is called.
    def fill_from_xml(xml)
      pre_load(xml)
      self.class.all_xml_mapping_nodes.each do |node|
        node.xml_to_obj self, xml
      end
      post_load
    end

    # This method is called immediately before _self_ is filled from
    # an xml source. _xml_ is the source REXML::Element.
    #
    # The default implementation of this method is empty.
    def pre_load(xml)
    end

    # This method is called immediately after _self_ has been filled
    # from an xml source. If you have things to do after the object
    # has been successfully loaded from the xml (reorganising the loaded
    # data in some way, setting up additional views on the data etc.),
    # this is the place where you put them. You can also raise an
    # exception to abandon the whole loading process.
    #
    # The default implementation of this method is empty.
    def post_load
    end

    # Fill _self_'s state into the xml node (REXML::Element)
    # _xml_. All the nodes for this object's class are processed
    # (i.e. have their
    # #obj_to_xml method called) in the order of their definition
    # inside the class.
    def fill_into_xml(xml)
      self.class.all_xml_mapping_nodes.each do |node|
        node.obj_to_xml self, xml
      end
    end

    # Fill _self_'s state into a new xml node, return that
    # node.
    #
    # This method calls #pre_save, then #fill_into_xml, then
    # #post_save.
    def save_to_xml
      xml = pre_save
      fill_into_xml(xml)
      post_save(xml)
      xml
    end

    # This method is called when _self_ is to be converted to an XML
    # tree. It *must* create and return an XML element (as a
    # REXML::Element); that element will then be passed to
    # #fill_into_xml.
    #
    # The default implementation of this method creates a new empty
    # element whose name is the #root_element_name of _self_'s class
    # (see ClassMethods.root_element_name). By default, this is the
    # class name, with capital letters converted to lowercase and
    # preceded by a dash, e.g. "MySampleClass" becomes
    # "my-sample-class".
    def pre_save
      REXML::Element.new(self.class.root_element_name)
    end

    # This method is called immediately after _self_'s state has been
    # filled into an XML element.
    #
    # The default implementation does nothing.
    def post_save(xml)
    end

    # Save _self_'s state as XML into the file named _filename_.
    # The XML is obtained by calling #save_to_xml.
    def save_to_file(filename)
      xml = save_to_xml
      File.open(filename, "w") do |f|
        xml.write(f, 2)
      end
    end


    # Abstract base class for all node types. As mentioned in the
    # documentation for XML::Mapping, node types must be registered
    # using add_node_class, and a corresponding "node factory method"
    # (e.g. "text_node") will then be added as a class method to your
    # mapping classes. The node factory method is called from the body
    # of the mapping classes as demonstrated in the examples. It
    # creates an instance of its corresponding node type (the list of
    # parameters to the node factory method, preceded by the owning
    # mapping class, will be passed to the constructor of the node
    # type) and adds it to its owning mapping class, so there is one
    # node object per node definition per mapping class. That node
    # object will handle all XML marshalling/unmarshalling for this
    # node, for all instances of the mapping class. For this purpose,
    # the marshalling and unmarshalling methods of a mapping class
    # instance (fill_into_xml and fill_from_xml, respectively)
    # will call obj_to_xml resp. xml_to_obj on all nodes of the
    # mapping class, in the order of their definition, passing the
    # REXML element the data is to be marshalled to/unmarshalled from
    # as well as the object the data is to be read from/filled into.
    #
    # Node types that map some XML data to a single attribute of their
    # mapping class (that should be most of them) shouldn't be
    # directly derived from this class, but rather from
    # SingleAttributeNode.
    class Node
      # Initializer, to be called from descendant classes. _owner_ is
      # the mapping class this node is being defined in. It'll be
      # stored in _@owner_. The new node is appended to
      # _owner_.xml_mapping_nodes.
      def initialize(owner)
        @owner = owner
        owner.xml_mapping_nodes << self
      end

      # This is called by the XML unmarshalling machinery when the
      # state of an instance of this node's @owner is to be read from
      # an XML node. _obj_ is the instance, _xml_ is the element (a
      # REXML::Element). The node must read "its" data from _xml_
      # (using XML::XXPath or any other means) and store it to the
      # corresponding parts (attributes etc.) of _obj_'s state.
      def xml_to_obj(obj,xml)
        raise "abstract method called"
      end

      # This is called by the XML marshalling machinery when the
      # state of an instance of this node's @owner is to be stored
      # into an XML node. _obj_ is the instance, _xml_ is the element
      # (a REXML::Element). The node must extract "its" data from
      # _obj_ and store it to the corresponding parts (sub-elements,
      # attributes etc.) of _xml_ (using XML::XXPath or any other
      # means).
      def obj_to_xml(obj,xml)
        raise "abstract method called"
      end

      # Called when a new instance is being initialized. _obj_ is the
      # instance. You may set up initial values for the attributes
      # this node is responsible for here. Default implementation is
      # empty.
      def obj_initializing(obj)
      end
    end


    # Base class for node types that map some XML data to a single
    # attribute of their mapping class. This class also introduces a
    # general "options" hash parameter which may be used to influence
    # the creation of nodes in numerous ways, e.g. by providing
    # default attribute values when there is no source data in the
    # mapped XML.
    #
    # All node types that come with xml-mapping inherit from
    # SingleAttributeNode.
    class SingleAttributeNode < Node
      # Initializer. _owner_ is the owning mapping class (gets passed
      # to the superclass initializer and therefore put into
      # @owner). The second parameter (and hence the first parameter
      # to the node factory method), _attrname_, is a symbol that
      # names the mapping class attribute this node should map to. It
      # gets stored into @attrname, and the attribute (an r/w
      # attribute of name attrname) is added to the mapping class
      # (using ClassMethods#add_accessor).
      #
      # If the last argument is a hash, it is assumed to be the
      # abovementioned "options hash", and a copy of it is stored
      # into @options (the hash passed by the caller is never
      # modified). Two entries -- :optional and :default_value -- in
      # the options hash are already processed in SingleAttributeNode:
      #
      # Supplying :default_value=>_obj_ makes _obj_ the _default
      # value_ for this attribute. When unmarshalling (loading) an
      # object from an XML source, the attribute will be set to this
      # value if nothing was provided in the XML; when marshalling
      # (saving), the attribute won't be saved if it is set to the
      # default value.
      #
      # Providing just :optional=>true is equivalent to providing
      # :default_value=>nil.
      #
      # The remaining arguments are passed to initialize_impl, which
      # is the initializer subclasses should overwrite instead of
      # initialize.
      #
      # For example (TextNode is a subclass of SingleAttributeNode):
      #
      #   class Address
      #     include XML::Mapping
      #     text_node :city, "city", :optional=>true, :default_value=>"Berlin"
      #   end
      #
      # Here +Address+ is the _owner_, <tt>:city</tt> is the
      # _attrname_,
      # <tt>{:optional=>true,:default_value=>"Berlin"}</tt> is the
      # @options, and ["city"] is the argument list that'll be passed
      # to TextNode.initialize_impl. "city" is of course the XPath
      # expression locating the XML sub-element this text node refers
      # to; TextNode.initialize_impl stores it into @path.
      def initialize(owner,attrname,*args)
        super(owner)
        @attrname = attrname
        owner.add_accessor attrname
        # Work on a private copy: :default_value may be injected below,
        # and that must not leak into (or fail on) the caller's hash.
        @options = args.last.is_a?(Hash) ? args.pop.dup : {}
        if @options[:optional] && !@options.key?(:default_value)
          @options[:default_value] = nil
        end
        initialize_impl(*args)
      end

      # Initializer to be implemented by subclasses.
      def initialize_impl(*args)
        raise "abstract method called"
      end

      # Exception that may be used by implementations of
      # #extract_attr_value to announce that the attribute value is
      # not set in the XML and, consequently, the default value should
      # be set in the object being created, or an Exception be raised
      # if no default value was specified.
      class NoAttrValueSet < XXPathError
      end

      # Sets the attribute on _obj_ to the value extracted from _xml_.
      # If the value is reported as unset (NoAttrValueSet), the default
      # value is assigned instead; without a default value an
      # XML::MappingError is raised.
      def xml_to_obj(obj,xml)  # :nodoc:
        obj.send :"#{@attrname}=", extract_attr_value(xml)
      rescue NoAttrValueSet => err
        unless @options.key? :default_value
          raise XML::MappingError, "no value, and no default value: #{err}"
        end
        assign_default_value(obj)
      end

      # (to be overridden by subclasses) Extract and return the
      # attribute's value from _xml_. In the example above, TextNode's
      # implementation would return the current value of the
      # sub-element named by @path (i.e., "city"). If the
      # implementation decides that the attribute value is "unset" in
      # _xml_, it should raise NoAttrValueSet in order to initiate
      # proper handling of possibly supplied :optional and
      # :default_value options (you may use #default_when_xpath_err
      # for this purpose).
      def extract_attr_value(xml)
        raise "abstract method called"
      end

      # Writes the attribute's current value from _obj_ into _xml_.
      # With a default value declared, a value equal to the default is
      # not written at all; without one, a nil value raises
      # XML::MappingError.
      def obj_to_xml(obj,xml) # :nodoc:
        value = obj.send(:"#{@attrname}")
        if @options.key? :default_value
          set_attr_value(xml, value) unless value == @options[:default_value]
        else
          if value.nil?
            raise XML::MappingError, "no value, and no default value, for attribute: #{@attrname}"
          end
          set_attr_value(xml, value)
        end
      end

      # (to be overridden by subclasses) Write _value_ into the
      # correct sub-nodes of _xml_.
      def set_attr_value(xml, value)
        raise "abstract method called"
      end

      # Pre-sets the attribute on _obj_ to the default value, if one
      # was declared for this node.
      def obj_initializing(obj)  # :nodoc:
        assign_default_value(obj) if @options.key? :default_value
      end

      # utility method to be used by implementations of
      # #extract_attr_value. Calls the supplied block, catching
      # XML::XXPathError and mapping it to NoAttrValueSet. This is for
      # the common case that an implementation considers an attribute
      # value not to be present in the XML if some specific sub-path
      # does not exist.
      def default_when_xpath_err # :yields:
        yield
      rescue XML::XXPathError => err
        raise NoAttrValueSet, "Attribute #{@attrname} not set (XXPathError: #{err})"
      end

      private

      # Assigns this node's default value to the mapped attribute of
      # _obj_. Each object gets its own clone of the default so that
      # instances don't share one mutable default object; if the
      # default can't be cloned, the default object itself is assigned.
      # Only the clone attempt is guarded -- errors raised by the
      # attribute writer propagate to the caller.
      def assign_default_value(obj)
        default = @options[:default_value]
        value = begin
                  default.clone
                rescue StandardError
                  default
                end
        obj.send :"#{@attrname}=", value
      end
    end


    # Registers the new node class _c_ (must be a descendant of Node)
    # with the xml-mapping framework.
    #
    # A new "factory method" will automatically be added to
    # ClassMethods (and therefore to all classes that include
    # XML::Mapping from now on); so you can call it from the body of
    # your mapping class definition in order to create nodes of type
    # _c_. The name of the factory method is derived by "underscoring"
    # the (unqualified) name of _c_;
    # e.g. _c_==<tt>Foo::Bar::MyNiftyNode</tt> will result in the
    # creation of a factory method named +my_nifty_node+. The
    # generated factory method creates and returns a new instance of
    # _c_. The list of arguments to _c_.new consists of _self_
    # (i.e. the mapping class the factory method was called from)
    # followed by the arguments passed to the factory method. You
    # should always use the factory methods to create instances of
    # node classes; you should never need to call a node class's
    # constructor directly.
    #
    # For a demonstration, see the calls to +text_node+, +array_node+
    # etc. in the examples along with the corresponding node classes
    # TextNode, ArrayNode etc. (these predefined node classes are in
    # no way "special"; they're added using add_node_class in
    # mapping.rb just like any custom node classes would be).
    def self.add_node_class(c)
      meth_name = c.name.split('::')[-1].gsub(/^(.)/){$1.downcase}.gsub(/(.)([A-Z])/){$1+"_"+$2.downcase}
      # The factory method closes over the class object itself, so no
      # source text has to be generated and evaluated. Inside the
      # block, self is the mapping class the factory was called on.
      ClassMethods.send(:define_method, meth_name) do |*args|
        c.new(self, *args)
      end
    end


    # The instance methods of this module are automatically added as
    # class methods to a class that includes XML::Mapping.
    module ClassMethods
      #ClassMethods = Module.new do  # this is the alternative -- but see above for peculiarities

      # Add getter and setter methods for a new attribute named _name_
      # to this class. This is a convenience method intended to be
      # called from Node class initializers.
      #
      # A reader or writer is only generated if no public or protected
      # instance method of that name exists yet, so accessors you wrote
      # yourself (or inherited) are left untouched.
      def add_accessor(name)
        name = name.to_s
        # method_defined? accepts strings as well as symbols, so the
        # check works regardless of how instance_methods reports names.
        send(:attr_reader, name) unless method_defined?(name)
        send(:attr_writer, name) unless method_defined?("#{name}=")
      end

      # Create a new instance of this class from the XML contained in
      # the file named _filename_. Calls load_from_xml internally.
      def load_from_file(filename)
        # Block form closes the file once the document has been parsed.
        xml = File.open(filename) { |f| REXML::Document.new(f) }
        load_from_xml(xml.root)
      end

      # Create a new instance of this class from the XML contained in
      # _xml_ (a REXML::Element).
      #
      # Allocates a new object (without calling its initializer),
      # calls initialize_xml_mapping and then fill_from_xml(_xml_) on
      # it, and returns it.
      def load_from_xml(xml)
        obj = self.allocate
        obj.initialize_xml_mapping
        obj.fill_from_xml(xml)
        obj
      end

      # array of all nodes types defined in this class, in the order
      # of their definition
      def xml_mapping_nodes
        @xml_mapping_nodes ||= []
      end

      # enumeration of all nodes types in effect when
      # marshalling/unmarshalling this class, that is, node types
      # defined for this class as well as for its superclasses. The
      # node types are returned in the order of their definition,
      # starting with the topmost superclass that has node types
      # defined.
      def all_xml_mapping_nodes
        # TODO: we could return a dynamic Enumerable here, or cache
        # the array...
        inherited =
          if superclass && superclass.respond_to?(:all_xml_mapping_nodes)
            superclass.all_xml_mapping_nodes
          else
            []
          end
        inherited + xml_mapping_nodes
      end

      # The "root element name" of this class (combined getter/setter
      # method).
      #
      # The root element name is the name of the root element of the
      # XML tree returned by <this class>.#save_to_xml (or, more
      # specifically, <this class>.#pre_save). By default, this method
      # returns the #default_root_element_name; you may call this
      # method with an argument to set the root element name to
      # something other than the default.
      def root_element_name(name=nil)
        if name
          # Drop the registrations under the current name, and any
          # default registration under the new name, before
          # registering this class under the new name.
          Classes_w_nondefault_rootelt_names.delete(root_element_name)
          Classes_w_default_rootelt_names.delete(root_element_name)
          Classes_w_default_rootelt_names.delete(name)
          @root_element_name = name
          Classes_w_nondefault_rootelt_names[name] = self
        end
        @root_element_name || default_root_element_name
      end

      # The default root element name for this class. Equals the class
      # name, with all parent module names stripped, and with capital
      # letters converted to lowercase and preceded by a dash;
      # e.g. "Foo::Bar::MySampleClass" becomes "my-sample-class".
      def default_root_element_name
        self.name.split('::')[-1].gsub(/^(.)/){$1.downcase}.gsub(/(.)([A-Z])/){$1+"-"+$2.downcase}
      end

    end



    # "polymorphic" load function. Turns the XML tree _xml_ into an
    # object, which is returned. The class of the object is
    # automatically determined from the root element name of _xml_
    # using XML::Mapping::class_for_root_elt_name.
    #
    # Raises XML::MappingError if no mapping class is registered for
    # that root element name.
    def self.load_object_from_xml(xml)
      unless c = class_for_root_elt_name(xml.name)
        raise MappingError, "no mapping class for root element name #{xml.name}"
      end
      c.load_from_xml(xml)
    end

    # Like load_object_from_xml, but loads from the XML file named by
    # _filename_.
    def self.load_object_from_file(filename)
      # Block form closes the file once the document has been parsed.
      xml = File.open(filename) { |f| REXML::Document.new(f) }
      load_object_from_xml(xml.root)
    end

  end

end