require 'nokogiri'
require 'pathname'
require 'rubyXL/objects/reference'
module RubyXL
module OOXMLObjectClassMethods
# Get the value of a [sub]class variable if it exists, or create the respective variable
# with the passed-in +default+ (or +{}+, if not specified)
#
# Throughout this class, we are setting class variables through explicit method calls
# rather than by directly addressing the name of the variable because of context issues:
# addressing variable by name creates it in the context of defining class, while calling
# the setter/getter method addresses it in the context of descendant class,
# which is what we need.
def obtain_class_variable(var_name, default = {})
  # Reuse the variable when it is already visible from the receiving class;
  # otherwise create it on the receiving class, seeded with +default+.
  return class_variable_get(var_name) if class_variable_defined?(var_name)

  class_variable_set(var_name, default)
end
# Defines an attribute of OOXML object.
# === Parameters
# * +attribute_name+ - Name of the element attribute as seen in the source XML. Can be either "String" or :Symbol
# * Special attribute name '_' (underscore) denotes the value of the element rather than attribute.
# * +attribute_type+ - Specifies the conversion type for the attribute when parsing. Available options are:
# * +:int+ - Integer
# * +:uint+ - Unsigned Integer
# * +:double+ - Float
# * +:string+ - String (no conversion)
# * +:sqref+ - RubyXL::Sqref
# * +:ref+ - RubyXL::Reference
# * +:bool+ - Boolean ("1" and "true" convert to +true+, others to +false+)
# * one of +simple_types+ - String, plus the list of acceptable values is saved for future validation (not used yet).
# * +extra_parameters+ - Hash of optional parameters as follows:
# * +:accessor+ - Name of the accessor for this attribute to be defined on the object. If not provided, defaults to the snake_cased +attribute_name+.
# * +:default+ - Value this attribute defaults to if not explicitly provided.
# * +:required+ - Whether this attribute is required when writing XML. If the value of the attribute is not explicitly provided, +:default+ is written instead.
# * +:computed+ - Do not store this attribute on +parse+, but do call the object-provided read accessor on +write_xml+.
# ==== Examples
# define_attribute(:outline, :bool, :default => true)
# A Boolean attribute 'outline' with default value +true+ will be accessible by calling +obj.outline+
# define_attribute(:uniqueCount, :int)
# An Integer attribute 'uniqueCount' accessible as +obj.unique_count+
# define_attribute(:_, :string, :accessor => :expression)
# The value of the element will be accessible as a String by calling +obj.expression+
# define_attribute(:errorStyle, %w{ stop warning information }, :default => 'stop')
# A String attribute named 'errorStyle' will be accessible as +obj.error_style+, valid values are "stop", "warning", "information"
def define_attribute(attr_name, attr_type, extra_params = {})
  registry = obtain_class_variable(:@@ooxml_attributes)

  # Start from the caller's options and record the conversion type next to them.
  settings = extra_params.merge(:attr_type => attr_type)
  settings[:accessor] ||= accessorize(attr_name)

  # Attributes are registered under their XML name, always as a String key.
  registry[attr_name.to_s] = settings

  # Computed attributes are read through an accessor the class provides itself,
  # so no reader/writer pair is generated for them.
  return if settings[:computed]
  send(:attr_accessor, settings[:accessor])
end
# Defines a child node of OOXML object.
# === Parameters
# * +klass+ - Class (descendant of RubyXL::OOXMLObject) of the child nodes. Child node objects will be produced by calling +parse+ method of that class.
# * +extra_parameters+ - Hash of optional parameters as follows:
# * +:accessor+ - Name of the accessor for this child node to be defined on the object. If not provided, defaults to the snake_cased node name.
# * +:node_name+ - Node name for the child node, in case it does not match the one defined by the +klass+.
# * +:collection+ - Whether the child node should be treated as a single node or a collection of nodes:
# * +false+ (default) - child node is directly accessible through the respective accessor;
# * +true+ - a collection of child nodes is accessed as +Array+ through the respective accessor;
# * +:with_count+ - same as +true+, but in addition, the attribute +count+ is defined on the current object, that will be automatically set to the number of elements in the collection at the start of +write_xml+ call.
# ==== Examples
# define_child_node(RubyXL::Alignment)
# Define a singular child node parsed by the RubyXL::Alignment.parse() and accessed by the default obj.alignment accessor
# define_child_node(RubyXL::Hyperlink, :collection => true, :accessor => :hyperlinks)
# Define an array of nodes accessed by obj.hyperlinks accessor, each of which will be parsed by the RubyXL::Hyperlink.parse()
# define_child_node(RubyXL::BorderEdge, :node_name => :left)
# define_child_node(RubyXL::BorderEdge, :node_name => :right)
# Use class RubyXL::BorderEdge when parsing both the <left> and <right> elements.
# define_child_node(RubyXL::Font, :collection => :with_count, :accessor => :fonts)
# Upon writing of the object this was defined on, its count attribute will be set to the count of nodes in fonts array
def define_child_node(klass, extra_params = {})
  registry = obtain_class_variable(:@@ooxml_child_nodes)

  # An explicit :node_name wins; otherwise use the tag name declared on +klass+.
  node_name = extra_params[:node_name]
  node_name = klass.class_variable_get(:@@ooxml_tag_name) unless node_name
  node_name = node_name.to_s

  # Same precedence for the accessor: explicit option first, derived name second.
  accessor = extra_params[:accessor]
  accessor = accessorize(node_name) unless accessor
  accessor = accessor.to_sym

  collection_mode = extra_params[:collection]
  registry[node_name] = { :class => klass, :is_array => collection_mode, :accessor => accessor }

  define_count_attribute if collection_mode == :with_count
  send(:attr_accessor, accessor)
end
def define_count_attribute
  # +count+ is an unsigned integer attribute that is always written out (:required).
  count_options = { :required => true }
  define_attribute(:count, :uint, count_options)
end
private :define_count_attribute
# Defines the name of the element that represents the current OOXML object. Should only be used once per object.
# In case of different objects represented by the same class in different parts of OOXML tree, +:node_name+
# extra parameter can be used to override the default element name.
# === Parameters
# * +element_name+
# ==== Examples
# define_element_name 'externalReference'
def define_element_name(element_name)
  # Stored on the receiving class, where +define_child_node+ and +write_xml+ read it back.
  tag_variable = :@@ooxml_tag_name
  class_variable_set(tag_variable, element_name)
end
# Builds an instance of the receiving class from XML.
# === Parameters
# * +node+ - XML source: a +String+, +IO+ or +Zip::InputStream+ (parsed with Nokogiri first),
#   a +Nokogiri::XML::Document+ (its root element is used), or an already parsed element.
# * +known_namespaces+ - Hash of namespace "url" => "prefix" used to normalize the names of
#   child elements. When omitted, the receiving class' +@@ooxml_namespaces+ is used.
# === Raises
# * +RuntimeError+ if a child element was not declared through +define_child_node+.
def parse(node, known_namespaces = nil)
  case node
  when String, IO, Zip::InputStream then node = Nokogiri::XML.parse(node)
  end

  if node.is_a?(Nokogiri::XML::Document)
    @namespaces = node.namespaces
    node = node.root
    # ignorable_attr = node.attributes['Ignorable']
    # @ignorables << ignorable_attr.value if ignorable_attr
  end

  obj = self.new

  known_attributes = obtain_class_variable(:@@ooxml_attributes)

  # The pseudo-attribute '_' stands for the text content of the element itself.
  content_params = known_attributes['_']
  process_attribute(obj, node.text, content_params) if content_params

  node.attributes.each_pair do |_key, attr|
    attr_name = attr.namespace ? "#{attr.namespace.prefix}:#{attr.name}" : attr.name
    attr_params = known_attributes[attr_name]

    # Undeclared attributes are silently ignored.
    next if attr_params.nil?
    # raise "Unknown attribute [#{attr_name}] for element [#{node.name}]" if attr_params.nil?

    process_attribute(obj, attr.value, attr_params) unless attr_params[:computed]
  end

  known_child_nodes = obtain_class_variable(:@@ooxml_child_nodes)
  return obj if known_child_nodes.empty?

  known_namespaces ||= obtain_class_variable(:@@ooxml_namespaces)

  node.element_children.each do |child_node|
    ns = child_node.namespace
    # Nokogiri reports +nil+ for an element that belongs to no namespace; such a child is
    # looked up by its bare name instead of failing on +nil.href+.
    prefix = ns && (known_namespaces[ns.href] || ns.prefix)

    child_node_name = case prefix
                      when '', nil then child_node.name
                      else "#{prefix}:#{child_node.name}"
                      end

    child_node_params = known_child_nodes[child_node_name]
    raise "Unknown child node [#{child_node_name}] for element [#{node.name}]" if child_node_params.nil?

    parsed_object = child_node_params[:class].parse(child_node, known_namespaces)

    if child_node_params[:is_array]
      index = parsed_object.index_in_collection

      # A container object is its own collection; otherwise the collection is behind the accessor.
      collection = if self < RubyXL::OOXMLContainerObject then obj
                   else obj.send(child_node_params[:accessor])
                   end

      if index.nil?
        collection << parsed_object
      else
        # Sparse collections place the object at the position it asks for.
        collection[index] = parsed_object
      end
    else
      obj.send("#{child_node_params[:accessor]}=", parsed_object)
    end
  end

  obj
end
private
# Derives a Ruby-style accessor name from an XML name: camelCase boundaries become
# underscores, a namespace colon becomes an underscore, and the result is a lowercase Symbol
# (for example, 'uniqueCount' => :unique_count and 'r:id' => :r_id).
def accessorize(str)
  str.to_s
     .gsub(/([A-Z\d]+)([A-Z][a-z])/, '\1_\2')
     .gsub(/([a-z\d])([A-Z])/, '\1_\2')
     .tr(':', '_')
     .downcase
     .to_sym
end
# Converts +raw_value+ according to +params[:attr_type]+ and assigns the result to +obj+
# through the +params[:accessor]+ writer.
# === Parameters
# * +obj+ - object being populated.
# * +raw_value+ - attribute (or element) text as read from XML; +nil+ is assigned as +nil+.
# * +params+ - attribute definition hash as built by +define_attribute+.
# === Raises
# * +ArgumentError+ if the text is not a valid number for a numeric type, or is negative for +:uint+.
def process_attribute(obj, raw_value, params)
  val = raw_value &&
        case params[:attr_type]
        when :double then Float(raw_value) # http://www.datypic.com/sc/xsd/t-xsd_double.html
        when :string then raw_value
        when Array   then raw_value # Case of Simple Types
        when :sqref  then RubyXL::Sqref.new(raw_value)
        when :ref    then RubyXL::Reference.new(raw_value)
        when :bool   then ['1', 'true'].include?(raw_value)
        when :int    then parse_decimal_integer(raw_value)
        when :uint
          v = parse_decimal_integer(raw_value)
          raise ArgumentError, "invalid value for unsigned Integer(): \"#{raw_value}\"" if v < 0
          v
        end
  obj.send("#{params[:accessor]}=", val)
end

# Parses +raw_value+ strictly as a decimal integer. A bare Integer("010") would honour Ruby's
# radix prefixes and read the text as octal (8), and Integer("08") would raise; in XML a
# leading zero carries no such meaning.
def parse_decimal_integer(raw_value)
  # An explicit base is only accepted for String input.
  raw_value.is_a?(String) ? Integer(raw_value, 10) : Integer(raw_value)
end
end
module OOXMLObjectInstanceMethods
# Hook invoked when this module is mixed into +klass+: the class-level definition and
# parsing methods are attached to +klass+ as well.
def self.included(klass)
  class_level_methods = RubyXL::OOXMLObjectClassMethods
  klass.extend(class_level_methods)
end
# Instance-side shortcut: the variables live on the class, so the lookup (and creation
# with +default+, if missing) is delegated to the object's class.
def obtain_class_variable(var_name, default = {})
  owner = self.class
  owner.obtain_class_variable(var_name, default)
end
private :obtain_class_variable
# Creates the object and pre-populates its state from +params+, a Hash keyed by accessor name.
# Every non-computed attribute gets an instance variable (+nil+ when absent from +params+);
# child nodes are then set up by +init_child_nodes+.
def initialize(params = {})
  obtain_class_variable(:@@ooxml_attributes).each_value do |definition|
    next if definition[:computed]

    accessor = definition[:accessor]
    instance_variable_set("@#{accessor}", params[accessor])
  end

  init_child_nodes(params)
end
# Sets the instance variable behind every declared child node: the value given in +params+
# (even an explicit +nil+) wins; otherwise collections start as an empty Array and singular
# children as +nil+.
def init_child_nodes(params)
  obtain_class_variable(:@@ooxml_child_nodes).each_value do |definition|
    accessor = definition[:accessor]
    initial_value = params.fetch(accessor) { definition[:is_array] ? [] : nil }
    instance_variable_set("@#{accessor}", initial_value)
  end
end
private :init_child_nodes
# Two objects are equal when +other+ is an instance of this object's class (or a subclass)
# and every declared attribute and child node compares equal through its accessor.
def ==(other)
  return false unless other.is_a?(self.class)

  # Attributes are compared first; child nodes are only looked at if all attributes match.
  [:@@ooxml_attributes, :@@ooxml_child_nodes].all? do |registry_name|
    obtain_class_variable(registry_name).each_value.all? do |definition|
      send(definition[:accessor]) == other.send(definition[:accessor])
    end
  end
end
# Recursively write the OOXML object and all its children out as Nokogiri::XML. Immediately before the actual
# generation, +before_write_xml()+ is called to perform last-minute cleanup and validation operations; if it
# returns +false+, an empty string is returned (rather than +nil+, so Nokogiri::XML's << operator
# can be used without additional +nil+ checking)
# === Parameters
# * +xml+ - Base Nokogiri::XML object used for building. If omitted, a blank document will be generated.
# * +node_name_override+ - if present, is used instead of the default element name for this object provided by +define_element_name+
# ==== Examples
# obj.write_xml()
# Creates a new empty +Nokogiri::XML+, populates it with the OOXML structure as described in the respective definition, and returns the resulting +Nokogiri::XML+ object.
# obj.write_xml(seed_xml)
# Using the passed-in +Nokogiri+ +xml+ object, creates a new element corresponding to +obj+ according to its definition, along with all its properties and children, and returns the newly created element.
# obj.write_xml(seed_xml, 'overridden_element_name')
# Same as above, but uses the passed-in +node_name_override+ as the new element name, instead of its default name set by +define_element_name+.
def write_xml(xml = nil, node_name_override = nil)
  if xml.nil?
    # Top-level call: build into a fresh UTF-8 document and return the serialized XML string.
    seed_xml = Nokogiri::XML('')
    seed_xml.encoding = 'UTF-8'
    result = write_xml(seed_xml)
    return result if result == ''
    seed_xml << result
    return seed_xml.to_xml({ :indent => 0, :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML })
  end

  return '' unless before_write_xml

  # Populate namespaces, if any. An empty prefix denotes the default namespace.
  attrs = {}
  obtain_class_variable(:@@ooxml_namespaces).each_pair { |k, v| attrs[v.empty? ? 'xmlns' : "xmlns:#{v}"] = k }

  obtain_class_variable(:@@ooxml_attributes).each_pair do |k, v|
    val = self.send(v[:accessor])

    if val.nil?
      # Unset optional attributes are omitted; required ones fall back to their default.
      next unless v[:required]
      val = v[:default]
    end

    # Only +nil+ bypasses the conversion. +false+ must reach the :bool branch so that it is
    # written as '0' (a plain truthiness guard here would emit it as "false" instead).
    unless val.nil?
      val = case v[:attr_type]
            when :bool   then val ? '1' : '0'
            when :double then val.to_s.gsub(/\.0*\Z/, '') # Trim trailing zeroes
            else val
            end
    end

    attrs[k] = val
  end

  # The pseudo-attribute '_' carries the text content of the element, not a real attribute.
  element_text = attrs.delete('_')
  elem = xml.create_element(node_name_override || obtain_class_variable(:@@ooxml_tag_name), attrs, element_text)

  obtain_class_variable(:@@ooxml_child_nodes).each_pair do |child_node_name, child_node_params|
    node_obj = get_node_object(child_node_params)
    next if node_obj.nil?

    if node_obj.respond_to?(:write_xml) && !node_obj.equal?(self)
      # If child node is either +OOXMLObject+, or +OOXMLContainerObject+ on its first (envelope) pass,
      # serialize that object.
      elem << node_obj.write_xml(xml, child_node_name)
    else
      # If child node is either vanilla +Array+, or +OOXMLContainerObject+ on its second (content) pass,
      # serialize its members, skipping empty slots.
      node_obj.each { |item| elem << item.write_xml(xml, child_node_name) unless item.nil? }
    end
  end

  elem
end
# Prototype method. For sparse collections (+Rows+, +Cells+, etc.) must return index at which this object
# is expected to reside in the collection. If +nil+ is returned, then object is simply added
# to the end of the collection.
def index_in_collection
  # Default: no fixed position is requested. The empty body evaluates to +nil+, which tells
  # the parser to append the object to the collection.
end
# Returns whatever currently sits behind the accessor of the child node described by
# +child_node_params+ (a child node definition hash).
def get_node_object(child_node_params)
  reader = child_node_params[:accessor]
  send(reader)
end
private :get_node_object
# Subclass provided filter to perform last-minute operations (cleanup, count, etc.) immediately prior to write,
# along with option to terminate the actual write if +false+ is returned (for example, to avoid writing
# the collection's root node if the collection is empty).
def before_write_xml
  #TODO# This will go away once containers are fully implemented.
  obtain_class_variable(:@@ooxml_child_nodes).each_value do |definition|
    # Only collections declared with :with_count keep the +count+ attribute in sync.
    next unless definition[:is_array] == :with_count

    self.count = send(definition[:accessor]).size
  end

  # Always let the write proceed.
  true
end
end
# Parent class for defining OOXML based objects (not unlike Rails' +ActiveRecord+!)
# Most importantly, provides functionality of parsing such objects from XML,
# and marshalling them to XML.
class OOXMLObject
  # All behaviour comes from the shared mixin; its +included+ hook also adds the class-level
  # definition and parsing methods to this class.
  include RubyXL::OOXMLObjectInstanceMethods
end
# Parent class for OOXML container objects (for example,
# <fonts><font>...</font><font>...</font></fonts>)
# that obscures the top-level container, allowing direct access to the contents as +Array+.
class OOXMLContainerObject < Array
include OOXMLObjectInstanceMethods
# Creates the container.
# === Parameters
# * +params+ - Hash keyed by accessor name, as accepted by the inherited initializer. The
#   additional key +:_+ may hold the initial content of the container, which is copied into
#   it element by element.
def initialize(params = {})
  # Work on a copy: removing +:_+ from the hash the caller passed in would silently change
  # the caller's data (and would raise on a frozen hash).
  params = params.dup
  array_content = params.delete(:_)

  # Bare +super+ forwards the current value of +params+, i.e. the copy without +:_+.
  super

  array_content.each_with_index { |v, i| self[i] = v } if array_content
end
# For the collection child the container itself is the node object (its members are the
# content); any other child is resolved by the inherited implementation.
def get_node_object(child_node_params)
  return self if child_node_params[:is_array]

  super
end
protected :get_node_object
# Sets the instance variable behind every non-collection child node to the value supplied
# in +params+ (+nil+ when absent).
def init_child_nodes(params)
  obtain_class_variable(:@@ooxml_child_nodes).each_value do |definition|
    # Only one collection node allowed per OOXMLContainerObject, and it is contained in itself.
    unless definition[:is_array]
      accessor = definition[:accessor]
      instance_variable_set("@#{accessor}", params[accessor])
    end
  end
end
protected :init_child_nodes
def before_write_xml
  # Nothing to prepare here (no +count+ bookkeeping is done for containers);
  # always allow the write to proceed.
  true
end
# Human-readable representation: the class name, the inherited +inspect+ output, and every
# instance variable rendered as name=value.
def inspect
  # Collect the pairs with +map+. Assigning the return value of +each+ back to the list
  # would replace the built strings with the bare variable names.
  vars = instance_variables.map { |v| "#{v}=#{instance_variable_get(v).inspect}" }
  "<#{self.class}: #{super} #{vars.join(", ")}>"
end
class << self
  # Container flavour of the +count+ attribute: declared as :computed, so no accessor is
  # generated for it.
  def define_count_attribute
    # Count will be inherited from Array, so there is no need to define it explicitly.
    count_options = { :required => true, :computed => true }
    define_attribute(:count, :uint, count_options)
  end
  protected :define_count_attribute
end
end
# Extension class providing functionality for top-level OOXML objects that are represented by
# their own .xml files in the .xlsx zip container.
class OOXMLTopLevelObject < OOXMLObject
# NOTE(review): presumably the relative position of this kind of object when parts are
# written out; the code consuming SAVE_ORDER is not in this file, so confirm there.
SAVE_ORDER = 500
# Absolute root path ('/') as a Pathname.
ROOT = ::Pathname.new('/')
# Object that +file_index+ queries through +root.rels_hash+. Nothing in this file assigns it.
attr_accessor :root
# Prototype method. For top-level OOXML object, returns the path at which the current object's XML file
# is located within the .xlsx zip container.
def xlsx_path
  # Must be overridden: the base class has no location of its own.
  raise(RuntimeError, 'Subclass responsebility')
end
# Sets the list of namespaces on this object to be added when writing out XML. Valid only on top-level objects.
# === Parameters
# * +namespace_hash+ - Hash of namespaces in the form of "url" => "prefix"
# ==== Examples
# set_namespaces('http://schemas.openxmlformats.org/spreadsheetml/2006/main' => '',
# 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' => 'r')
def self.set_namespaces(namespace_hash)
  # Kept on the receiving class; +parse+ and +write_xml+ read it back as +@@ooxml_namespaces+.
  class_variable_set(:@@ooxml_namespaces, namespace_hash)
end
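# Generates the top-level OOXML object by parsing its XML file, read directly from
# the .xlsx zip container. Returns a falsy value if no entry is found at that path.
# === Parameters
# * +zip_file+ - zip container to read from; must respond to +find_entry+.
# * +file_path+ - path of the XML file within the container (passed through +RubyXL::from_root+ before the lookup).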
def self.parse_file(zip_file, file_path)
  entry = zip_file.find_entry(RubyXL::from_root(file_path))
  # Nothing to parse: hand back the falsy lookup result as is.
  return entry unless entry

  entry.get_input_stream do |stream|
    # Accommodate the Nokogiri Java implementation, which is incapable of reading from a stream
    source = defined?(JRUBY_VERSION) ? stream.read : stream
    parse(source)
  end
end
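# Saves the contents of the object as XML to the respective location in the .xlsx zip container.
# Nothing is added if +write_xml+ produces an empty string.
# === Parameters
# * +zip_stream+ - zip output stream (must respond to +put_next_entry+ and +write+) to which the resulting XML should be added.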
def add_to_zip(zip_stream)
  xml_string = write_xml

  # An empty result means there is nothing to store, so no entry is created.
  unless xml_string.empty?
    zip_stream.put_next_entry(RubyXL::from_root(xlsx_path))
    zip_stream.write(xml_string)
  end
end
# 1-based position of this very object (compared by identity) among the objects that
# +root.rels_hash+ lists for its class. An object that is not listed yields 1 as well.
def file_index
  siblings = root.rels_hash[self.class]
  position = siblings.index { |candidate| candidate.equal?(self) }
  position.to_i + 1
end
end
end