require "parsedate.rb"
module Eeml
#a parser for xml eeml v051, implemented with LibXML
class LibXMLEemlParserV051 # :nodoc:
# Mix in LibXML so the XML:: constants used below (XML::Parser, XML::Error) resolve unqualified.
include LibXML
# Mix in Exceptions; presumably this is where MissingNamespace, BadXML, MissingNode etc.
# (raised unqualified by the methods of this class) are defined - confirm against that module.
include Exceptions
# Entry for EEML version 0.5.1 looked up from Constants::EEML.
# The methods of this class read its :href value to build the "x:<href>" namespace
# binding they pass to their XPath queries.
@@eeml_version = Constants::EEML['0.5.1']
# Main entry point: turns an EEML xml string into a single Environment.
# If given_environment is supplied, that object is populated (overwritten)
# and returned instead of a newly created Environment.
# Raises MissingNamespace when the root element of the document has no namespace.
def make_environment_from_xml(xml_str, given_environment = nil)
  document = parse_xml(xml_str)
  root_namespace = document.root.namespaces.namespace
  raise MissingNamespace if root_namespace.blank?
  #TODO: what has to be reset in a given environment before passing it for re-population?
  target = given_environment || Environment.new
  extract_environment_from_doc(document, target)
end
# Turns an xml string holding zero or more environment nodes into an array
# of Environment objects.
# Raises MissingNamespace when the root element of the document has no namespace.
def make_environments_from_xml(xml_str)
  document = parse_xml(xml_str)
  root_namespace = document.root.namespaces.namespace
  raise MissingNamespace if root_namespace.blank?
  extract_environments_from_doc(document)
end
protected
# Parses the given xml string (after passing it through strip_content) with
# LibXML and returns the parsed document.
# Raises BadXML, carrying the original backtrace, when LibXML raises an XML::Error.
def parse_xml(xml_str)
  #http://libxml.rubyforge.org/rdoc/classes/LibXML/XML/Error.html
  #TODO: is the error handler set up per thread? (XML::Error.set_handler)
  # Errors reported through the handler are collected here; nothing reads them yet.
  collected_errors = []
  XML::Error.set_handler { |reported| collected_errors << reported }
  #TODO: performance - is this expensive?
  #TODO: are these configurations per-thread? If they're global (e.g. class variables) then we shouldn't be setting them here.
  XML.default_line_numbers = true
  xml_parser = XML::Parser.string(strip_content(xml_str))
  #validation?
  # seems we have to recreate our XML::Schema object on each invocation
  # else libxml segfaults very quickly
  #doc.validate_schema(XML::Schema.from_string(IO.read(LOCAL_EEML5_SCHEMA_LOCATION)))
  begin
    # The value of this begin block (the parsed document) is the method's result.
    xml_parser.parse
  rescue XML::Error => parse_error
    #note: collected_errors is available here, too.
    raise BadXML, "Malformed xml: #{parse_error.class}: #{parse_error}", parse_error.backtrace
  end
end
#multiple (zero or more)
# Finds every environment node in the document and returns an array with one
# freshly created, populated Environment per node.
def extract_environments_from_doc(doc)
  namespace_binding = "x:#{@@eeml_version[:href]}"
  doc.find('x:environment', namespace_binding).map do |environment_node|
    extract_environment_from_node(environment_node, Environment.new)
  end
end
#single, mandatory
# Locates the first environment node in the document (find_first_node_or_fail
# raises MissingNode when there is none) and populates env_to_populate from it.
# Returns the populated environment.
def extract_environment_from_doc(doc, env_to_populate)
  namespace_binding = "x:#{@@eeml_version[:href]}"
  environment_node = find_first_node_or_fail(doc, 'x:environment', 'environment', namespace_binding)
  extract_environment_from_node(environment_node, env_to_populate)
end
#single, from node (everyone uses this to get the work done)
# Copies attributes and child-element content of the given environment node
# onto env_to_populate and returns it.
# - id / updated / creator come from attributes (updated is parsed with Time.parse when present)
# - title, feed, description, website, status, email, icon come from optional child elements
# - private is true only when the private element's content is exactly "true"
# - tags are appended to env.tags; datastreams are assigned only if data nodes exist
def extract_environment_from_node(env_node, env_to_populate)
  ns = "x:#{@@eeml_version[:href]}"
  # Content of an optional child element, or nil when the element is absent.
  child_text = lambda { |element| optional_content(env_node, "x:#{element}", element, ns) }
  env = env_to_populate

  env.identifier = env_node['id']
  updated_attr = env_node['updated']
  env.updated = Time.parse(updated_attr) unless updated_attr.nil?
  env.creator = env_node['creator']

  env.title = child_text.call('title')
  env.feed_url = child_text.call('feed')
  env.description = child_text.call('description')
  env.website = child_text.call('website')
  env.status = child_text.call('status')
  env.email = child_text.call('email')
  env.icon = child_text.call('icon')
  env.private = (child_text.call('private') == "true")

  #find_first_node_or_fail(env_node, 'x:location', 'location')
  location_node = env_node.find_first('x:location', ns)
  env.location = extractLocation(location_node) if location_node

  env_node.find('x:tag', ns).each { |tag_node| env.tags << tag_node.content.strip }

  data_nodes = env_node.find('x:data', ns)
  # raise NoDataStreams.new, "no datastreams found" if data_nodes.empty?
  env.datastreams = extractDataStreams(data_nodes) unless data_nodes.empty?
  env
end
# Builds a Location from the given location node.
# Illustrative node shape (element and attribute names as read below):
#   <location domain="..." disposition="..." exposure="...">
#     <name>...</name>
#     <lat>50.1</lat>
#     <lon>48.7</lon>
#     <ele>1.34</ele>
#   </location>
# Attributes are copied as-is; each child element is optional and yields nil when absent.
# Raises a RuntimeError when node is nil.
def extractLocation(node)
  raise "given nil node" if node.nil?
  ns = "x:#{@@eeml_version[:href]}"
  # Content of an optional child element, or nil when the element is absent.
  child_text = lambda { |element| optional_content(node, "x:#{element}", element, ns) }

  location = Location.new
  location.domain = node['domain']
  location.disposition = node['disposition']
  location.exposure = node['exposure']
  location.name = child_text.call('name')
  location.latitude = child_text.call('lat')
  location.longitude = child_text.call('lon')
  location.elevation = child_text.call('ele')
  location
end
# Converts the given list of data nodes into an array of DataStream objects,
# one per node and in the same order (TODO: or a hash?).
def extractDataStreams(nodes)
  nodes.map { |data_node| extractDataStream(data_node) }
end
# Builds (but does not raise) an exception of exception_class with the given
# message, for problems concerning the given node or its missing children.
# The exception is annotated with the node's line number (zero if not available)
# and the node's name, then returned.
def exception_for_node(node, exception_class, message)
  detailed_error = exception_class.new(message)
  detailed_error.line_num = node.line_num
  detailed_error.node_name = node_name_or_root(node)
  detailed_error
end
# Builds a DataStream from a single data node.
# Illustrative node shape (element and attribute names as read below):
#   <data id="...">
#     <tag>some_tag</tag>
#     <tag>another_tag</tag>
#     <current_value at="...">0</current_value>
#     <min_value>0.0</min_value>
#     <max_value>1022.0</max_value>
#     <unit symbol="..." type="...">Celsius</unit>
#   </data>
# Raises MissingAttribute when the node has no id attribute, DataMissingValue when
# there is no current_value child, DataHasMultipleValues / DataHasMultipleUnits when
# there is more than one current_value / unit child.
def extractDataStream(node)
  ns = "x:#{@@eeml_version[:href]}"
  stream = DataStream.new
  raise MissingAttribute.new(node.name, 'id') if node['id'].nil?
  stream.identifier = node['id']
  node.find('x:tag', ns).each { |tag_node| stream.tags << tag_node.content.strip }

  # Exactly one current_value child is required.
  current_value_nodes = node.find('x:current_value', ns)
  raise exception_for_node(node, DataMissingValue, "Data node is missing current_value node.") if current_value_nodes.empty?
  raise exception_for_node(node, DataHasMultipleValues, "Data node has multiple 'value' nodes.") if current_value_nodes.size > 1
  current_value_node = current_value_nodes.first
  upper_bound_node = node.find_first('x:max_value', ns)
  lower_bound_node = node.find_first('x:min_value', ns)

  reading = Value.new
  reading.min_value = lower_bound_node.content if lower_bound_node
  reading.max_value = upper_bound_node.content if upper_bound_node
  reading.value = current_value_node.content.strip
  recorded_at = current_value_node['at']
  reading.recorded_at = recorded_at unless recorded_at.blank?
  stream.add_value(reading)

  # At most one unit child is allowed; it is optional.
  unit_nodes = node.find('x:unit', ns)
  raise exception_for_node(node, DataHasMultipleUnits, "Data node has multiple 'unit' nodes.") if unit_nodes.size > 1
  unit_node = unit_nodes.first
  if unit_node
    stream.unit_symbol = unit_node['symbol']
    stream.unit_type = unit_node['type']
    stream.unit_value = unit_node.content.strip
  end
  stream
end
#Helpers ------------------------------------------------------------------
#Consider mixing these in to the libxml parser for more readable code
# Returns the content of the first node under base_node matching xpath
# (resolved with the optional nslist namespace binding).
# Raises MissingNode, built from the base node's name, description and xpath,
# when no such node exists.
def mandatory_content(base_node, xpath, description, nslist = nil)
  found = base_node.find_first(xpath, nslist)
  return found.content unless found.nil?
  raise(MissingNode.new(node_name_or_root(base_node), description, xpath))
end
# Returns the content of the first node under base_node matching xpath, or
# the given default (nil unless specified) when no such node exists.
# description is unused; it only keeps the leading parameters aligned with mandatory_content().
def optional_content(base_node, xpath, description, nslist = nil, default = nil)
  found = base_node.find_first(xpath, nslist)
  return default if found.nil?
  found.content
end
# Returns the given object's name when it responds to #name (i.e. it is a node),
# or the literal 'root' otherwise (e.g. when handed a document).
# Intended for building error messages only.
def node_name_or_root(node)
  return 'root' unless node.respond_to?(:name)
  node.name
end
# Returns the first node under base_node matching xpath (resolved with the
# optional nslist namespace binding).
# Raises MissingNode, built from the base node's name, description and xpath,
# when no such node exists.
def find_first_node_or_fail(base_node, xpath, description, nslist = nil)
  found = base_node.find_first(xpath, nslist)
  return found unless found.nil?
  raise(MissingNode.new(node_name_or_root(base_node), description, xpath))
end
end
end