require "parsedate.rb"
module Eeml
#a parser for xml eeml v005, implemented with LibXML
class LibXMLEemlParserV005 # :nodoc:
include LibXML
#main method
#take an xml string, and create an Environment from it
def make_environment_from_xml(xml_str)
  #two steps: parse_xml turns the string into a document, extract_environment maps that
  #document onto an Environment. the Environment is the return value.
  #nothing is rescued here, so whatever either step raises reaches the caller.
  extract_environment(parse_xml(xml_str))
end
protected
def parse_xml(xml_str)
  #parses xml_str with the LibXML string parser and returns the resulting document.
  #raises BadXML (re-using the original backtrace) when the parser raises XML::Error.
  #no schema validation happens here - see the note below.
  reported = []
  #http://libxml.rubyforge.org/rdoc/classes/LibXML/XML/Error.html
  #TODO: is the error handler set up per thread? (XML::Error.set_handler)
  #the handler only collects what libxml reports; nothing in this method reads the collection afterwards.
  XML::Error.set_handler do |error|
    reported << error
  end
  #TODO: performance - is this expensive?
  #TODO: are these configurations per-thread? If they're global (e.g. class variables) then we shouldn't be setting them here.
  XML.default_line_numbers = true
  #validation?
  # seems we have to recreate our XML::Schema object on each invocation
  # else libxml segfaults very quickly
  #doc.validate_schema(XML::Schema.from_string(IO.read(LOCAL_EEML_SCHEMA_LOCATION)))
  xml_parser = XML::Parser.string(xml_str)
  #the value of this begin block (the parsed document) is the method's return value
  begin
    xml_parser.parse
  rescue XML::Error => parse_failure
    #note: reported is available here, too.
    raise BadXML, "Malformed xml: #{parse_failure.class}: #{parse_failure}", parse_failure.backtrace
  end
end
def extract_environment(doc)
  #maps a parsed eeml document onto a new Environment and returns it.
  #a document without an environment node fails inside find_first_node_or_fail.
  env = Environment.new
  #name the default namespace 'x' so the xpath expressions used below can address it
  doc.root.namespaces.default_prefix = 'x'
  env_node = find_first_node_or_fail(doc, 'x:environment', 'environment')

  #attributes carried on the environment node itself
  env.identifier = env_node['id']
  updated_attr = env_node['updated']
  unless updated_attr.nil?
    #NOTE(review): ParseDate is loaded from parsedate.rb, which later Ruby releases do not ship - confirm the target runtime
    env.updated = Time.mktime(*ParseDate.parsedate(updated_attr))
  end
  env.creator = env_node['creator']

  #optional child elements as [setter, xpath, description]; an absent element assigns nil
  [
    ['title=', 'x:title', 'title'],
    ['feed_url=', 'x:feed', 'feed'],
    ['description=', 'x:description', 'description'],
    ['website=', 'x:website', 'website'],
    ['status=', 'x:status', 'status'],
    ['email=', 'x:email', 'email'],
    ['icon=', 'x:icon', 'icon']
  ].each do |setter, xpath, description|
    env.send(setter, optional_content(env_node, xpath, description))
  end

  #location is optional: env.location is only assigned when a location node exists
  location_node = env_node.find_first('x:location')
  if location_node
    env.location = extractLocation(location_node)
  end

  #having no data nodes is not treated as an error; env.datastreams is simply not assigned
  data_nodes = env_node.find('x:data')
  unless data_nodes.empty?
    env.datastreams = extractDataStreams(data_nodes)
  end

  env
end
def extractLocation(node)
  #builds a Location from a location node and returns it.
  #read from the node: the attributes domain, disposition and exposure, plus the content of
  #the child elements name, lat, lon and ele (for instance lat 50.1, lon 48.7, ele 1.34).
  #a missing attribute or child element leaves the matching field nil.
  #raises RuntimeError when node is nil.
  if node.nil?
    raise "given nil node"
  end

  location = Location.new
  location.domain = node['domain']
  location.disposition = node['disposition']
  location.exposure = node['exposure']

  #child elements as [setter, xpath, description]
  [
    ['name=', 'x:name', 'name'],
    ['latitude=', 'x:lat', 'lat'],
    ['longitude=', 'x:lon', 'lon'],
    ['elevation=', 'x:ele', 'ele']
  ].each do |setter, xpath, description|
    location.send(setter, optional_content(node, xpath, description))
  end

  location
end
#return an array (TODO: or a hash?) of DataStream objects from the given list of data nodes
def extractDataStreams(nodes)
  #converts every node with extractDataStream, keeping the order in which nodes yields them.
  #an empty node list gives an empty array.
  #relies on nodes offering map (an Array, or any collection that mixes in Enumerable).
  nodes.map { |data_node| extractDataStream(data_node) }
end
#builds and returns a detailed exception of the given class, for problems concerning the given node (or its missing children)
#details include node's name and line number (zero if not available)
def exception_for_node(node, exception_class, message)
  #node: the node the problem was found on (its line_num is read, and its name via node_name_or_root)
  #exception_class: instantiated with message only; instances must accept line_num= and node_name=
  #the exception is returned, not raised - raising is left to the caller.
  error = exception_class.new(message)
  error.line_num = node.line_num
  error.node_name = node_name_or_root(node)
  error
end
def extractDataStream(node)
  #builds a DataStream from one data node and returns it.
  #read from the node: the id attribute (required), the content of every tag child, exactly one
  #value child (content plus its minValue/maxValue attributes) and at most one unit child
  #(content plus its symbol/type attributes).
  #raises MissingAttribute when id is absent, DataMissingValue / DataHasMultipleValues when the
  #number of value children is not one, and DataHasMultipleUnits when there are several unit children.
  stream = DataStream.new

  stream_id = node['id']
  raise MissingAttribute.new('id', node.name) if stream_id.nil?
  stream.identifier = stream_id

  node.find('x:tag').each { |tag_node| stream.tags << tag_node.content }

  values = node.find('x:value')
  if values.empty?
    raise exception_for_node(node, DataMissingValue, "Data node is missing value node.")
  elsif values.size > 1
    raise exception_for_node(node, DataHasMultipleValues, "Data node has multiple 'value' nodes.")
  end
  value = values.first
  stream.min_value = value['minValue']
  stream.max_value = value['maxValue']
  stream.value = value.content

  units = node.find('x:unit')
  if units.size > 1
    raise exception_for_node(node, DataHasMultipleUnits, "Data node has multiple 'unit' nodes.")
  end
  unit = units.first
  #the unit is optional: without one the unit_* fields are left untouched
  return stream if unit.nil?

  stream.unit_symbol = unit['symbol']
  stream.unit_type = unit['type']
  stream.unit_value = unit.content
  stream
end
#Helpers ------------------------------------------------------------------
#Consider mixing these in to the libxml parser for more readable code
#raises MissingNode if the node isn't there
def mandatory_content(base_node, xpath, description)
  #returns the content of the first node under base_node that matches xpath.
  #description only feeds the MissingNode error, together with base_node's name (or 'root') and the xpath.
  found = base_node.find_first(xpath)
  if found.nil?
    raise MissingNode.new(node_name_or_root(base_node), description, xpath)
  end
  found.content
end
#returns the node's content, or the given default if the node isn't there (default itself defaults to nil)
#description isn't used, but keeps our signature same as mandatory_content(), up to that point.
def optional_content(base_node, xpath, description, default = nil)
  #looks up the first node under base_node that matches xpath.
  #found: its content is returned. not found (nil): default is returned.
  #description is accepted but never read.
  found = base_node.find_first(xpath)
  return default if found.nil?
  found.content
end
#get the name of the given node if it is a node, or 'root' if it is a doc.
#for use only for error messages
def node_name_or_root(node)
  #anything that does not respond to name (a document, per the comment above) is reported as 'root'
  return 'root' unless node.respond_to?(:name)
  node.name
end
def find_first_node_or_fail(base_node, xpath, description)
  #returns the first node under base_node that matches xpath (the node itself, not its content).
  #raises MissingNode - built from base_node's name (or 'root'), description and xpath - when nothing matches.
  found = base_node.find_first(xpath)
  if found.nil?
    raise MissingNode.new(node_name_or_root(base_node), description, xpath)
  end
  found
end
end
end