# ParseDate was removed from the standard library in Ruby 1.9. Nothing in this
# file calls it directly, so tolerate its absence instead of failing to load.
begin
  require "parsedate.rb"
rescue LoadError
  nil
end
# Time.parse (used for the environment's 'updated' attribute) is defined by the 'time' library.
require "time"

module Eeml
  # A parser for xml eeml v051, implemented with LibXML.
  #
  # Public entry points are make_environment_from_xml and
  # make_environments_from_xml; everything else is a protected helper.
  class LibXMLEemlParserV051 # :nodoc:
    include LibXML
    include Exceptions

    @@eeml_version = Constants::EEML['0.5.1']

    # Main method.
    # Takes an xml string and creates an Environment from it.
    # If an optional environment is given, that one is populated (overwritten)
    # instead of a new environment.
    #
    # Raises MissingNamespace when the root node's namespace is blank, and
    # whatever parse_xml / extract_environment_from_doc raise (BadXML, MissingNode, ...).
    def make_environment_from_xml(xml_str, given_environment = nil)
      doc = parse_xml(xml_str)
      raise MissingNamespace if doc.root.namespaces.namespace.blank?

      # TODO: what has to be reset in a given environment before passing it for re-population?
      env = given_environment || Environment.new
      extract_environment_from_doc(doc, env)
    end

    # Takes an xml string containing zero or more environment nodes, and
    # returns an array of Environment objects built from them.
    #
    # Raises MissingNamespace when the root node's namespace is blank.
    def make_environments_from_xml(xml_str)
      doc = parse_xml(xml_str)
      raise MissingNamespace if doc.root.namespaces.namespace.blank?

      extract_environments_from_doc(doc)
    end

    protected

    # Namespace declaration handed to every xpath lookup: binds the prefix 'x'
    # to the href of the eeml version this parser handles.
    def eeml_namespace
      "x:#{@@eeml_version[:href]}"
    end

    # Parses the given xml string and returns the resulting libxml document.
    # Raises BadXML (keeping the original backtrace) when libxml rejects the input.
    def parse_xml(xml_str)
      # Errors reported through the handler are collected here; the list is not
      # inspected further, but it is in scope in the rescue clause below.
      errors = []
      # http://libxml.rubyforge.org/rdoc/classes/LibXML/XML/Error.html
      # TODO: is the error handler set up per thread? (XML::Error.set_handler)
      XML::Error.set_handler { |error| errors << error }

      # TODO: performance - is this expensive?
      # TODO: are these configurations per-thread? If they're global (e.g. class variables)
      #       then we shouldn't be setting them here.
      XML.default_line_numbers = true

      # NOTE(review): strip_content is not defined in the visible part of this file;
      # presumably it is supplied elsewhere - confirm.
      parser = XML::Parser.string(strip_content(xml_str))
      begin
        doc = parser.parse
      rescue XML::Error => e
        # note: errors var available here, too.
        raise BadXML, "Malformed xml: #{e.class}: #{e}", e.backtrace
      end

      # validation?
      # seems we have to recreate our XML::Schema object on each invocation
      # else libxml segfaults very quickly
      doc
    end

    # Multiple (zero or more): builds one fresh Environment per environment
    # node found in the document and returns them as an array.
    def extract_environments_from_doc(doc)
      env_nodes = doc.find('x:environment', eeml_namespace)
      env_nodes.map { |env_node| extract_environment_from_node(env_node, Environment.new) }
    end

    # Single, mandatory: populates env_to_populate from the first environment
    # node of the document. Raises MissingNode when there is none.
    def extract_environment_from_doc(doc, env_to_populate)
      env_node = find_first_node_or_fail(doc, 'x:environment', 'environment', eeml_namespace)
      extract_environment_from_node(env_node, env_to_populate)
    end

    # Single, from node (everyone uses this to get the work done).
    # Copies attributes, optional child contents, location, tags and
    # datastreams from env_node into env_to_populate, and returns it.
    def extract_environment_from_node(env_node, env_to_populate)
      env = env_to_populate
      ns = eeml_namespace

      env.identifier = env_node['id']
      updated = env_node['updated']
      env.updated = Time.parse(updated) unless updated.nil?
      env.creator = env_node['creator']

      env.title = optional_content(env_node, 'x:title', 'title', ns)
      env.feed_url = optional_content(env_node, 'x:feed', 'feed', ns)
      env.description = optional_content(env_node, 'x:description', 'description', ns)
      env.website = optional_content(env_node, 'x:website', 'website', ns)
      env.status = optional_content(env_node, 'x:status', 'status', ns)
      env.email = optional_content(env_node, 'x:email', 'email', ns)
      env.icon = optional_content(env_node, 'x:icon', 'icon', ns)
      # Only the literal content "true" makes the environment private.
      env.private = (optional_content(env_node, 'x:private', 'private', ns) == "true")

      # Location is optional.
      # find_first_node_or_fail(env_node, 'x:location', 'location')
      loc_node = env_node.find_first('x:location', ns)
      env.location = extractLocation(loc_node) if loc_node

      env_node.find('x:tag', ns).each do |tag_node|
        env.tags << tag_node.content.strip
      end
      # Set unconditionally, whether or not any tag node was found.
      env.has_tag_element = true

      datastream_nodes = env_node.find('x:data', ns)
      # raise NoDataStreams.new, "no datastreams found" if datastream_nodes.empty?
      env.add_datastreams(extractDataStreams(datastream_nodes)) unless datastream_nodes.empty?

      env
    end

    # Builds a Location from a location node: the domain / disposition /
    # exposure attributes plus the optional name, lat, lon and ele children.
    # Raises a RuntimeError when given nil.
    def extractLocation(node)
      raise "given nil node" if node.nil?

      ns = eeml_namespace
      loc = Location.new
      loc.domain = node['domain']
      loc.disposition = node['disposition']
      loc.exposure = node['exposure']
      loc.name = optional_content(node, 'x:name', 'name', ns)
      loc.latitude = optional_content(node, 'x:lat', 'lat', ns)
      loc.longitude = optional_content(node, 'x:lon', 'lon', ns)
      loc.elevation = optional_content(node, 'x:ele', 'ele', ns)
      loc
    end

    # Returns an array (TODO: or a hash?) of DataStream objects, one per node
    # in the given list of data nodes.
    def extractDataStreams(nodes)
      nodes.map { |node| extractDataStream(node) }
    end

    # Builds and returns a detailed exception of the given class, for problems
    # concerning the given node (or its missing children).
    # Details include the node's name and line number (zero if not available).
    def exception_for_node(node, exception_class, message)
      ex = exception_class.new(message)
      ex.line_num = node.line_num
      ex.node_name = node_name_or_root(node)
      ex
    end

    # Builds a DataStream from a single data node: its id attribute, tags,
    # current/min/max values and unit.
    #
    # Raises MissingAttribute when the node has no id, DataHasMultipleValues
    # when more than one current_value child exists, and DataHasMultipleUnits
    # when more than one unit child exists.
    def extractDataStream(node)
      ns = eeml_namespace
      data = DataStream.new
      raise MissingAttribute.new(node.name, 'id') if node['id'].nil?

      data.identifier = node['id']
      node.find('x:tag', ns).each do |tag_node|
        data.tags << tag_node.content.strip
      end
      # Set unconditionally, whether or not any tag node was found.
      data.has_tag_element = true

      value_nodes = node.find('x:current_value', ns)
      # A missing current_value is tolerated:
      # raise exception_for_node(node, DataMissingValue, "Data node is missing current_value node.") if value_nodes.empty?
      if value_nodes.size > 1
        raise exception_for_node(node, DataHasMultipleValues, "Data node has multiple 'value' nodes.")
      end

      value_node = value_nodes.first
      max_value_node = node.find_first('x:max_value', ns)
      min_value_node = node.find_first('x:min_value', ns)

      value = Value.new
      value.min_value = min_value_node.content if min_value_node
      value.max_value = max_value_node.content if max_value_node
      if value_node
        value.value = value_node.content.strip
        value.recorded_at = value_node['at'] unless value_node['at'].blank?
      end
      # The Value is added even when no current_value node was present.
      data.add_value(value)

      unit_nodes = node.find('x:unit', ns)
      if unit_nodes.size > 1
        raise exception_for_node(node, DataHasMultipleUnits, "Data node has multiple 'unit' nodes.")
      end

      unit_node = unit_nodes.first
      if unit_node
        data.unit_symbol = unit_node['symbol']
        data.unit_type = unit_node['type']
        data.unit_value = unit_node.content.strip
      end

      data
    end

    # Helpers ------------------------------------------------------------------
    # Consider mixing these in to the libxml parser for more readable code

    # Returns the content of the first node matching xpath under base_node.
    # Raises MissingNode if the node isn't there.
    def mandatory_content(base_node, xpath, description, nslist = nil)
      find_first_node_or_fail(base_node, xpath, description, nslist).content
    end

    # Returns the node's content, or the given default if the node isn't there
    # (default itself defaults to nil).
    # description isn't used, but keeps our signature same as mandatory_content(), up to that point.
    def optional_content(base_node, xpath, description, nslist = nil, default = nil)
      node = base_node.find_first(xpath, nslist)
      node.nil? ? default : node.content
    end

    # Gets the name of the given node if it is a node, or 'root' if it is a doc
    # (anything that does not respond to :name).
    # For use only for error messages.
    def node_name_or_root(node)
      node.respond_to?(:name) ? node.name : 'root'
    end

    # Returns the first node matching xpath under base_node.
    # Raises MissingNode (carrying the base node's name, the description and
    # the xpath) if there is no such node.
    def find_first_node_or_fail(base_node, xpath, description, nslist = nil)
      node = base_node.find_first(xpath, nslist)
      raise(MissingNode.new(node_name_or_root(base_node), description, xpath)) if node.nil?

      node
    end
  end
end