#parsedate was removed from the standard library in ruby 1.9. Keep using it
#where it exists (behaviour unchanged), otherwise fall back to the stdlib
#"time" extension so that this file can still be loaded.
begin
  require "parsedate.rb"
rescue LoadError
  require "time"
end

module Eeml
  #a parser for xml eeml v005, implemented with LibXML
  class LibXMLEemlParserV005 # :nodoc:
    include LibXML

    #main method
    #take an xml string, and create an Environment from it
    #raises BadXML if the string cannot be parsed,
    #and MissingNode if the document has no environment node.
    def make_environment_from_xml(xml_str)
      doc = parse_xml(xml_str)
      extract_environment(doc)
    end

    protected

    #parses the given xml string and returns the resulting libxml document.
    #raises BadXML (carrying the original backtrace) when libxml raises XML::Error.
    def parse_xml(xml_str)
      #collected by the handler below; not consulted afterwards, but installing
      #the handler replaces whatever handler was set before.
      errors = []
      #http://libxml.rubyforge.org/rdoc/classes/LibXML/XML/Error.html
      #TODO: is the error handler set up per thread? (XML::Error.set_handler)
      XML::Error.set_handler { |error| errors << error }

      #TODO: performance - is this expensive?
      #TODO: are these configurations per-thread? If they're global (e.g. class variables) then we shouldn't be setting them here.
      XML.default_line_numbers = true

      parser = XML::Parser.string(xml_str)
      begin
        #the value of this begin block (the parsed document) is the return value
        parser.parse
      rescue XML::Error => e
        #note: errors var available here, too.
        raise BadXML, "Malformed xml: #{e.class}: #{e}", e.backtrace
      end
      #validation?
      # seems we have to recreate our XML::Schema object on each invocation
      # else libxml segfaults very quickly
      #doc.validate_schema(XML::Schema.from_string(IO.read(LOCAL_EEML_SCHEMA_LOCATION)))
    end

    #builds an Environment from the given parsed document.
    #raises MissingNode if no environment node can be found.
    def extract_environment(doc)
      env = Environment.new
      #the xpath expressions below address elements through the 'x' prefix
      doc.root.namespaces.default_prefix = 'x'
      env_node = find_first_node_or_fail(doc, 'x:environment', 'environment')

      env.identifier = env_node['id']
      updated = env_node['updated']
      env.updated = parse_timestamp(updated) unless updated.nil?
      env.creator = env_node['creator']

      env.title = optional_content(env_node, 'x:title', 'title')
      env.feed_url = optional_content(env_node, 'x:feed', 'feed')
      env.description = optional_content(env_node, 'x:description', 'description')
      env.website = optional_content(env_node, 'x:website', 'website')
      env.status = optional_content(env_node, 'x:status', 'status')
      env.email = optional_content(env_node, 'x:email', 'email')
      env.icon = optional_content(env_node, 'x:icon', 'icon')

      #location is optional
      #find_first_node_or_fail(env_node, 'x:location', 'location')
      loc_node = env_node.find_first('x:location')
      env.location = extractLocation(loc_node) if loc_node

      #datastreams are left untouched on the Environment when there are no data nodes
      datastream_nodes = env_node.find('x:data')
      # raise NoDataStreams.new, "no datastreams found" if datastream_nodes.empty?
      env.datastreams = extractDataStreams(datastream_nodes) unless datastream_nodes.empty?

      env
    end

    #builds a Location from the given location node.
    #reads the attributes domain, disposition and exposure, and the content of
    #the optional child elements name, lat, lon and ele (nil when absent).
    #raises a RuntimeError if given nil.
    def extractLocation(node)
      raise "given nil node" if node.nil?
      loc = Location.new
      loc.domain = node['domain']
      loc.disposition = node['disposition']
      loc.exposure = node['exposure']
      loc.name = optional_content(node, 'x:name', 'name')
      loc.latitude = optional_content(node, 'x:lat', 'lat')
      loc.longitude = optional_content(node, 'x:lon', 'lon')
      loc.elevation = optional_content(node, 'x:ele', 'ele')
      loc
    end

    #return an array (TODO: or a hash?) of DataStream objects from the given list of data nodes
    def extractDataStreams(nodes)
      nodes.map { |node| extractDataStream(node) }
    end

    #builds and returns a detailed exception of the given class, for problems concerning the given node (or its missing children)
    #details include node's name and line number (zero if not available)
    def exception_for_node(node, exception_class, message)
      ex = exception_class.new(message)
      ex.line_num = node.line_num
      ex.node_name = node_name_or_root(node)
      ex
    end

    #builds a DataStream from the given data node.
    #reads the id attribute, the content of every tag child, exactly one value
    #child (with its minValue / maxValue attributes) and at most one unit child
    #(with its symbol / type attributes).
    #raises MissingAttribute if the node has no id,
    #DataMissingValue / DataHasMultipleValues unless there is exactly one value node,
    #and DataHasMultipleUnits if there is more than one unit node.
    def extractDataStream(node)
      data = DataStream.new
      raise MissingAttribute.new('id', node.name) if node['id'].nil?
      data.identifier = node['id']

      node.find('x:tag').each do |tag_node|
        data.tags << tag_node.content
      end

      value_nodes = node.find('x:value')
      raise exception_for_node(node, DataMissingValue, "Data node is missing value node.") if value_nodes.empty?
      raise exception_for_node(node, DataHasMultipleValues, "Data node has multiple 'value' nodes.") if value_nodes.size > 1

      value_node = value_nodes.first
      data.min_value = value_node['minValue']
      data.max_value = value_node['maxValue']
      data.value = value_node.content

      unit_nodes = node.find('x:unit')
      raise exception_for_node(node, DataHasMultipleUnits, "Data node has multiple 'unit' nodes.") if unit_nodes.size > 1

      #the unit is optional; unit fields are only assigned when one is present
      unit_node = unit_nodes.first
      unless unit_node.nil?
        data.unit_symbol = unit_node['symbol']
        data.unit_type = unit_node['type']
        data.unit_value = unit_node.content
      end

      data
    end

    #Helpers ------------------------------------------------------------------
    #Consider mixing these in to the libxml parser for more readable code

    #converts the given timestamp string into a Time.
    #uses ParseDate when it could be loaded (the original behaviour), otherwise
    #Time.parse from the stdlib "time" extension.
    #NOTE(review): the two paths may treat a timezone in the string differently - confirm before relying on zones.
    def parse_timestamp(str)
      if defined?(ParseDate)
        Time.mktime(*ParseDate.parsedate(str))
      else
        Time.parse(str)
      end
    end

    #returns the content of the first node matching xpath under base_node
    #raises MissingNode if the node isn't there
    def mandatory_content(base_node, xpath, description)
      find_first_node_or_fail(base_node, xpath, description).content
    end

    #returns the node's content, or the given default if the node isn't there (default itself defaults to nil)
    #description isn't used, but keeps our signature same as mandatory_content(), up to that point.
    def optional_content(base_node, xpath, description, default = nil)
      node = base_node.find_first(xpath)
      node.nil? ? default : node.content
    end

    #get the name of the given node if it is a node, or 'root' if it is a doc.
    #for use only for error messages
    def node_name_or_root(node)
      node.respond_to?(:name) ? node.name : 'root'
    end

    #returns the first node matching xpath under base_node
    #raises MissingNode (built from base_node's name, the description and the xpath) if there is none
    def find_first_node_or_fail(base_node, xpath, description)
      node = base_node.find_first(xpath)
      raise(MissingNode.new(node_name_or_root(base_node), description, xpath)) if node.nil?
      node
    end
  end
end