lib/mspire/ident/pepxml.rb in mspire-0.8.6.2 vs lib/mspire/ident/pepxml.rb in mspire-0.8.7

- old
+ new

DEFAULT_PEPXML_VERSION = MsmsPipelineAnalysis::PEPXML_VERSION
XML_ENCODING = 'UTF-8'

attr_accessor :msms_pipeline_analysis

# Parses a pepXML file and returns an array of
# Mspire::Ident::Pepxml::SearchHit::Simple structs, one per <search_hit>
# element found anywhere in the document (namespaces are stripped first).
#
# Each struct is built from:
#   * an id of the form "hit_<index>" (index is the document-order position)
#   * a Mspire::Ident::Search named after +file+ with its extension removed
#   * the hit's 'peptide' attribute
#   * the 'assumed_charge' attribute of the hit's grandparent element,
#     converted with to_i
#   * search_scores: a hash of the hit's <search_score> children, keyed by
#     the 'name' attribute as a Symbol with the 'value' attribute cast to
#     Float. If a score name occurs more than once, the last one wins.
#   * analysis_results: a hash of the hit's <analysis_result> children,
#     keyed by the 'analysis' attribute (String). Each value is an array
#     with one hash per child node of the analysis_result, mapping
#     attribute names to attribute values (all Strings). If an analysis
#     name occurs more than once, the last one wins.
#
# A hit with no <search_score> or no <analysis_result> children simply gets
# an empty hash for that field.
#
# If a block is given, it is called once per hit with the hit's grandparent
# node (the same node 'assumed_charge' is read from). The block's return
# value is ignored.
def self.simple_search_hits(file, &block)
  parse_options = Nokogiri::XML::ParseOptions::DEFAULT_XML |
                  Nokogiri::XML::ParseOptions::NOBLANKS |
                  Nokogiri::XML::ParseOptions::STRICT

  File.open(file) do |io|
    doc = Nokogiri::XML.parse(io, nil, nil, parse_options)
    # we can work with namespaces, or just remove them ...
    doc.remove_namespaces!
    root = doc.root
    search_hits = root.xpath('//search_hit')

    search_hits.each_with_index.map do |search_hit, i|
      aaseq = search_hit['peptide']
      # NOTE(review): presumably the enclosing spectrum_query element --
      # confirm against the pepXML schema.
      grandparent = search_hit.parent.parent
      charge = grandparent['assumed_charge'].to_i

      nodes_by_name = search_hit.children.group_by(&:name)

      # group_by only creates keys for names that actually occur, so fall
      # back to an empty list when a hit has no children of a given kind.
      search_scores = {}
      nodes_by_name.fetch('search_score', []).each do |node|
        search_scores[node['name'].to_sym] = node['value'].to_f
      end

      analysis_results = {}
      nodes_by_name.fetch('analysis_result', []).each do |node|
        analysis_results[node['analysis']] = node.children.map do |atnode|
          atnode.attribute_nodes.each_with_object({}) do |attribute, hash|
            hash[attribute.name] = attribute.value
          end
        end
      end

      hit = Mspire::Ident::Pepxml::SearchHit::Simple.new(
        "hit_#{i}",
        Mspire::Ident::Search.new(file.chomp(File.extname(file))),
        aaseq,
        charge,
        search_scores,
        analysis_results
      )

      block.call(grandparent) if block
      hit
    end
  end
end
def pepxml_version