require 'xml_style_parser'
require 'ms/spectrum'
require 'ms/scan'
class MS::Parser::MzXML::DOM
include XMLStyleParser
include MS::Parser::MzXML
#@@scan_atts = %w(num msLevel retentionTime startMz endMz precursors spectrum)
# Creates a parser.
# parse_type:: kept in @method (defaults to :msrun)
# version::    version string kept in @version (defaults to '1.0'); the
#              other methods compare against it to pick a parsing path
def initialize(parse_type=:msrun, version='1.0')
  @method, @version = parse_type, version
end
# Builds an MS::Scan from the attributes of a scan node.
#
# node:: any object answering [] with string attribute names (an XML scan
#        node or a plain Hash)
#
# Positions filled in on the scan:
#   0    - 'num' as an Integer
#   1    - 'msLevel' as an Integer
#   2    - 'retentionTime' as a Float, taken from the text between the first
#          two characters and the last one (e.g. "PT1.5S" -> 1.5); left
#          untouched when the attribute is absent
#   3, 4 - 'startMz' and 'endMz' as Floats, only when 'startMz' is present
#
# Returns the scan.
def new_scan_from_hash(node)
  scan = MS::Scan.new # array class creates one with 9 positions
  scan[0] = node['num'].to_i
  scan[1] = node['msLevel'].to_i

  # the attribute may be missing; slicing nil would raise NoMethodError
  retention_time = node['retentionTime']
  scan[2] = retention_time[2...-1].to_f if retention_time

  start_mz = node['startMz']
  if start_mz
    scan[3] = start_mz.to_f
    scan[4] = node['endMz'].to_f
  end
  scan
end
# takes a scan node and creates a scan object
# the parent scan is the one directly above it in mslevel
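# if the precursorMz node carries a precursorScanNum, that scan is looked up
# in scans_by_num and stored on the precursor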
# scan_n::       scan node; its attributes go to new_scan_from_hash and its
#                children are walked for 'precursorMz' and 'peaks' nodes
# scans_by_num:: scans indexed by scan number, used to resolve a
#                precursor's 'precursorScanNum'
# get_spectra::  when false, 'peaks' nodes are ignored
#
# Returns the scan with its precursors at position 5 and, when a 'peaks'
# node was read, the spectrum at position 6.
# Aborts the program when @version is not below '3.0'.
def create_scan(scan_n, scans_by_num, get_spectra=true)
  # note that mzXML version 3.0 *can* have more than one peak...
  # not sure how to deal with that since there is one spectrum/scan
  abort 'not supporting version 3.0 just yet' unless @version < '3.0'

  scan = new_scan_from_hash(scan_n)
  precursors = []
  # children are walked by hand; the original author noted that
  # scan_n.find('child::precursorMz') / find_first('child::peaks') should
  # have been usable here
  scan_n.each do |child|
    case child.name
    when 'precursorMz'
      precursor = MS::Precursor.new
      precursor[1] = child['precursorIntensity'].to_f
      precursor[0] = child.content.to_f
      parent_num = child['precursorScanNum']
      precursor[2] = scans_by_num[parent_num.to_i] if parent_num
      precursors << precursor
    when 'peaks'
      if get_spectra
        scan[6] = MS::Spectrum.from_base64_peaks(child.content, child['precision'].to_i)
      end
    end
  end
  scan[5] = precursors
  scan
end
# returns an array of msrun objects
# Placeholder for reading several msruns out of one file.
# file:: unused
# Always raises NotImplementedError.
def msruns(file)
  raise(NotImplementedError)
end
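# returns the file's contents as a string, with doubled closing scan tags
# collapsed into a single tag and the closing scan tag that is missing
# after a peaks element added back in
# we do this in windows style (\r\n line endings) since these files are
# generated off a windows machine only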
# file:: path of the mzXML file to read
# Returns a new String holding the repaired document text.
def fix_bad_scan_tags(file)
  # File.read never treats a leading '|' in the name as a command to run
  text = File.read(file)
  # two closing scan tags in a row -> a single closing scan tag
  text = text.gsub(/<\/scan>\s+<\/scan>/m, '</scan>')
  # a peaks element followed directly by the next scan is missing its
  # closing scan tag; put it back using windows line endings
  text.gsub(/<\/peaks>\s+<scan/m, "</peaks>\r\n</scan>\r\n<scan")
end
# options for msrun:
# :msrun => MSRun # use this object instead of creating one
# :spectra => *true|false # if false don't get spectra
# Parses the mzXML document at +file+ and loads its scans into an MSRun
# object.
#
# file:: path given to fix_bad_scan_tags (@version '2.0') or to
#        get_root_node_from_file (any other version)
# opts:: option hash; it is only read, never modified
#        :msrun   - object to fill in; a new MS::MSRun is created when absent
#        :spectra - handed to create_scan as get_spectra; true when the key
#                   is absent
#
# Sets scan_count, scans and (when at least one scan was read) start_time
# and end_time on the MSRun object.
#
# Returns the array of scans, i.e. the value of the final assignment, not
# the MSRun object.
# NOTE(review): an MS::MSRun created here is therefore not handed back to
# the caller - confirm whether returning the scans is intended.
#
# Raises NotImplementedError when the document has more than one msRun node
# and RuntimeError when no msRun node can be located.
def msrun(file, opts={})
  get_spectra = opts.fetch(:spectra, true)
  msrun_obj = opts[:msrun] || MS::MSRun.new

  root =
    if @version == '2.0'
      get_root_node_from_string(fix_bad_scan_tags(file))
    else
      get_root_node_from_file(file)
    end

  # right now we are only finding the first msRun (probably a rare case of
  # multiple runs in an mzXML file...)
  msrun_n =
    if @version >= '2.0'
      kids = root.children.select { |v| v.name == 'msRun' }
      raise(NotImplementedError, "one msrun per doc right now") if kids.size > 1
      kids.first
    else
      root
    end
  raise RuntimeError, "no msRun node found" if msrun_n.nil?
  raise RuntimeError, "extra node slipped in somehow" if msrun_n.name != 'msRun'

  ## HEADER
  scan_count = msrun_n['scanCount'].to_i
  msrun_obj.scan_count = scan_count
  scans_by_num = Array.new(scan_count + 1)

  ## SPECTRUM
  # collect only the scans actually present: pre-sizing the array to
  # scanCount leaves trailing nils whenever the header overstates the count
  scans = []
  msrun_n.each do |scan_n|
    next unless scan_n.name == 'scan'
    scan = create_scan(scan_n, scans_by_num, get_spectra)
    scans << scan
    scans_by_num[scan[0]] = scan
  end

  ## update the scan's parents
  MS::MSRun.add_parent_scan(scans)

  # note that startTime and endTime are optional AND in >2.2 are dateTime
  # instead of duration types!, so we will just use scan times...
  # Also, note that startTime and endTime are BROKEN on readw -> mzXML 2.0
  # export. They give the start and end time in seconds, but they are
  # really minutes. All the more reason to use the first and last scans!
  unless scans.empty?
    msrun_obj.start_time = scans.first.time
    msrun_obj.end_time = scans.last.time
  end
  msrun_obj.scans = scans
end
end