#!/usr/bin/ruby -w
#
# Command-line driver: takes .srf files, or the xml exported from the
# Bioworks multi-consensus view, and writes pepXML files into an output
# directory using the SpecID::Sequest classes from the 'spec_id' library.

require 'spec_id'
require 'optparse'
require 'ostruct'
require 'fileutils'

##############################################################
# GLOBAL CONSTANTS
DEFAULT_DATABASE_PATH = "/project/marcotte/marcotte/ms/database"
DEFAULT_MZXML_PATH = "."
DEFAULT_OUTDIR = "pepxml"
DEFAULT_PARAMS_GLOB = "*.params"
# First file in the current directory matching the glob; nil when none exists.
DEFAULT_PARAMS_FILE = Dir[DEFAULT_PARAMS_GLOB].first
DEFAULT_PEPXML_VERSION = 18
DEFAULT_MS_MODEL = 'LCQ'
DEFAULT_MASS_ANALYZER = 'Ion Trap'
##############################################################

# Establish the default database path: the BIOWORKS_DBPATH environment
# variable wins over the constant above.
db_env_var = ENV["BIOWORKS_DBPATH"]
def_dbpath = db_env_var || DEFAULT_DATABASE_PATH

script_name = File.basename(__FILE__)

opt = OpenStruct.new
opt_obj = OptionParser.new do |op|
  op.banner = "\nusage: #{script_name} [options] .srf ...\n" +
              "usage: #{script_name} [options] bioworks.xml"
  # NOTE: no line of this text may start with '-' or '=', since OptionParser
  # inspects description strings with line-anchored patterns.
  op.on_head "
Takes .srf files or the xml exported output of Bioworks multi-consensus view
(no filtering) and outputs pepXML files (to feed the trans-proteomic pipeline).

Options:"

  op.on('-h', '--help', "display this and more notes and exit") {|v| opt.help = v }
  op.on('-o', '--outdir path', "output directory d: '#{DEFAULT_OUTDIR}'") {|v| opt.outdir = v }

  op.separator ""
  op.separator "bioworks.xml files may require additional options:"
  op.separator ""

  op.on('-p', '--params file', "sequest params file d: '#{DEFAULT_PARAMS_FILE}'") {|v| opt.params = v }
  # Show the effective default (env var or constant), not just the constant.
  op.on('-d', '--dbpath path', "path to databases d: '#{def_dbpath}'") {|v| opt.dbpath = v }
  op.on('-m', '--mspath path', "path to MS files d: '#{DEFAULT_MZXML_PATH}'") {|v| opt.mspath = v }
  op.on('--model ', "MS model d: '#{DEFAULT_MS_MODEL}'") {|v| opt.model = v }
  op.on('--mass_analyzer ', "Mass Analyzer d: '#{DEFAULT_MASS_ANALYZER}'") {|v| opt.mass_analyzer = v }
  op.on('-v', '--version pepxml_version', "pepxml version d: '#{DEFAULT_PEPXML_VERSION}'") {|v| opt.pepxml_version = v.to_i }
end

more_notes = "
Notes:
  mspath: Directory to RAW or mzXML (version 1) files.
          This option is not used with Bioworks 3.3 files.
  outdir: Path will be created if it does not already exist.
  model : LCQ -> 'LCQ Deca XP Plus'
        : Orbi -> 'LTQ Orbitrap'
        : other string -> That's the string that will be used.
  options with spaces should be quoted: e.g., \"Time of Flight\"

  Database Path: If the database path in the sequest.params file is valid,
  that will be used.  Otherwise, will try (in order):
      1. --dbpath or -d option
      2. environmental variable BIOWORKS_DBPATH (currently: '#{db_env_var}')
      3. constant at top of this script (currently: '#{DEFAULT_DATABASE_PATH}')
"

opt_obj.parse!

# Intercept --help before checking the argument count, so that
# "-h" alone still prints the extended notes.
if opt.help
  puts opt_obj
  puts more_notes
  exit
end

if ARGV.size < 1
  puts opt_obj
  exit
end

## Create the output directory if it does not exist
opt.outdir ||= DEFAULT_OUTDIR
FileUtils.mkpath(opt.outdir) unless File.exist?(opt.outdir)

files = ARGV.to_a

# The first argument decides the mode.  Compare the real file extension so
# that names merely containing ".srf" (e.g. "run.srf_export.xml") are not
# mistaken for SRF files.
if File.extname(files.first).downcase == '.srf'
  backup_db_path = opt.dbpath || def_dbpath
  files.each do |file|
    srf_opts = {
      :backup_db_path => backup_db_path,
      :out_path => opt.outdir,
    }
    xml_obj = SpecID::Sequest::PepXML.new_from_srf(file, srf_opts)
    xml_obj.to_pepxml(xml_obj.base_name + ".xml")
  end
else
  ## Ensure a params file is available (either given or found by the glob)
  opt.params ||= DEFAULT_PARAMS_FILE
  if opt.params.nil?
    abort "#{script_name}: no sequest params file given (-p) and none " +
          "matching '#{DEFAULT_PARAMS_GLOB}' found in the current directory"
  end
  params_obj = SpecID::Sequest::Params.new(opt.params)

  # Ensure the database exists!  Fall back to --dbpath, then the default.
  unless File.exist?(params_obj.database)
    params_obj.database_path = opt.dbpath || def_dbpath
  end

  opt.mspath ||= DEFAULT_MZXML_PATH
  opt.pepxml_version ||= DEFAULT_PEPXML_VERSION
  opt.model ||= DEFAULT_MS_MODEL
  # NOTE(review): mass_analyzer is parsed and defaulted but never handed to
  # set_from_bioworks below -- confirm whether it should be passed along.
  opt.mass_analyzer ||= DEFAULT_MASS_ANALYZER

  # Expand the short model aliases; any other string is used verbatim.
  model =
    case opt.model
    when "LCQ"  then 'LCQ Deca XP Plus'
    when "Orbi" then 'LTQ Orbitrap'
    else opt.model
    end

  # Only one bioworks.xml file is processed per run.
  bioworks = files.first
  if files.size > 1
    warn "#{script_name}: only '#{bioworks}' will be processed; " +
         "ignoring: #{files[1..-1].join(', ')}"
  end

  xml_objs = SpecID::Sequest::PepXML.set_from_bioworks(
    params_obj, bioworks, opt.mspath, opt.outdir, opt.pepxml_version,
    'trypsin', 'ThermoFinnigan', model
  )
  xml_objs.each do |obj|
    obj.to_pepxml(obj.base_name + ".xml")
  end
end