require 'sort_by_attributes' require 'validator' require 'spec_id' require 'merge_deep' require 'spec_id/precision/filter/interactive' require 'spec_id/precision/filter/output' class Filter # filters using previously passed in methods and options def filter(group) if @opts send(@method, group, *@opts) else send(@method, group) end end # replaces the contents of group with what passed def filter!(group) group.replace(filter(group)) end end # we have to require this after we setup our defaults hash # require 'filter/spec_id/cmdline' class SpecID::Precision::Filter FV_DEFAULTS = { :sequest => { :xcorr1 => 1.0, :xcorr2 => 1.5, :xcorr3 => 2.0, :deltacn => 0.1, :ppm => 1000, :include_deltacnstar => true, }, # output :proteins => false, :output => [], # general :top_hit_by => :xcorr, :postfilter => :top_per_scan, :prefilter => false, :hits_together => true, # These are also defaulted in the commandline because they are necessary # for the validators... could this introduce conflicts somehow? :decoy_on_match => true, :ties => true, # UNLISTED FOR NOW: :include_ties_in_top_hit_prefilter => true, :include_ties_in_top_hit_postfilter => false, } require 'spec_id/precision/filter/cmdline' def filter_and_validate_cmdline(args) (spec_id_obj, options, option_parser) = CmdlineParser.new.parse(args) if spec_id_obj == nil puts option_parser return end final_answer = SpecID::Precision::Filter.new.filter_and_validate(spec_id_obj, options) end # # output_array has doublets of [format, handle] # # answer is the answer one gets out of filter_and_validate # def output(answer, output_array) # output_array.each do |format, handle| # SpecID::Precision::Filter::Output.new(format, handle) # end # end # Very high level method that takes simple parameters. 
  # spec_id may be a filename or a SpecID object (containing peps)
  # Default values may be queried from SpecID::Precision::Filter::FV_DEFAULTS
  # Returns a structured hash:
  #   Fl = Float ; Ar = Array
  #   { :params => ,
  #     :pephits => ,
  #     :pephits_precision => []
  #     # if :proteins => true
  #     :prothits => ,
  #     :prothits_precision => [ Array of hashes where each hash =
  #       { :worst => Fl, :normal => Fl,
  #         :normal_stdev => Fl } ]
  #   }
  #
  # NOTE: Brackets [] indicate an Array! The Bar '|' indicates another option.
  # The asterisk '*' is the default option.
  #
  # :sequest => {
  #   :xcorr1 -> >= (xcorr +1 charge state)
  #   :xcorr2 -> >= (xcorr +2 charge state)
  #   :xcorr3 -> >= (xcorr +3 charge state)
  #   :deltacn -> >= (delta cn)
  #   :ppm -> <= parts per million (Float)
  #   :include_deltacnstar => *true | false include deltacn (given at 1.1) of
  #                                         top hit with no 2nd hit
  #
  # }
  # OUTPUT:
  #   :proteins => true | *false gives proteins (and validation)
  #   :output => [[format, FILENAME=nil],...] formats to output filtering results.
  #              can be used multiple times
  #              FILENAME is the filename to use
  #              if nil, then outputs to $stdout
  #              valid formats are:
  #                :text_table (default)
  #                :yaml (need to implement)
  #                :protein_summary (need to implement)
  #                :html_table (need to implement)
  #              default value =>
  #                [[:text_table,nil]]
  #
  # VALIDATION:
  #   :validators => [Array] objects that respond to pephit_precision
  #                  usually of base class Validator
  #                  NOTE: if you have decoy peptides, you MUST have
  #                  a Validator::Decoy object to separate them out.
  #                  NOTE: if transmem validator passed in, the
  #                  proteins in spec_id must already be granted
  #                  transmem status!
  #
  #
  # OTHER:
  #   :top_hit_by -> *:xcorr | :probability
  #                  probabilities only in bioworks.xml files right now (if
  #                  they were calculated).
# :postfilter -> *:top_per_scan | :top_per_aaseq | :top_per_aaseq_charge # :top_per_scan hashes by filename + scan # :top_per_aaseq hashes by top_per_scan + aaseq # :top_per_aaseq_charge hashes by top_per_aaseq + charge # :prefilter -> true | *false Takes top hit per file+scan+charge # :interactive => interactive_object # # should behave like this: # # interactive_object.filter_args(currentopts) -> args_for_filtering | nil (done) # # # interactive_object.passing(final_answer) # The defaults for filter_and_validate def filter_and_validate(spec_id_obj, options={}) # NOTE: # This is a fairly complicated method. The complication comes in doing # top hit filters on separate/cat searches wanted them to be either # together or separate. I opt for fewer conversions between the two, but # that means keeping track of more things... opts = FV_DEFAULTS.merge_deep(options) spec_id = spec_id_obj peps = spec_id.peps filename = spec_id.filename ####################################### # DEFAULTS: interactive_changing_keys = [:xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar, :postfilter] interactive_shortcut_map = { :xcorr1 => 'x1', :xcorr2 => 'x2', :xcorr3 => 'x3', :deltacn => 'dcn', :ppm => 'ppm', :include_deltacnstar => 'dcns', :postfilter => 'pf', } to_float = proc {|x| x.to_f} to_bool = proc do |x| case x when /^t/io true when /^f/io false when true true when false false else nil end end to_postfilter = proc do |x| case x when 's' :top_per_scan when 'a' :top_per_aaseq when 'ac' :top_per_aaseq_charge when Symbol x end end casting_map = { :xcorr1 => to_float, :xcorr2 => to_float, :xcorr3 => to_float, :deltacn => to_float, :ppm => to_float, :include_deltacnstar => to_bool, :postfilter => to_postfilter, } # output: # NOTE: BOOLEANS that are by default false do not need a default!! # They will yield false on key lookup if no key or false! # BOOLEANS that by default are true should be queried like this # !(opts[: