lib/pi_zero.rb in mspire-0.4.5 vs lib/pi_zero.rb in mspire-0.4.7

- old
+ new

@@ -1,7 +1,6 @@ require 'rsruby' -require 'gsl' require 'vec' require 'vec/r' require 'enumerator' @@ -19,19 +18,50 @@ # From Storey et al. PNAS 2003: lambdas = [] # lambda pi_zeros = [] # pi_0 total = sorted_pvals.size # m - # totally retarded implementation with correct logic: + # totally inefficient implementation (with correct logic): + # TODO: implement this efficiently start.step(stop, step) do |lam| lambdas << lam (greater, less) = sorted_pvals.partition {|pval| pval > lam } pi_zeros.push( greater.size.to_f / ( total * (1.0 - lam) ) ) end [lambdas, pi_zeros] end +=begin + def plateau_height_with_gsl(x, y) + require 'gsl' + x_deltas = (0...(x.size-1)).to_a.map do |i| + x[i+1] - x[i] + end + y_deltas = (0...(y.size-1)).to_a.map do |i| + y[i+1] - y[i] + end + new_xs = x.dup + new_ys = y.dup + x_deltas.reverse.each do |delt| + new_xs.push( new_xs.last + delt ) + end + + y_cnt = y.size + y_deltas.reverse.each do |delt| + y_cnt -= 1 + new_ys.push( y[y_cnt] - delt ) + end + + x_vec = GSL::Vector.alloc(new_xs) + y_vec = GSL::Vector.alloc(new_ys) + coef, cov, chisq, status = GSL::Poly.fit(x_vec,y_vec, 3) + coef.eval(x.last) + #x2 = GSL::Vector::linspace(0,2.4,20) + #graph([x_vec,y_vec], [x2, coef.eval(x2)], "-C -g 3 -S 4") + end +=end + # expecting x and y to make a scatter plot descending to a plateau on the # right side (which is assumed to be of increasing noise as it goes to the # right) # returns the height of the plateau at the right edge # @@ -40,50 +70,23 @@ # * # ** # ** *** * * # ***** **** *** def plateau_height(x, y) -=begin - require 'gsl' - x_deltas = (0...(x.size-1)).to_a.map do |i| - x[i+1] - x[i] - end - y_deltas = (0...(y.size-1)).to_a.map do |i| - y[i+1] - y[i] - end - new_xs = x.dup - new_ys = y.dup - x_deltas.reverse.each do |delt| - new_xs.push( new_xs.last + delt ) - end - - y_cnt = y.size - y_deltas.reverse.each do |delt| - y_cnt -= 1 - new_ys.push( y[y_cnt] - delt ) - end - - x_vec = GSL::Vector.alloc(new_xs) - y_vec = GSL::Vector.alloc(new_ys) - coef, cov, chisq, status = GSL::Poly.fit(x_vec,y_vec, 3) - coef.eval(x.last) - #x2 = GSL::Vector::linspace(0,2.4,20) - #graph([x_vec,y_vec], [x2, coef.eval(x2)], "-C -g 3 -S 4") -=end - r = RSRuby.instance answ = r.smooth_spline(x,y, :df => 3) ## to plot it! - #r.plot(x,y, :ylab=>"instantaneous pi_zeros") - #r.lines(answ['x'], answ['y']) - #r.points(answ['x'], answ['y']) - #sleep(8) + r.plot(x,y, :ylab=>"pi_zeros or frit") + r.lines(answ['x'], answ['y']) + r.points(answ['x'], answ['y']) + sleep(4) answ['y'].last end def plateau_exponential(x,y) + require 'gsl' xvec = GSL::Vector.alloc(x) yvec = GSL::Vector.alloc(y) a2, b2, = GSL::Fit.linear(xvec, GSL::Sf::log(yvec)) x2 = GSL::Vector.linspace(0, 1.2, 20) exp_a = GSL::Sf::exp(a2) @@ -91,13 +94,14 @@ raise NotImplementedError, "need to grab out the answer" #graph([xvec, yvec], [x2, exp_a*GSL::Sf::exp(b2*x2)], "-C -g 3 -S 4") end - # returns a conservative (but close) estimate of pi_0 given sorted p-values + # returns a conservative (but close) estimate of pi_0 given p-values # following Storey et al. 2003, PNAS. - def pi_zero(sorted_pvals) + def pi_zero(pvals) + sorted_pvals = pvals.sort plateau_height( *(pi_zero_hats(sorted_pvals)) ) end # returns an array where the left values have been filled in using the # similar values on the right side of the distribution. These values are @@ -159,10 +163,12 @@ #File.open("decoy.yml", 'w') {|out| out.puts target_hits.map {|v| v.xcorr }.join(" ") } #abort 'checking' p_values(target_hits.map {|v| v.xcorr}, new_decoy_vals ) end +#### NEED TO VERIFY if this is PIT or PI_ZERO! +=begin # takes a list of booleans with true being a target hit and false being a # decoy hit and returns the pi_zero using the smooth method # Should be ordered from best to worst (i.e., one expects more true values # at the beginning of the list) def pi_zero_from_booleans(booleans) @@ -182,46 +188,57 @@ end end ys.reverse! plateau_height(xs, ys) end +=end + # returns fraction of incorrect target hits (frit) (this is the percent + # incorrect targets [PIT] expressed as a fraction rather than percent) + # takes two parallel arrays consisting of the total number of hits (this + # will typically be the total # target hits) at that point and the + # precision (ranging from: [0,1]) (typically determined by counting the + # number of decoy hits). Expects the number of total hits to be + # monotonically increasing and the precision to roughly start high and + # decrease as more hits (of lesser quality) are added. + def frit_from_precision(total_num_hits_ar, precision_ar) + instant_pi_zeros = [] + total_num_hits_ar.reverse.zip(precision_ar.reverse).each_cons(2) do |dp1, dp0| + (x1, y1) = dp1 + (x0, y0) = dp0 + instant_pi_zeros << ((x1 * (1.0 - y1)) - (x0 * (1.0 - y0) )) / (x1 - x0) + end + instant_pi_zeros.reverse! + plateau_height(total_num_hits_ar[1..-1], instant_pi_zeros) + end + # Takes an array of doublets ([[int, int], [int, int]...]) where the first # value is the number of target hits and the second is the number of decoy # hits. Expects that best hits are at the beginning of the list. Assumes - # that each sum is a subset - # of the following group (shown as actual hits rather than number of hits): + # that each sum is a subset of the following group (shown as actual hits + # rather than number of hits): # # [[target, target, target, decoy], [target, target, target, decoy, # target, decoy, target], [target, target, target, decoy, target, # decoy, target, decoy, target, target]] # # This assumption may be relaxed somewhat and should still give good # results. - def pi_zero_from_groups(array_of_doublets) - pi_zeros = [] + def frit_from_groups(array_of_doublets) + frits = [] array_of_doublets.reverse.each_cons(2) do |two_doublets| bigger, smaller = two_doublets - bigger[0] = bigger[0] - smaller[0] - bigger[1] = bigger[1] - smaller[1] - bigger.map! {|v| v < 0 ? 0 : v } - if bigger[1] > 0 - pi_zeros << (bigger[0].to_f / bigger[1]) + num_targets = bigger[0] - smaller[0] + num_decoy = bigger[1] - smaller[1] + num_targets = 0 if num_targets < 0 + num_decoy = 0 if num_targets < 0 + if num_decoy > 0 + frits << (num_targets.to_f / num_decoy) end end - pi_zeros.reverse! - xs = (0...(pi_zeros.size)).to_a - plateau_height(xs, pi_zeros) + frits.reverse! + xs = (0...(frits.size)).to_a + plateau_height(xs, frits) end end - - -end - -if $0 == __FILE__ - #xcorrs = IO.readlines("/home/jtprince/xcorr_hist/all_xcorrs.yada").first.chomp.split(/\s+/).map {|v| v.to_f } - #PiZero.p_values_for_sequest( - #File.open("newtail.yada", 'w') {|out| out.puts new_dist.join(" ") } - - end