lib/macroape/counting.rb in macroape-3.3.7 vs lib/macroape/counting.rb in macroape-3.3.8
- old
+ new
@@ -2,31 +2,52 @@
module Bioinform
class PWM
# Sets or gets the size limit of the calculation hash. It is a safeguard against excessive CPU use on inappropriate data.
make_parameters :max_hash_size
-
+
# Returns the threshold from the first (pvalue, threshold, real pvalue) triple
# yielded by #thresholds for the given pvalue. If nothing is yielded, the
# return value of #thresholds itself is returned.
def threshold(pvalue)
  thresholds(pvalue) do |_pvalue, found_threshold, _real_pvalue|
    return found_threshold
  end
end
# Returns [threshold, real pvalue] taken from the first triple yielded by
# #thresholds for the given pvalue. If nothing is yielded, the return value
# of #thresholds itself is returned.
def threshold_and_real_pvalue(pvalue)
  thresholds(pvalue) do |_pvalue, found_threshold, actual_pvalue|
    return found_threshold, actual_pvalue
  end
end
+ def weak_threshold(pvalue)
+ weak_thresholds(pvalue){|_, thresh, _| return thresh }
+ end
+ def weak_threshold_and_real_pvalue(pvalue)
+ weak_thresholds(pvalue){|_, thresh, real_pv| return thresh, real_pv }
+ end
# Yields (pvalue, threshold, real pvalue) for every entry produced by
# thresholds_by_pvalues. Each entry maps a pvalue to a pair of ranges:
# a score range and a count range.
# The threshold is placed one tenth of the way from the start of the score
# range towards its end; the real pvalue is the end of the count range
# divided by vocabulary_volume.
# Returns whatever the underlying #each call returns.
def thresholds(*pvalues)
  thresholds_by_pvalues(*pvalues).each do |pvalue, (score_range, count_range)|
    lower = score_range.begin
    upper = score_range.end
    interpolated = lower + 0.1 * (upper - lower)
    yield pvalue, interpolated, count_range.end.to_f / vocabulary_volume
  end
end
+ # "weak" means that threshold has real pvalue not less than given pvalue, while usual threshold not greater
+ def weak_thresholds(*pvalues)
+ thresholds_by_pvalues(*pvalues).each do |pvalue,(thresholds, counts)|
+ threshold = thresholds.begin.to_f
+ real_pvalue = counts.begin.to_f / vocabulary_volume
+ yield pvalue, threshold, real_pvalue
+ end
+ end
+
+
# Returns a distribution (iterated here as score/count pairs) obtained from
# count_distribution_after_threshold whose counts sum to at least
# max_pvalue * vocabulary_volume. The threshold passed in is an estimate;
# the loop repeats with a doubled pvalue until enough is counted.
def count_distribution_under_pvalue(max_pvalue)
cnt_distribution = {}
look_for_count = max_pvalue * vocabulary_volume # target is fixed here, from the initial max_pvalue, before any doubling
until cnt_distribution.inject(0.0){|sum,(score,count)| sum + count} >= look_for_count
- cnt_distribution = count_distribution_after_threshold(threshold_gauss_estimation(max_pvalue))
+ begin
+ approximate_threshold = threshold_gauss_estimation(max_pvalue)
+ rescue # bare rescue: any StandardError raised by the estimation falls back to worst_score
+ approximate_threshold = worst_score
+ end
+ cnt_distribution = count_distribution_after_threshold(approximate_threshold)
max_pvalue *=2 # if the estimate counted too few words, lower the threshold estimate by doubling the pvalue
end
cnt_distribution
end
@@ -81,15 +102,20 @@
new_scores
end
# For each given threshold, sums the counts of all distribution entries whose
# score is >= that threshold (sums start from 0.0).
# The removed (3.3.7) lines returned an Array in argument order; the added
# (3.3.8) lines return a Hash keyed by threshold.
def counts_by_thresholds(*thresholds)
scores = count_distribution_after_threshold(thresholds.min) # one distribution, taken from the smallest threshold, serves all thresholds
- thresholds.map{ |threshold|
- scores.inject(0.0){|sum,(score,count)| (score >= threshold) ? sum + count : sum}
+ thresholds.inject({}){ |hsh, threshold|
+ hsh[threshold] = scores.inject(0.0){|sum,(score,count)| (score >= threshold) ? sum + count : sum}
+ hsh
}
end
+ def count_by_threshold(threshold)
+ counts_by_thresholds(threshold)[threshold]
+ end
+
# Returns the count obtained for the given threshold divided by vocabulary_volume.
def pvalue_by_threshold(threshold)
- counts_by_thresholds(threshold).first / vocabulary_volume
+ count_by_threshold(threshold) / vocabulary_volume # 3.3.8 side: goes through the single-threshold helper
end
end
end
\ No newline at end of file