lib/darkext/statistics.rb in darkhelmet-darkext-0.3.2 vs lib/darkext/statistics.rb in darkhelmet-darkext-0.4.0

- old
+ new

@@ -62,80 +62,101 @@ [m - i, m + i] end # Standardizes the array def standardize + self.clone.standardize! + end + + # Destructive standardize + def standardize! m = self.mean.to_f rho = self.deviation.to_f - self.map do |v| + self.map! do |v| (v.to_f - m) / rho end end -end -module Statistics - # Finds the probability of a z-score - def self.prob(z) - p = Math.erf(z.abs/2.sqrt) / 2 - return 0.5 + p if 0 < z - return 0.5 - p + def sum_of_squares + m = self.mean + self.map do |v| + (v - m).square + end.sum end +end +module Darkext + module Darkext::Statistics + # Finds the probability of a z-score + def self.prob(z) + p = Math.erf(z.abs/2.sqrt) / 2 + return 0.5 + p if 0 < z + return 0.5 - p + end - # Finds the zscore of a probability - def self.zscore(p, epsilon = 0.00000000000001) - return -1 if (1 < p || 0 > p) - minz, maxz = -6, 6 - zscore = 0.5 - prob = 0 - while (maxz - minz) > epsilon - prob = prob(zscore) - if prob > p then maxz = zscore else minz = zscore end - zscore = (maxz + minz) * 0.5 + # Finds the zscore of a probability + def self.zscore(p, epsilon = 0.00000000000001) + return -1 if (1 < p || 0 > p) + minz, maxz = -6, 6 + zscore = 0.5 + prob = 0 + while (maxz - minz) > epsilon + prob = prob(zscore) + if prob > p then maxz = zscore else minz = zscore end + zscore = (maxz + minz) * 0.5 + end + return zscore end - return zscore - end - # Finds a two tail p-val for a high/low array - def self.p_val(r, n = 30, rho = 1, mu = r.mean) - probs = r.map do |x| - (x - mu) / (rho / n.sqrt) - end.map do |x| - Statistics.prob(x) + # Finds a two tail p-val for a high/low array + def self.p_val(r, n = 30, rho = 1, mu = r.mean) + probs = r.map do |x| + (x - mu) / (rho / n.sqrt) + end.map do |x| + Statistics.prob(x) + end + return 1 - (probs[1] - probs[0]) end - return 1 - (probs[1] - probs[0]) - end - module Statistics::Regression - # Do a least squares linear regression on the two sets of x's and y's - # Returns a hash containing many relevant values - # * n (:n) - # * B_1 (:b_1) - # * B_0 (:b_0) - # * predicted values (:predicted) - # * residuals (:residuals) - # * SS_E (:ss_e) - # * unbiased estimator (:estimator) - # * the equation as a lambda (:equation) - # Raises an argument error if the arguments are not the same size - def self.least_squares(xs,ys) - raise ArgumentError("Arguments must be of equal size") if xs.size != ys.size - n = xs.size - b_1 = (xs.zip(ys).map(&:*).sum - ((ys.sum * xs.sum)/n))/(xs.map(&:square).sum - (xs.sum.square/n)) - b_0 = ys.mean - b_1 * xs.mean - equation = lambda { |x| b_0 + b_1 * x } - predicted = xs.map(&lambda) - residuals = ys.zip(predicted).map(&:-) - ss_e = residuals.map(&:square).sum - estimator = ss_e/(n - 2) - reg = Hash.new - reg[:n] = n - reg[:b_1] = b_1 - reg[:b_0] = b_0 - reg[:predicted] = predicted - reg[:residuals] = residuals - reg[:ss_e] = ss_e - reg[:estimator] = estimator - reg[:equation] = equation - return reg + module Darkext::Statistics::Regression + # Do a least squares linear regression on the two sets of x's and y's + # Returns a hash containing many relevant values + # * n (:n) + # * B_1 (:b_1) + # * B_0 (:b_0) + # * predicted values (:predicted) + # * residuals (:residuals) + # * SSE (:ss_e) + # * SST (:ss_t) + # * R^2 (:r_2) + # * R (:r) + # * unbiased estimator (:estimator) + # * the equation as a lambda (:equation) + # Raises an argument error if the arguments are not the same size + def self.least_squares(xs,ys) + raise ArgumentError("Arguments must be of equal size") if xs.size != ys.size + n = xs.size + b_1 = (xs.zip(ys).map(&:product).sum - ((ys.sum * xs.sum)/n))/(xs.map(&:square).sum - (xs.sum.square/n)) + b_0 = ys.mean - b_1 * xs.mean + equation = lambda { |x| b_0 + b_1 * x } + predicted = xs.map(&equation) + residuals = ys.zip(predicted).map { |y| y.shift - y.shift } + ss_e = residuals.map(&:square).sum + ss_t = ys.sum_of_squares + estimator = ss_e/(n - 2) + r_2 = 1 - (ss_e/ss_t) + r = r_2.sqrt + reg = {:n => n, + :b_1 => b_1, + :b_0 => b_0, + :predicted => predicted, + :residuals => residuals, + :ss_e => ss_e, + :ss_t => ss_t, + :estimator => estimator, + :equation => equation, + :r_2 => r_2, + :r => r} + return reg + end end end end