module Statsample
module Test
# A t-test is any statistical hypothesis test in which the test
# statistic follows a Student's t distribution, if the null
# hypothesis is supported
class T
class << self
include Math
# Test the null hypothesis that the population mean is equal to a specified value u, one uses the statistic.
# Is the same formula used on t-test for paired sample.
# * x: sample/differences mean
# * u: population mean
# * s: sample/differences standard deviation
# * n: sample size
def one_sample(x,u,s,n)
(x-u)*Math::sqrt(n).quo(s)
end
# Test if means of two samples are different.
# * x1: sample 1 mean
# * x2: sample 2 mean
# * s1: sample 1 standard deviation
# * s2: sample 2 standard deviation
# * n1: sample 1 size
# * n2: sample 2 size
# * equal_variance: true if equal_variance assumed
#
def two_sample_independent(x1, x2, s1, s2, n1, n2, equal_variance = false)
num=x1-x2
if equal_variance
sx1x2 = sqrt(((n1-1)*s1**2 + (n2-1)*s2**2).quo(n1+n2-2))
den = sx1x2*sqrt(1.quo(n1)+1.quo(n2))
else
den=sqrt((s1**2).quo(n1) + (s2**2).quo(n2))
end
num.quo(den)
end
# Degrees of freedom for equal variance on t test
def df_equal_variance(n1,n2)
n1+n2-2
end
# Degrees of freedom for unequal variance
# * s1: sample 1 standard deviation
# * s2: sample 2 standard deviation
# * n1: sample 1 size
# * n2: sample 2 size
# == Reference
# * http://en.wikipedia.org/wiki/Welch-Satterthwaite_equation
def df_not_equal_variance(s1,s2,n1,n2)
s2_1=s1**2
s2_2=s2**2
num=(s2_1.quo(n1)+s2_2.quo(n2))**2
den=(s2_1.quo(n1)**2).quo(n1-1) + (s2_2.quo(n2)**2).quo(n2-1)
num.quo(den)
end
end
include Statsample::Test
include Summarizable
attr_reader :standard_error, :estimate, :df
# Tails for p-value (:both, :left or :right). Default :both
attr_accessor :tails
# Name of F analysis
attr_accessor :name
attr_accessor :confidence_level
attr_reader :t
attr_accessor :estimate_name, :standard_error_name
# Creates a generic t test. Use OneSample or TwoSamplesIndependent
# classes for better summaries.
# Parameters:
# * estimate: estimate
# * standard_error: standard error of estimate
# * df: degrees of freedom
def initialize(estimate, standard_error, df, opts=Hash.new)
@estimate=estimate
@standard_error=standard_error
@df=df
@t = @estimate / @standard_error.to_f
opts_default={ :tails=>:both,
:name=>_("T Test"),
:estimate_name=>_("Estimate"),
:standard_error_name=>_("Std.Err.of Estimate"),
:confidence_level=>0.95}
@opts = opts_default.merge(opts)
@opts.keys.each {|k|
send("#{k}=", @opts[k]) if respond_to? k
}
end
alias :se :standard_error
def to_f
t
end
# probability
def probability
p_using_cdf(Distribution::T.cdf(t, df), tails)
end
def confidence_interval(cl=nil)
cl||=confidence_level
t_crit = t_critical(cl, df)
[estimate - se*t_crit, estimate + se*t_crit]
end
alias :ci :confidence_interval
def report_building(builder) #:nodoc:
builder.section(:name=>@name) do |section|
section.text _("%s: %0.4f | %s: %0.4f") % [@estimate_name, @estimate, @standard_error_name, se]
report_building_t(section)
end
end
def report_building_t(s)
df_f=@df.is_a?(Integer) ? "%d" : "%0.4f"
s.text _("t(%d) = %0.4f, p=%0.4f (%s tails)") % [df, t,probability, tails]
s.text _("CI(%d%%): %0.4f - %0.4f") % [confidence_level*100, ci[0],ci[1]]
end
# One Sample t-test
# == Usage
# a = Daru::Vector.new(1000.times.map {rand(100)})
# t_1=Statsample::Test::T::OneSample.new(a, {:u=>50})
# t_1.summary
#
# === Output
#
# = One Sample T Test
# Sample mean: 48.954
# Population mean:50
# Tails: both
# t = -1.1573, p=0.2474, d.f=999
class OneSample
include Math
include Statsample::Test
include Summarizable
# Options
attr_accessor :opts
# Name of test
attr_accessor :name
# Population mean to contrast
attr_accessor :u
# Degress of freedom
attr_reader :df
# Tails for probability (:both, :left or :right)
attr_accessor :tails
# Create a One Sample T Test
# Options:
# * :u = Mean to compare. Default= 0
# * :name = Name of the analysis
# * :tails = Tail for probability. Could be :both, :left, :right
def initialize(vector, opts=Hash.new)
@vector=vector
default={:u=>0, :name=>"One Sample T Test", :tails=>:both}
@opts=default.merge(opts)
@name=@opts[:name]
@u=@opts[:u]
@tails=@opts[:tails]
@confidence_level=@opts[:confidence_level] || 0.95
@df= @vector.n_valid-1
@t=nil
end
def t_object
T.new(@vector.mean-u, @vector.se, @vector.n_valid-1, opts)
end
def t
t_object.t
end
def probability
t_object.probability
end
def standard_error
t_object.standard_error
end
alias :se :standard_error
def confidence_interval(cl=nil)
t_object.confidence_interval(cl)
end
alias :ci :confidence_interval
def report_building(b) # :nodoc:
b.section(:name=>@name) {|s|
s.text _("Sample mean: %0.4f | Sample sd: %0.4f | se : %0.4f") % [@vector.mean, @vector.sd, se]
s.text _("Population mean: %0.4f") % u if u!=0
t_object.report_building_t(s)
}
end
end
# Two Sample t-test.
#
# == Usage
# a = Daru::Vector.new(1000.times.map {rand(100)})
# b = Daru::Vector.new(1000.times.map {rand(100)})
# t_2=Statsample::Test::T::TwoSamplesIndependent.new(a,b)
# t_2.summary
# === Output
# = Two Sample T Test
# Mean and standard deviation
# +----------+---------+---------+------+
# | Variable | m | sd | n |
# +----------+---------+---------+------+
# | 1 | 49.3310 | 29.3042 | 1000 |
# | 2 | 47.8180 | 28.8640 | 1000 |
# +----------+---------+---------+------+
#
# == Levene Test
# Levene Test
# F: 0.3596
# p: 0.5488
# T statistics
# +--------------------+--------+-----------+----------------+
# | Type | t | df | p (both tails) |
# +--------------------+--------+-----------+----------------+
# | Equal variance | 1.1632 | 1998 | 0.2449 |
# | Non equal variance | 1.1632 | 1997.5424 | 0.1362 |
# +--------------------+--------+-----------+----------------+
class TwoSamplesIndependent
include Math
include Statsample::Test
include DirtyMemoize
include Summarizable
# Options
attr_accessor :opts
# Name of test
attr_accessor :name
# Degress of freedom (equal variance)
attr_reader :df_equal_variance
# Degress of freedom (not equal variance)
attr_reader :df_not_equal_variance
# Value of t for equal_variance
attr_reader :t_equal_variance
# Value of t for non-equal_variance
attr_reader :t_not_equal_variance
# Probability(equal variance)
attr_reader :probability_equal_variance
# Probability(unequal variance)
attr_reader :probability_not_equal_variance
# Tails for probability (:both, :left or :right)
attr_accessor :tails
# Create the object
dirty_writer :tails
dirty_memoize :t_equal_variance, :t_not_equal_variance, :probability_equal_variance, :probability_not_equal_variance, :df_equal_variance, :df_not_equal_variance
# Create a Two Independent T Test
# Options:
# * :name = Name of the analysis
# * :tails = Tail for probability. Could be :both, :left, :right
def initialize(v1, v2, opts=Hash.new)
@v1=v1
@v2=v2
default={:u=>0, :name=>"Two Sample T Test", :tails=>:both}
@opts=default.merge(opts)
@name=@opts[:name]
@tails=@opts[:tails]
end
# Set t and probability for given u
def compute
@t_equal_variance= T.two_sample_independent(@v1.mean, @v2.mean, @v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid,true)
@t_not_equal_variance= T.two_sample_independent(@v1.mean, @v2.mean, @v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid, false)
@df_equal_variance=T.df_equal_variance(@v1.n_valid, @v2.n_valid)
@df_not_equal_variance=T.df_not_equal_variance(@v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid)
@probability_equal_variance = p_using_cdf(Distribution::T.cdf(@t_equal_variance, @df_equal_variance), tails)
@probability_not_equal_variance = p_using_cdf(Distribution::T.cdf(@t_not_equal_variance, @df_not_equal_variance), tails)
end
# Cohen's d is a measure of effect size. Its defined as the difference between two means divided by a standard deviation for the data
def d
n1=@v1.n_valid
n2=@v2.n_valid
num=@v1.mean-@v2.mean
den=Math::sqrt( ((n1-1)*@v1.sd+(n2-1)*@v2.sd).quo(n1+n2))
num.quo(den)
end
def report_building(b) # :nodoc:
b.section(:name=>@name) {|g|
g.table(:name=>_("Mean and standard deviation"), :header=>[_("Variable"), _("mean"), _("sd"),_("n")]) {|t|
t.row([@v1.name,"%0.4f" % @v1.mean,"%0.4f" % @v1.sd, @v1.n_valid])
t.row([@v2.name,"%0.4f" % @v2.mean,"%0.4f" % @v2.sd, @v2.n_valid])
}
g.parse_element(Statsample::Test.levene([@v1,@v2],:name=>_("Levene test for equality of variances")))
g.table(:name=>_("T statistics"),:header=>["Type","t","df", "p (#{tails} tails)"].map{|v| _(v)}) {|t|
t.row([_("Equal variance"), "%0.4f" % t_equal_variance, df_equal_variance, "%0.4f" % probability_equal_variance])
t.row([_("Non equal variance"), "%0.4f" % t_not_equal_variance, "%0.4f" % df_not_equal_variance, "%0.4f" % probability_not_equal_variance])
}
g.table(:name=>_("Effect size")) do |t|
t.row ['x1-x2', "%0.4f" % (@v1.mean-@v2.mean)]
t.row ['d', "%0.4f" % d]
end
}
end
end
end
end
end