module Statsample module Test # A t-test is any statistical hypothesis test in which the test # statistic follows a Student's t distribution, if the null # hypothesis is supported class T class << self include Math # Test the null hypothesis that the population mean is equal to a specified value u, one uses the statistic. # Is the same formula used on t-test for paired sample. # * x: sample/differences mean # * u: population mean # * s: sample/differences standard deviation # * n: sample size def one_sample(x,u,s,n) (x-u)*Math::sqrt(n).quo(s) end # Test if means of two samples are different. # * x1: sample 1 mean # * x2: sample 2 mean # * s1: sample 1 standard deviation # * s2: sample 2 standard deviation # * n1: sample 1 size # * n2: sample 2 size # * equal_variance: true if equal_variance assumed # def two_sample_independent(x1, x2, s1, s2, n1, n2, equal_variance = false) num=x1-x2 if equal_variance sx1x2 = sqrt(((n1-1)*s1**2 + (n2-1)*s2**2).quo(n1+n2-2)) den = sx1x2*sqrt(1.quo(n1)+1.quo(n2)) else den=sqrt((s1**2).quo(n1) + (s2**2).quo(n2)) end num.quo(den) end # Degrees of freedom for equal variance on t test def df_equal_variance(n1,n2) n1+n2-2 end # Degrees of freedom for unequal variance # * s1: sample 1 standard deviation # * s2: sample 2 standard deviation # * n1: sample 1 size # * n2: sample 2 size # == Reference # * http://en.wikipedia.org/wiki/Welch-Satterthwaite_equation def df_not_equal_variance(s1,s2,n1,n2) s2_1=s1**2 s2_2=s2**2 num=(s2_1.quo(n1)+s2_2.quo(n2))**2 den=(s2_1.quo(n1)**2).quo(n1-1) + (s2_2.quo(n2)**2).quo(n2-1) num.quo(den) end end include Statsample::Test include Summarizable attr_reader :standard_error, :estimate, :df # Tails for p-value (:both, :left or :right). Default :both attr_accessor :tails # Name of F analysis attr_accessor :name attr_accessor :confidence_level attr_reader :t attr_accessor :estimate_name, :standard_error_name # Creates a generic t test. Use OneSample or TwoSamplesIndependent # classes for better summaries. # Parameters: # * estimate: estimate # * standard_error: standard error of estimate # * df: degrees of freedom def initialize(estimate, standard_error, df, opts=Hash.new) @estimate=estimate @standard_error=standard_error @df=df @t = @estimate / @standard_error.to_f opts_default={ :tails=>:both, :name=>_("T Test"), :estimate_name=>_("Estimate"), :standard_error_name=>_("Std.Err.of Estimate"), :confidence_level=>0.95} @opts = opts_default.merge(opts) @opts.keys.each {|k| send("#{k}=", @opts[k]) if respond_to? k } end alias :se :standard_error def to_f t end # probability def probability p_using_cdf(Distribution::T.cdf(t, df), tails) end def confidence_interval(cl=nil) cl||=confidence_level t_crit = t_critical(cl, df) [estimate - se*t_crit, estimate + se*t_crit] end alias :ci :confidence_interval def report_building(builder) #:nodoc: builder.section(:name=>@name) do |section| section.text _("%s: %0.4f | %s: %0.4f") % [@estimate_name, @estimate, @standard_error_name, se] report_building_t(section) end end def report_building_t(s) df_f=@df.is_a?(Integer) ? "%d" : "%0.4f" s.text _("t(%d) = %0.4f, p=%0.4f (%s tails)") % [df, t,probability, tails] s.text _("CI(%d%%): %0.4f - %0.4f") % [confidence_level*100, ci[0],ci[1]] end # One Sample t-test # == Usage # a = Daru::Vector.new(1000.times.map {rand(100)}) # t_1=Statsample::Test::T::OneSample.new(a, {:u=>50}) # t_1.summary # # === Output # # = One Sample T Test # Sample mean: 48.954 # Population mean:50 # Tails: both # t = -1.1573, p=0.2474, d.f=999 class OneSample include Math include Statsample::Test include Summarizable # Options attr_accessor :opts # Name of test attr_accessor :name # Population mean to contrast attr_accessor :u # Degress of freedom attr_reader :df # Tails for probability (:both, :left or :right) attr_accessor :tails # Create a One Sample T Test # Options: # * :u = Mean to compare. Default= 0 # * :name = Name of the analysis # * :tails = Tail for probability. Could be :both, :left, :right def initialize(vector, opts=Hash.new) @vector=vector default={:u=>0, :name=>"One Sample T Test", :tails=>:both} @opts=default.merge(opts) @name=@opts[:name] @u=@opts[:u] @tails=@opts[:tails] @confidence_level=@opts[:confidence_level] || 0.95 @df= @vector.n_valid-1 @t=nil end def t_object T.new(@vector.mean-u, @vector.se, @vector.n_valid-1, opts) end def t t_object.t end def probability t_object.probability end def standard_error t_object.standard_error end alias :se :standard_error def confidence_interval(cl=nil) t_object.confidence_interval(cl) end alias :ci :confidence_interval def report_building(b) # :nodoc: b.section(:name=>@name) {|s| s.text _("Sample mean: %0.4f | Sample sd: %0.4f | se : %0.4f") % [@vector.mean, @vector.sd, se] s.text _("Population mean: %0.4f") % u if u!=0 t_object.report_building_t(s) } end end # Two Sample t-test. # # == Usage # a = Daru::Vector.new(1000.times.map {rand(100)}) # b = Daru::Vector.new(1000.times.map {rand(100)}) # t_2=Statsample::Test::T::TwoSamplesIndependent.new(a,b) # t_2.summary # === Output # = Two Sample T Test # Mean and standard deviation # +----------+---------+---------+------+ # | Variable | m | sd | n | # +----------+---------+---------+------+ # | 1 | 49.3310 | 29.3042 | 1000 | # | 2 | 47.8180 | 28.8640 | 1000 | # +----------+---------+---------+------+ # # == Levene Test # Levene Test # F: 0.3596 # p: 0.5488 # T statistics # +--------------------+--------+-----------+----------------+ # | Type | t | df | p (both tails) | # +--------------------+--------+-----------+----------------+ # | Equal variance | 1.1632 | 1998 | 0.2449 | # | Non equal variance | 1.1632 | 1997.5424 | 0.1362 | # +--------------------+--------+-----------+----------------+ class TwoSamplesIndependent include Math include Statsample::Test include DirtyMemoize include Summarizable # Options attr_accessor :opts # Name of test attr_accessor :name # Degress of freedom (equal variance) attr_reader :df_equal_variance # Degress of freedom (not equal variance) attr_reader :df_not_equal_variance # Value of t for equal_variance attr_reader :t_equal_variance # Value of t for non-equal_variance attr_reader :t_not_equal_variance # Probability(equal variance) attr_reader :probability_equal_variance # Probability(unequal variance) attr_reader :probability_not_equal_variance # Tails for probability (:both, :left or :right) attr_accessor :tails # Create the object dirty_writer :tails dirty_memoize :t_equal_variance, :t_not_equal_variance, :probability_equal_variance, :probability_not_equal_variance, :df_equal_variance, :df_not_equal_variance # Create a Two Independent T Test # Options: # * :name = Name of the analysis # * :tails = Tail for probability. Could be :both, :left, :right def initialize(v1, v2, opts=Hash.new) @v1=v1 @v2=v2 default={:u=>0, :name=>"Two Sample T Test", :tails=>:both} @opts=default.merge(opts) @name=@opts[:name] @tails=@opts[:tails] end # Set t and probability for given u def compute @t_equal_variance= T.two_sample_independent(@v1.mean, @v2.mean, @v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid,true) @t_not_equal_variance= T.two_sample_independent(@v1.mean, @v2.mean, @v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid, false) @df_equal_variance=T.df_equal_variance(@v1.n_valid, @v2.n_valid) @df_not_equal_variance=T.df_not_equal_variance(@v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid) @probability_equal_variance = p_using_cdf(Distribution::T.cdf(@t_equal_variance, @df_equal_variance), tails) @probability_not_equal_variance = p_using_cdf(Distribution::T.cdf(@t_not_equal_variance, @df_not_equal_variance), tails) end # Cohen's d is a measure of effect size. Its defined as the difference between two means divided by a standard deviation for the data def d n1=@v1.n_valid n2=@v2.n_valid num=@v1.mean-@v2.mean den=Math::sqrt( ((n1-1)*@v1.sd+(n2-1)*@v2.sd).quo(n1+n2)) num.quo(den) end def report_building(b) # :nodoc: b.section(:name=>@name) {|g| g.table(:name=>_("Mean and standard deviation"), :header=>[_("Variable"), _("mean"), _("sd"),_("n")]) {|t| t.row([@v1.name,"%0.4f" % @v1.mean,"%0.4f" % @v1.sd, @v1.n_valid]) t.row([@v2.name,"%0.4f" % @v2.mean,"%0.4f" % @v2.sd, @v2.n_valid]) } g.parse_element(Statsample::Test.levene([@v1,@v2],:name=>_("Levene test for equality of variances"))) g.table(:name=>_("T statistics"),:header=>["Type","t","df", "p (#{tails} tails)"].map{|v| _(v)}) {|t| t.row([_("Equal variance"), "%0.4f" % t_equal_variance, df_equal_variance, "%0.4f" % probability_equal_variance]) t.row([_("Non equal variance"), "%0.4f" % t_not_equal_variance, "%0.4f" % df_not_equal_variance, "%0.4f" % probability_not_equal_variance]) } g.table(:name=>_("Effect size")) do |t| t.row ['x1-x2', "%0.4f" % (@v1.mean-@v2.mean)] t.row ['d', "%0.4f" % d] end } end end end end end