module Statsample
  # Reliability analysis of multi-item scales: Cronbach's alpha,
  # item characteristic curves and per-item statistics.
  module Reliability
    class << self
      # Calculates Cronbach's alpha for a given dataset.
      # Only uses tuples without missing data.
      #
      # The value is k / (k - 1) * (1 - sum(item variances) / variance(total)),
      # where k is the number of fields and total is the row-wise sum of
      # all vectors.
      #
      # ods:: dataset responding to dup_only_valid, fields, vectors and
      #       vector_sum.
      def cronbach_alpha(ods)
        ds = ods.dup_only_valid
        n_items = ds.fields.size
        # ds.vectors yields [name, vector] pairs; only the vector is needed.
        sum_var_items = ds.vectors.inject(0) { |ac, v| ac + v[1].variance_sample }
        total = ds.vector_sum
        (n_items / (n_items - 1).to_f) * (1 - (sum_var_items / total.variance_sample))
      end

      # Calculates Cronbach's alpha for a given dataset
      # using standardized values for every vector.
      # Only uses tuples without missing data.
      #
      # Every field of +ods+ is replaced by its vector_standarized
      # version before delegating to cronbach_alpha.
      def cronbach_alpha_standarized(ods)
        standardized = ods.fields.inject({}) do |a, f|
          a[f] = ods[f].vector_standarized
          a
        end
        cronbach_alpha(standardized.to_dataset)
      end
    end

    # Tabulates, for every total score, how often each response of each
    # item occurs, so that response proportions can be read per total.
    class ItemCharacteristicCurve
      # totals::       Hash total score => number of cases with that total.
      # counts::       Hash field => total score => response (String) => count.
      # vector_total:: vector holding the total score of every case.
      attr_reader :totals, :counts, :vector_total

      # ds::           dataset with one vector per item.
      # vector_total:: optional vector of total scores; defaults to
      #                ds.vector_sum.
      #
      # Raises RuntimeError when vector_total and ds differ in size.
      def initialize(ds, vector_total = nil)
        vector_total ||= ds.vector_sum
        raise "Total size != Dataset size" if vector_total.size != ds.cases

        @vector_total = vector_total
        @ds = ds
        @totals = {}
        @counts = @ds.fields.inject({}) do |a, v|
          a[v] = {}
          a
        end
        process
      end

      # Fills @totals and @counts by walking the dataset once.
      # Responses are keyed by their to_s representation.
      def process
        i = 0
        @ds.each do |row|
          tot = @vector_total[i]
          @totals[tot] ||= 0
          @totals[tot] += 1
          @ds.fields.each do |f|
            item = row[f].to_s
            @counts[f][tot] ||= {}
            @counts[f][tot][item] ||= 0
            @counts[f][tot][item] += 1
          end
          i += 1
        end
      end

      # Returns a Hash total score => proportion of cases with that total
      # whose response on +field+ equals +item+ (compared as String).
      # Totals where the response never occurs map to 0.0.
      def curve_field(field, item)
        out = {}
        item = item.to_s
        @totals.each do |value, n|
          count_value = @counts[field][value][item] || 0
          out[value] = count_value.to_f / n.to_f
        end
        out
      end
    end

    # Descriptive statistics of a scale (sum of all items) plus
    # item-level diagnostics, computed on valid cases only.
    class ItemAnalysis
      attr_reader :mean, :sd, :valid_n, :alpha, :alpha_standarized

      # ds:: dataset with one vector per item. Only its valid cases
      #      (ds.dup_only_valid) are kept for the analysis.
      #
      # Raises DatasetException when either alpha cannot be calculated.
      def initialize(ds)
        @ds = ds.dup_only_valid
        @total = @ds.vector_sum
        @mean = @total.mean
        @median = @total.median
        @skew = @total.skew
        @kurtosis = @total.kurtosis
        @sd = @total.sdp
        @valid_n = @total.size
        begin
          @alpha = Statsample::Reliability.cronbach_alpha(ds)
          @alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(ds)
        rescue => e
          raise DatasetException.new(@ds, e), "Problem on calculate alpha"
        end
      end

      # Returns a hash with structure
      #   field => { total score => mean value of the field among the
      #              cases having that total score }
      def item_characteristic_curve
        i = 0
        out = {}
        total = {}
        @ds.each do |row|
          tot = @total[i]
          @ds.fields.each do |f|
            out[f] ||= {}
            total[f] ||= {}
            out[f][tot] ||= 0
            total[f][tot] ||= 0
            out[f][tot] += row[f]
            total[f][tot] += 1
          end
          i += 1
        end
        # Turn the accumulated sums into means.
        total.each do |f, var|
          var.each do |tot, _n|
            out[f][tot] = out[f][tot].to_f / total[f][tot]
          end
        end
        out
      end

      # Plots the item characteristic curve of every field with gnuplot,
      # one plot per field, values truncated to two decimals.
      #
      # NOTE(review): +directory+, +base+ and +options+ are accepted but
      # not used by this body; no output file is configured here.
      def gnuplot_item_characteristic_curve(directory, base = "crd", options = {})
        require 'gnuplot'
        crd = item_characteristic_curve
        @ds.fields.each do |f|
          x = []
          y = []
          Gnuplot.open do |gp|
            Gnuplot::Plot.new(gp) do |plot|
              crd[f].sort.each do |tot, prop|
                x.push(tot)
                y.push((prop * 100).to_i.to_f / 100)
              end
              plot.data << Gnuplot::DataSet.new([x, y]) do |data_set|
                data_set.with = "linespoints"
                data_set.notitle
              end
            end
          end
        end
      end

      # Writes one SVG plot per field to <directory>/<base>_<field>.svg.
      # Each plot has one series per distinct response of the field,
      # giving the proportion of that response at every total score
      # (truncated to two decimals).
      #
      # options:: merged over the defaults :height=>500, :width=>800,
      #           :key=>true and passed to SVG::Graph::Plot.new.
      def svggraph_item_characteristic_curve(directory, base = "icc", options = {})
        require 'statsample/graph/svggraph'
        crd = ItemCharacteristicCurve.new(@ds)
        # Same for every field, so build it once; the caller's hash is
        # not modified.
        graph_options = {
          :height => 500,
          :width => 800,
          :key => true
        }.update(options)
        @ds.fields.each do |f|
          factors = @ds[f].factors.sort
          graph = ::SVG::Graph::Plot.new(graph_options)
          factors.each do |factor|
            label = factor.to_s
            # Flat [x1, y1, x2, y2, ...] list of coordinates.
            dataset = []
            crd.curve_field(f, label).each do |tot, prop|
              dataset.push(tot)
              dataset.push((prop * 100).to_i.to_f / 100)
            end
            graph.add_data({
              :title => label,
              :data => dataset
            })
          end
          File.open(File.join(directory, "#{base}_#{f}.svg"), "w") do |fp|
            fp.puts(graph.burn)
          end
        end
      end

      # Returns a Hash field => Pearson correlation between the field and
      # the sum of all the other fields.
      def item_total_correlation
        @ds.fields.inject({}) do |a, v|
          vector = @ds[v].dup
          ds2 = @ds.dup
          ds2.delete_vector(v)
          total = ds2.vector_sum
          a[v] = Statsample::Bivariate.pearson(vector, total)
          a
        end
      end

      # Returns a Hash field => { :mean, :sds } of each item.
      def item_statistics
        @ds.fields.inject({}) do |a, v|
          a[v] = { :mean => @ds[v].mean, :sds => @ds[v].sds }
          a
        end
      end

      # Returns a Hash field => { :mean, :sds, :variance_sample, :alpha }
      # describing the scale obtained when that field is removed.
      def stats_if_deleted
        @ds.fields.inject({}) do |a, v|
          ds2 = @ds.dup
          ds2.delete_vector(v)
          total = ds2.vector_sum
          a[v] = {}
          a[v][:mean] = total.mean
          a[v][:sds] = total.sds
          a[v][:variance_sample] = total.variance_sample
          a[v][:alpha] = Statsample::Reliability.cronbach_alpha(ds2)
          a
        end
      end

      # Returns an HTML fragment (String) with a heading and a table
      # holding one row per field: item mean and standard deviation,
      # scale mean / variance / standard deviation if the item is
      # deleted, item-total correlation and alpha if deleted. All
      # numbers are formatted with five decimals.
      #
      # NOTE(review): the element names used below are a reconstruction;
      # only the visible labels and value formats are certain. Confirm
      # against the intended markup.
      def html_summary
        header = <<EOF
<h2>Summary for scale:</h2>
<table>
<thead>
<tr>
<th>Variable</th>
<th>Mean</th>
<th>StDv.</th>
<th>Mean if deleted</th>
<th>Var. if deleted</th>
<th>StDv. if deleted</th>
<th>Itm-Totl Correl.</th>
<th>Alpha if deleted</th>
</tr>
</thead>
EOF
        itc = item_total_correlation
        sid = stats_if_deleted
        is = item_statistics
        rows = @ds.fields.map do |f|
          <<EOF
<tr>
<td>#{f}</td>
<td>#{sprintf("%0.5f", is[f][:mean])}</td>
<td>#{sprintf("%0.5f", is[f][:sds])}</td>
<td>#{sprintf("%0.5f", sid[f][:mean])}</td>
<td>#{sprintf("%0.5f", sid[f][:variance_sample])}</td>
<td>#{sprintf("%0.5f", sid[f][:sds])}</td>
<td>#{sprintf("%0.5f", itc[f])}</td>
<td>#{sprintf("%0.5f", sid[f][:alpha])}</td>
</tr>
EOF
        end
        header + rows.join + "</table>\n"
      end
    end
  end
end