lib/vanity/experiment/ab_test.rb in vanity-1.8.2 vs lib/vanity/experiment/ab_test.rb in vanity-1.8.3.beta

- old
+ new

@@ -1,118 +1,34 @@ require "digest/md5" +require "vanity/experiment/alternative" +require "vanity/experiment/bayesian_bandit_score" module Vanity module Experiment + # The meat. + class AbTest < Base + class << self + # Convert z-score to probability. + def probability(score) + score = score.abs + probability = AbTest::Z_TO_PROBABILITY.find { |z,p| score >= z } + probability ? probability.last : 0 + end - # One of several alternatives in an A/B test (see AbTest#alternatives). - class Alternative - - def initialize(experiment, id, value) #, participants, converted, conversions) - @experiment = experiment - @id = id - @name = "option #{(@id + 65).chr}" - @value = value + def friendly_name + "A/B Test" + end end - # Alternative id, only unique for this experiment. - attr_reader :id + DEFAULT_SCORE_METHOD = :z_score - # Alternative name (option A, option B, etc). - attr_reader :name - - # Alternative value. - attr_reader :value - - # Experiment this alternative belongs to. - attr_reader :experiment - - # Number of participants who viewed this alternative. - def participants - load_counts unless @participants - @participants - end - - # Number of participants who converted on this alternative (a participant is counted only once). - def converted - load_counts unless @converted - @converted - end - - # Number of conversions for this alternative (same participant may be counted more than once). - def conversions - load_counts unless @conversions - @conversions - end - - # Z-score for this alternative, related to 2nd-best performing alternative. Populated by AbTest#score. - attr_accessor :z_score - - # Probability derived from z-score. Populated by AbTest#score. - attr_accessor :probability - - # Difference from least performing alternative. Populated by AbTest#score. - attr_accessor :difference - - # Conversion rate calculated as converted/participants - def conversion_rate - @conversion_rate ||= (participants > 0 ? converted.to_f/participants.to_f : 0.0) - end - - # The measure we use to order (sort) alternatives and decide which one is better (by calculating z-score). - # Defaults to conversion rate. - def measure - conversion_rate - end - - def <=>(other) - measure <=> other.measure - end - - def ==(other) - other && id == other.id && experiment == other.experiment - end - - def to_s - name - end - - def inspect - "#{name}: #{value} #{converted}/#{participants}" - end - - def load_counts - if @experiment.playground.collecting? - @participants, @converted, @conversions = @experiment.playground.connection.ab_counts(@experiment.id, id).values_at(:participants, :converted, :conversions) - else - @participants = @converted = @conversions = 0 - end - end - end - - - # The meat. - class AbTest < Base - class << self - - # Convert z-score to probability. - def probability(score) - score = score.abs - probability = AbTest::Z_TO_PROBABILITY.find { |z,p| score >= z } - probability ? probability.last : 0 - end - - def friendly_name - "A/B Test" - end - - end - def initialize(*args) super + @score_method = DEFAULT_SCORE_METHOD + @use_probabilities = nil end - # -- Metric -- # Tells A/B test which metric we're measuring, or returns metric in use. # # @example Define A/B test against coolness metric @@ -166,10 +82,27 @@ # alternative(:blue) == alternatives[2] def alternative(value) alternatives.find { |alt| alt.value == value } end + # What method to use for calculating score. Default is :ab_test, but can + # also be set to :bandit_score to calculate probability of each + # alternative being the best. 
+ # + # @example Define A/B test which uses bayes_bandit_score in reporting + # ab_test "noodle_test" do + # alternatives "spaghetti", "linguine" + # metrics :signup + # score_method :bayes_bandit_score + # end + def score_method(method=nil) + if method + @score_method = method + end + @score_method + end + # Defines an A/B test with two alternatives: false and true. This is the # default pair of alternatives, so just syntactic sugar for those who love # being explicit. # # @example @@ -195,24 +128,32 @@ # color = experiment(:which_blue).choose def choose if @playground.collecting? if active? identity = identity() - index = connection.ab_showing(@id, identity) - unless index - index = alternative_for(identity) - if !@playground.using_js? - # if we have an on_assignment block, call it on new assignments - if @on_assignment_block - assignment = alternatives[index.to_i] - if !connection.ab_seen @id, identity, assignment - @on_assignment_block.call(Vanity.context, identity, assignment, self) - end - end - connection.ab_add_participant @id, index, identity - check_completion! - end + index = connection.ab_showing(@id, identity) + unless index + index = alternative_for(identity) + if !@playground.using_js? + # if we have an on_assignment block, call it on new assignments + if @on_assignment_block + assignment = alternatives[index.to_i] + if !connection.ab_seen @id, identity, assignment + @on_assignment_block.call(Vanity.context, identity, assignment, self) + end + end + # if we are rebalancing probabilities, keep track of how long it has been since we last rebalanced + if @rebalance_frequency + @assignments_since_rebalancing += 1 + if @assignments_since_rebalancing >= @rebalance_frequency + @assignments_since_rebalancing = 0 + rebalance! + end + end + connection.ab_add_participant @id, index, identity + check_completion! + end end else index = connection.ab_get_outcome(@id) || alternative_for(identity) end else @@ -286,16 +227,24 @@ end # -- Reporting -- + def calculate_score + if respond_to?(score_method) + self.send(score_method) + else + score + end + end + # Scores alternatives based on the current tracking data. This method # returns a structure with the following attributes: # [:alts] Ordered list of alternatives, populated with scoring info. # [:base] Second best performing alternative. # [:least] Least performing alternative (but more than zero conversion). - # [:choice] Choice alterntive, either the outcome or best alternative. + # [:choice] Choice alternative, either the outcome or best alternative. # # Alternatives returned by this method are populated with the following # attributes: # [:z_score] Z-score (relative to the base alternative). # [:probability] Probability (z-score mapped to 0, 90, 95, 99 or 99.9%). @@ -327,14 +276,52 @@ end # best alternative is one with highest conversion rate (best shot). # choice alternative can only pick best if we have high probability (>90%). best = sorted.last if sorted.last.measure > 0.0 choice = outcome ? alts[outcome.id] : (best && best.probability >= probability ? best : nil) - Struct.new(:alts, :best, :base, :least, :choice).new(alts, best, base, least, choice) + Struct.new(:alts, :best, :base, :least, :choice, :method).new(alts, best, base, least, choice, :score) end - # Use the result of #score to derive a conclusion. Returns an + # Scores alternatives based on the current tracking data, using Bayesian + # estimates of the best binomial bandit. 
Based on the R bandit package, + # http://cran.r-project.org/web/packages/bandit, which is based on + # Steven L. Scott, A modern Bayesian look at the multi-armed bandit, + # Appl. Stochastic Models Bus. Ind. 2010; 26:639-658. + # (http://www.economics.uci.edu/~ivan/asmb.874.pdf) + # + # This method returns a structure with the following attributes: + # [:alts] Ordered list of alternatives, populated with scoring info. + # [:base] Second best performing alternative. + # [:least] Least performing alternative (but more than zero conversion). + # [:choice] Choice alternative, either the outcome or best alternative. + # + # Alternatives returned by this method are populated with the following + # attributes: + # [:probability] Probability (probability this is the best alternative). + # [:difference] Difference from the least performant altenative. + # + # The choice alternative is set only if its probability is higher or + # equal to the specified probability (default is 90%). + def bayes_bandit_score(probability = 90) + begin + require "backports/1.9.1/kernel/define_singleton_method" if RUBY_VERSION < "1.9" + require "integration" + require "rubystats" + rescue LoadError + fail "to use bayes_bandit_score, install integration and rubystats gems" + end + + begin + require "gsl" + rescue LoadError + warn "for better integration performance, install gsl gem" + end + + BayesianBanditScore.new(alternatives, outcome).calculate! + end + + # Use the result of #score or #bayes_bandit_score to derive a conclusion. Returns an # array of claims. def conclusion(score = score) claims = [] participants = score.alts.inject(0) { |t,alt| t + alt.participants } claims << case participants @@ -349,20 +336,28 @@ # then alternatives with no conversion. sorted |= score.alts # we want a result that's clearly better than 2nd best. best, second = sorted[0], sorted[1] if best.measure > second.measure - diff = ((best.measure - second.measure) / second.measure * 100).round - better = " (%d%% better than %s)" % [diff, second.name] if diff > 0 - claims << "The best choice is %s: it converted at %.1f%%%s." % [best.name, best.measure * 100, better] - if best.probability >= 90 - claims << "With %d%% probability this result is statistically significant." % score.best.probability - else - claims << "This result is not statistically significant, suggest you continue this experiment." - end - sorted.delete best - end + diff = ((best.measure - second.measure) / second.measure * 100).round + better = " (%d%% better than %s)" % [diff, second.name] if diff > 0 + claims << "The best choice is %s: it converted at %.1f%%%s." % [best.name, best.measure * 100, better] + if score.method == :bayes_bandit_score + if best.probability >= 90 + claims << "With %d%% probability this result is the best." % score.best.probability + else + claims << "This result does not have strong confidence behind it, suggest you continue this experiment." + end + else + if best.probability >= 90 + claims << "With %d%% probability this result is statistically significant." % score.best.probability + else + claims << "This result is not statistically significant, suggest you continue this experiment." + end + end + sorted.delete best + end sorted.each do |alt| if alt.measure > 0.0 claims << "%s converted at %.1f%%." % [alt.name.gsub(/^o/, "O"), alt.measure * 100] else claims << "%s did not convert." % alt.name.gsub(/^o/, "O") @@ -373,11 +368,48 @@ end claims << "#{score.choice.name.gsub(/^o/, "O")} selected as the best alternative." 
if score.choice claims end + # -- Unequal probability assignments -- + def set_alternative_probabilities(alternative_probabilities) + # create @use_probabilities as a function to go from [0,1] to outcome + cumulative_probability = 0.0 + new_probabilities = alternative_probabilities.map {|am| [am, (cumulative_probability += am.probability)/100.0]} + @use_probabilities = new_probabilities + end + + # -- Experiment rebalancing -- + + # Experiment rebalancing allows the app to automatically adjust the probabilities for each alternative; when one is performing better, it will increase its probability + # according to Bayesian one-armed bandit theory, in order to (eventually) maximize your overall conversions. + + # Sets or returns how often (as a function of number of people assigned) to rebalance. For example: + # ab_test "Simple" do + # rebalance_frequency 100 + # end + # + # puts "The experiment will automatically rebalance after every " + experiment(:simple).description + " users are assigned." + def rebalance_frequency(rf = nil) + if rf + @assignments_since_rebalancing = 0 + @rebalance_frequency = rf + rebalance! + end + @rebalance_frequency + end + + # Force experiment to rebalance. + def rebalance! + return unless @playground.collecting? + score_results = bayes_bandit_score + if score_results.method == :bayes_bandit_score + set_alternative_probabilities score_results.alts + end + end + # -- Completion -- # Defines how the experiment can choose the optimal outcome on completion. # # By default, Vanity will take the best alternative (highest conversion @@ -402,23 +434,26 @@ return unless @playground.collecting? outcome = connection.ab_get_outcome(@id) outcome && _alternatives[outcome] end - def complete! + def complete!(outcome = nil) return unless @playground.collecting? && active? super - if @outcome_is - begin - result = @outcome_is.call - outcome = result.id if Alternative === result && result.experiment == self - rescue - warn "Error in AbTest#complete!: #{$!}" + + unless outcome + if @outcome_is + begin + result = @outcome_is.call + outcome = result.id if Alternative === result && result.experiment == self + rescue + warn "Error in AbTest#complete!: #{$!}" + end + else + best = score.best + outcome = best.id if best end - else - best = score.best - outcome = best.id if best end # TODO: logging connection.ab_set_outcome @id, outcome || 0 end @@ -482,10 +517,18 @@ # Chooses an alternative for the identity and returns its index. This # method always returns the same alternative for a given experiment and # identity, and randomly distributed alternatives for each identity (in the # same experiment). def alternative_for(identity) - Digest::MD5.hexdigest("#{name}/#{identity}").to_i(17) % @alternatives.size + if @use_probabilities + existing_assignment = connection.ab_assigned @id, identity + return existing_assignment if existing_assignment + random_outcome = rand() + @use_probabilities.each do |alternative, max_prob| + return alternative.id if random_outcome < max_prob + end + end + return Digest::MD5.hexdigest("#{name}/#{identity}").to_i(17) % @alternatives.size end begin a = 50.0 # Returns array of [z-score, percentage]
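
A note on usage: the two experiment options added in this version, score_method and rebalance_frequency, plug into the existing ab_test DSL. A minimal sketch under assumed names (the "Price point" experiment, its alternatives, and the :signup metric are illustrative, not taken from this diff):

    # experiments/price_point.rb
    ab_test "Price point" do
      description "Which price converts better?"
      alternatives 19, 25, 29
      metrics :signup                   # assumes a :signup metric is defined
      score_method :bayes_bandit_score  # report Bayesian best-arm probabilities
      rebalance_frequency 1000          # re-weight alternatives every 1000 assignments
    end

With rebalance_frequency set, choose counts assignments and triggers rebalance! on every 1000th one; rebalance! runs bayes_bandit_score and feeds the resulting probabilities to set_alternative_probabilities, so later identities are assigned with unequal weights. Note that bayes_bandit_score needs the integration and rubystats gems installed (per the fail message above).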
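The cumulative-probability table built by set_alternative_probabilities, and the way alternative_for consumes it, can be reproduced in isolation. A plain-Ruby sketch; Arm here is a stand-in for the scored alternative objects (anything responding to id and probability), not a Vanity class:

    # Stand-in for a scored alternative: id plus probability in percent.
    Arm = Struct.new(:id, :probability)
    arms = [Arm.new(0, 10.0), Arm.new(1, 30.0), Arm.new(2, 60.0)]

    # Mirror set_alternative_probabilities: pair each arm with its running
    # cumulative probability, normalized from percent to [0, 1].
    cumulative = 0.0
    table = arms.map { |arm| [arm, (cumulative += arm.probability) / 100.0] }
    # => [[arm 0, 0.1], [arm 1, 0.4], [arm 2, 1.0]]

    # Mirror the loop in alternative_for: one rand draw in [0, 1) picks the
    # first arm whose cumulative bound exceeds it.
    draw = rand
    chosen, _bound = table.find { |_arm, bound| draw < bound }
    puts "assigned alternative #{chosen.id}"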
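Also new here: complete! accepts an optional outcome, so an application can close an experiment on a specific alternative rather than letting the scoring pick one. A brief sketch, reusing the assumed "Price point" experiment from above:

    exp = Vanity.playground.experiment(:price_point)
    # Force the 29 alternative as the winner; calling complete! with no
    # argument keeps the old behavior (outcome_is block, else best score).
    exp.complete!(exp.alternative(29).id)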