lib/vanity/experiment/ab_test.rb in vanity-1.8.2 vs lib/vanity/experiment/ab_test.rb in vanity-1.8.3.beta

- old
+ new

@@ -1,118 +1,34 @@ require "digest/md5" +require "vanity/experiment/alternative" +require "vanity/experiment/bayesian_bandit_score" module Vanity module Experiment + # The meat. + class AbTest < Base + class << self + # Convert z-score to probability. + def probability(score) + score = score.abs + probability = AbTest::Z_TO_PROBABILITY.find { |z,p| score >= z } + probability ? probability.last : 0 + end - # One of several alternatives in an A/B test (see AbTest#alternatives). - class Alternative - - def initialize(experiment, id, value) #, participants, converted, conversions) - @experiment = experiment - @id = id - @name = "option #{(@id + 65).chr}" - @value = value + def friendly_name + "A/B Test" + end end - # Alternative id, only unique for this experiment. - attr_reader :id + DEFAULT_SCORE_METHOD = :z_score - # Alternative name (option A, option B, etc). - attr_reader :name - - # Alternative value. - attr_reader :value - - # Experiment this alternative belongs to. - attr_reader :experiment - - # Number of participants who viewed this alternative. - def participants - load_counts unless @participants - @participants - end - - # Number of participants who converted on this alternative (a participant is counted only once). - def converted - load_counts unless @converted - @converted - end - - # Number of conversions for this alternative (same participant may be counted more than once). - def conversions - load_counts unless @conversions - @conversions - end - - # Z-score for this alternative, related to 2nd-best performing alternative. Populated by AbTest#score. - attr_accessor :z_score - - # Probability derived from z-score. Populated by AbTest#score. - attr_accessor :probability - - # Difference from least performing alternative. Populated by AbTest#score. - attr_accessor :difference - - # Conversion rate calculated as converted/participants - def conversion_rate - @conversion_rate ||= (participants > 0 ? converted.to_f/participants.to_f : 0.0) - end - - # The measure we use to order (sort) alternatives and decide which one is better (by calculating z-score). - # Defaults to conversion rate. - def measure - conversion_rate - end - - def <=>(other) - measure <=> other.measure - end - - def ==(other) - other && id == other.id && experiment == other.experiment - end - - def to_s - name - end - - def inspect - "#{name}: #{value} #{converted}/#{participants}" - end - - def load_counts - if @experiment.playground.collecting? - @participants, @converted, @conversions = @experiment.playground.connection.ab_counts(@experiment.id, id).values_at(:participants, :converted, :conversions) - else - @participants = @converted = @conversions = 0 - end - end - end - - - # The meat. - class AbTest < Base - class << self - - # Convert z-score to probability. - def probability(score) - score = score.abs - probability = AbTest::Z_TO_PROBABILITY.find { |z,p| score >= z } - probability ? probability.last : 0 - end - - def friendly_name - "A/B Test" - end - - end - def initialize(*args) super + @score_method = DEFAULT_SCORE_METHOD + @use_probabilities = nil end - # -- Metric -- # Tells A/B test which metric we're measuring, or returns metric in use. # # @example Define A/B test against coolness metric @@ -166,10 +82,27 @@ # alternative(:blue) == alternatives[2] def alternative(value) alternatives.find { |alt| alt.value == value } end + # What method to use for calculating score. Default is :ab_test, but can + # also be set to :bandit_score to calculate probability of each + # alternative being the best. 
+ # + # @example Define A/B test which uses bayes_bandit_score in reporting + # ab_test "noodle_test" do + # alternatives "spaghetti", "linguine" + # metrics :signup + # score_method :bayes_bandit_score + # end + def score_method(method=nil) + if method + @score_method = method + end + @score_method + end + # Defines an A/B test with two alternatives: false and true. This is the # default pair of alternatives, so just syntactic sugar for those who love # being explicit. # # @example @@ -195,24 +128,32 @@ # color = experiment(:which_blue).choose def choose if @playground.collecting? if active? identity = identity() - index = connection.ab_showing(@id, identity) - unless index - index = alternative_for(identity) - if !@playground.using_js? - # if we have an on_assignment block, call it on new assignments - if @on_assignment_block - assignment = alternatives[index.to_i] - if !connection.ab_seen @id, identity, assignment - @on_assignment_block.call(Vanity.context, identity, assignment, self) - end - end - connection.ab_add_participant @id, index, identity - check_completion! - end + index = connection.ab_showing(@id, identity) + unless index + index = alternative_for(identity) + if !@playground.using_js? + # if we have an on_assignment block, call it on new assignments + if @on_assignment_block + assignment = alternatives[index.to_i] + if !connection.ab_seen @id, identity, assignment + @on_assignment_block.call(Vanity.context, identity, assignment, self) + end + end + # if we are rebalancing probabilities, keep track of how long it has been since we last rebalanced + if @rebalance_frequency + @assignments_since_rebalancing += 1 + if @assignments_since_rebalancing >= @rebalance_frequency + @assignments_since_rebalancing = 0 + rebalance! + end + end + connection.ab_add_participant @id, index, identity + check_completion! + end end else index = connection.ab_get_outcome(@id) || alternative_for(identity) end else @@ -286,16 +227,24 @@ end # -- Reporting -- + def calculate_score + if respond_to?(score_method) + self.send(score_method) + else + score + end + end + # Scores alternatives based on the current tracking data. This method # returns a structure with the following attributes: # [:alts] Ordered list of alternatives, populated with scoring info. # [:base] Second best performing alternative. # [:least] Least performing alternative (but more than zero conversion). - # [:choice] Choice alterntive, either the outcome or best alternative. + # [:choice] Choice alternative, either the outcome or best alternative. # # Alternatives returned by this method are populated with the following # attributes: # [:z_score] Z-score (relative to the base alternative). # [:probability] Probability (z-score mapped to 0, 90, 95, 99 or 99.9%). @@ -327,14 +276,52 @@ end # best alternative is one with highest conversion rate (best shot). # choice alternative can only pick best if we have high probability (>90%). best = sorted.last if sorted.last.measure > 0.0 choice = outcome ? alts[outcome.id] : (best && best.probability >= probability ? best : nil) - Struct.new(:alts, :best, :base, :least, :choice).new(alts, best, base, least, choice) + Struct.new(:alts, :best, :base, :least, :choice, :method).new(alts, best, base, least, choice, :score) end - # Use the result of #score to derive a conclusion. Returns an + # Scores alternatives based on the current tracking data, using Bayesian + # estimates of the best binomial bandit. 
Based on the R bandit package, + # http://cran.r-project.org/web/packages/bandit, which is based on + # Steven L. Scott, A modern Bayesian look at the multi-armed bandit, + # Appl. Stochastic Models Bus. Ind. 2010; 26:639-658. + # (http://www.economics.uci.edu/~ivan/asmb.874.pdf) + # + # This method returns a structure with the following attributes: + # [:alts] Ordered list of alternatives, populated with scoring info. + # [:base] Second best performing alternative. + # [:least] Least performing alternative (but more than zero conversion). + # [:choice] Choice alternative, either the outcome or best alternative. + # + # Alternatives returned by this method are populated with the following + # attributes: + # [:probability] Probability (probability this is the best alternative). + # [:difference] Difference from the least performant altenative. + # + # The choice alternative is set only if its probability is higher or + # equal to the specified probability (default is 90%). + def bayes_bandit_score(probability = 90) + begin + require "backports/1.9.1/kernel/define_singleton_method" if RUBY_VERSION < "1.9" + require "integration" + require "rubystats" + rescue LoadError + fail "to use bayes_bandit_score, install integration and rubystats gems" + end + + begin + require "gsl" + rescue LoadError + warn "for better integration performance, install gsl gem" + end + + BayesianBanditScore.new(alternatives, outcome).calculate! + end + + # Use the result of #score or #bayes_bandit_score to derive a conclusion. Returns an # array of claims. def conclusion(score = score) claims = [] participants = score.alts.inject(0) { |t,alt| t + alt.participants } claims << case participants @@ -349,20 +336,28 @@ # then alternatives with no conversion. sorted |= score.alts # we want a result that's clearly better than 2nd best. best, second = sorted[0], sorted[1] if best.measure > second.measure - diff = ((best.measure - second.measure) / second.measure * 100).round - better = " (%d%% better than %s)" % [diff, second.name] if diff > 0 - claims << "The best choice is %s: it converted at %.1f%%%s." % [best.name, best.measure * 100, better] - if best.probability >= 90 - claims << "With %d%% probability this result is statistically significant." % score.best.probability - else - claims << "This result is not statistically significant, suggest you continue this experiment." - end - sorted.delete best - end + diff = ((best.measure - second.measure) / second.measure * 100).round + better = " (%d%% better than %s)" % [diff, second.name] if diff > 0 + claims << "The best choice is %s: it converted at %.1f%%%s." % [best.name, best.measure * 100, better] + if score.method == :bayes_bandit_score + if best.probability >= 90 + claims << "With %d%% probability this result is the best." % score.best.probability + else + claims << "This result does not have strong confidence behind it, suggest you continue this experiment." + end + else + if best.probability >= 90 + claims << "With %d%% probability this result is statistically significant." % score.best.probability + else + claims << "This result is not statistically significant, suggest you continue this experiment." + end + end + sorted.delete best + end sorted.each do |alt| if alt.measure > 0.0 claims << "%s converted at %.1f%%." % [alt.name.gsub(/^o/, "O"), alt.measure * 100] else claims << "%s did not convert." % alt.name.gsub(/^o/, "O") @@ -373,11 +368,48 @@ end claims << "#{score.choice.name.gsub(/^o/, "O")} selected as the best alternative." 
if score.choice claims end + # -- Unequal probability assignments -- + def set_alternative_probabilities(alternative_probabilities) + # create @use_probabilities as a function to go from [0,1] to outcome + cumulative_probability = 0.0 + new_probabilities = alternative_probabilities.map {|am| [am, (cumulative_probability += am.probability)/100.0]} + @use_probabilities = new_probabilities + end + + # -- Experiment rebalancing -- + + # Experiment rebalancing allows the app to automatically adjust the probabilities for each alternative; when one is performing better, it will increase its probability + # according to Bayesian one-armed bandit theory, in order to (eventually) maximize your overall conversions. + + # Sets or returns how often (as a function of number of people assigned) to rebalance. For example: + # ab_test "Simple" do + # rebalance_frequency 100 + # end + # + # puts "The experiment will automatically rebalance after every " + experiment(:simple).description + " users are assigned." + def rebalance_frequency(rf = nil) + if rf + @assignments_since_rebalancing = 0 + @rebalance_frequency = rf + rebalance! + end + @rebalance_frequency + end + + # Force experiment to rebalance. + def rebalance! + return unless @playground.collecting? + score_results = bayes_bandit_score + if score_results.method == :bayes_bandit_score + set_alternative_probabilities score_results.alts + end + end + # -- Completion -- # Defines how the experiment can choose the optimal outcome on completion. # # By default, Vanity will take the best alternative (highest conversion @@ -402,23 +434,26 @@ return unless @playground.collecting? outcome = connection.ab_get_outcome(@id) outcome && _alternatives[outcome] end - def complete! + def complete!(outcome = nil) return unless @playground.collecting? && active? super - if @outcome_is - begin - result = @outcome_is.call - outcome = result.id if Alternative === result && result.experiment == self - rescue - warn "Error in AbTest#complete!: #{$!}" + + unless outcome + if @outcome_is + begin + result = @outcome_is.call + outcome = result.id if Alternative === result && result.experiment == self + rescue + warn "Error in AbTest#complete!: #{$!}" + end + else + best = score.best + outcome = best.id if best end - else - best = score.best - outcome = best.id if best end # TODO: logging connection.ab_set_outcome @id, outcome || 0 end @@ -482,10 +517,18 @@ # Chooses an alternative for the identity and returns its index. This # method always returns the same alternative for a given experiment and # identity, and randomly distributed alternatives for each identity (in the # same experiment). def alternative_for(identity) - Digest::MD5.hexdigest("#{name}/#{identity}").to_i(17) % @alternatives.size + if @use_probabilities + existing_assignment = connection.ab_assigned @id, identity + return existing_assignment if existing_assignment + random_outcome = rand() + @use_probabilities.each do |alternative, max_prob| + return alternative.id if random_outcome < max_prob + end + end + return Digest::MD5.hexdigest("#{name}/#{identity}").to_i(17) % @alternatives.size end begin a = 50.0 # Returns array of [z-score, percentage]
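
A note on usage: the two experiment options added in this version, score_method and rebalance_frequency, plug into the existing ab_test DSL. A minimal sketch under assumed names (the "Price point" experiment, its alternatives, and the :signup metric are illustrative, not taken from this diff):

    # experiments/price_point.rb
    ab_test "Price point" do
      description "Which price converts better?"
      alternatives 19, 25, 29
      metrics :signup                   # assumes a :signup metric is defined
      score_method :bayes_bandit_score  # report Bayesian best-arm probabilities
      rebalance_frequency 1000          # re-weight alternatives every 1000 assignments
    end

With rebalance_frequency set, choose counts assignments and triggers rebalance! on every 1000th one; rebalance! runs bayes_bandit_score and feeds the resulting probabilities to set_alternative_probabilities, so later identities are assigned with unequal weights. Note that bayes_bandit_score needs the integration and rubystats gems installed (per the fail message above).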
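The cumulative-probability table built by set_alternative_probabilities, and the way alternative_for consumes it, can be reproduced in isolation. A plain-Ruby sketch; Arm here is a stand-in for the scored alternative objects (anything responding to id and probability), not a Vanity class:

    # Stand-in for a scored alternative: id plus probability in percent.
    Arm = Struct.new(:id, :probability)
    arms = [Arm.new(0, 10.0), Arm.new(1, 30.0), Arm.new(2, 60.0)]

    # Mirror set_alternative_probabilities: pair each arm with its running
    # cumulative probability, normalized from percent to [0, 1].
    cumulative = 0.0
    table = arms.map { |arm| [arm, (cumulative += arm.probability) / 100.0] }
    # => [[arm 0, 0.1], [arm 1, 0.4], [arm 2, 1.0]]

    # Mirror the loop in alternative_for: one rand draw in [0, 1) picks the
    # first arm whose cumulative bound exceeds it.
    draw = rand
    chosen, _bound = table.find { |_arm, bound| draw < bound }
    puts "assigned alternative #{chosen.id}"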
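Also new here: complete! accepts an optional outcome, so an application can close an experiment on a specific alternative rather than letting the scoring pick one. A brief sketch, reusing the assumed "Price point" experiment from above:

    exp = Vanity.playground.experiment(:price_point)
    # Force the 29 alternative as the winner; calling complete! with no
    # argument keeps the old behavior (outcome_is block, else best score).
    exp.complete!(exp.alternative(29).id)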