lib/vanity/experiment/ab_test.rb in vanity-1.8.2 vs lib/vanity/experiment/ab_test.rb in vanity-1.8.3.beta
- old
+ new
@@ -1,118 +1,34 @@
require "digest/md5"
+require "vanity/experiment/alternative"
+require "vanity/experiment/bayesian_bandit_score"
module Vanity
module Experiment
+ # The meat.
+ class AbTest < Base
+ class << self
+ # Convert z-score to probability.
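+ #
+ # Illustrative values, assuming the standard one-tailed Z_TO_PROBABILITY
+ # table built at the bottom of this file:
+ #   AbTest.probability(1.96) # => 95
+ #   AbTest.probability(0.5)  # => 0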
+ def probability(score)
+ score = score.abs
+ probability = AbTest::Z_TO_PROBABILITY.find { |z,p| score >= z }
+ probability ? probability.last : 0
+ end
- # One of several alternatives in an A/B test (see AbTest#alternatives).
- class Alternative
-
- def initialize(experiment, id, value) #, participants, converted, conversions)
- @experiment = experiment
- @id = id
- @name = "option #{(@id + 65).chr}"
- @value = value
+ def friendly_name
+ "A/B Test"
+ end
end
- # Alternative id, only unique for this experiment.
- attr_reader :id
+ DEFAULT_SCORE_METHOD = :z_score
- # Alternative name (option A, option B, etc).
- attr_reader :name
-
- # Alternative value.
- attr_reader :value
-
- # Experiment this alternative belongs to.
- attr_reader :experiment
-
- # Number of participants who viewed this alternative.
- def participants
- load_counts unless @participants
- @participants
- end
-
- # Number of participants who converted on this alternative (a participant is counted only once).
- def converted
- load_counts unless @converted
- @converted
- end
-
- # Number of conversions for this alternative (same participant may be counted more than once).
- def conversions
- load_counts unless @conversions
- @conversions
- end
-
- # Z-score for this alternative, related to 2nd-best performing alternative. Populated by AbTest#score.
- attr_accessor :z_score
-
- # Probability derived from z-score. Populated by AbTest#score.
- attr_accessor :probability
-
- # Difference from least performing alternative. Populated by AbTest#score.
- attr_accessor :difference
-
- # Conversion rate calculated as converted/participants
- def conversion_rate
- @conversion_rate ||= (participants > 0 ? converted.to_f/participants.to_f : 0.0)
- end
-
- # The measure we use to order (sort) alternatives and decide which one is better (by calculating z-score).
- # Defaults to conversion rate.
- def measure
- conversion_rate
- end
-
- def <=>(other)
- measure <=> other.measure
- end
-
- def ==(other)
- other && id == other.id && experiment == other.experiment
- end
-
- def to_s
- name
- end
-
- def inspect
- "#{name}: #{value} #{converted}/#{participants}"
- end
-
- def load_counts
- if @experiment.playground.collecting?
- @participants, @converted, @conversions = @experiment.playground.connection.ab_counts(@experiment.id, id).values_at(:participants, :converted, :conversions)
- else
- @participants = @converted = @conversions = 0
- end
- end
- end
-
-
- # The meat.
- class AbTest < Base
- class << self
-
- # Convert z-score to probability.
- def probability(score)
- score = score.abs
- probability = AbTest::Z_TO_PROBABILITY.find { |z,p| score >= z }
- probability ? probability.last : 0
- end
-
- def friendly_name
- "A/B Test"
- end
-
- end
-
def initialize(*args)
super
+ @score_method = DEFAULT_SCORE_METHOD
+ @use_probabilities = nil
end
-
# -- Metric --
# Tells A/B test which metric we're measuring, or returns metric in use.
#
# @example Define A/B test against coolness metric
@@ -166,10 +82,27 @@
# alternative(:blue) == alternatives[2]
def alternative(value)
alternatives.find { |alt| alt.value == value }
end
+ # What method to use for calculating score. Default is :z_score, but can
+ # also be set to :bayes_bandit_score to calculate the probability of each
+ # alternative being the best.
+ #
+ # @example Define A/B test which uses bayes_bandit_score in reporting
+ # ab_test "noodle_test" do
+ # alternatives "spaghetti", "linguine"
+ # metrics :signup
+ # score_method :bayes_bandit_score
+ # end
+ def score_method(method=nil)
+ if method
+ @score_method = method
+ end
+ @score_method
+ end
+
# Defines an A/B test with two alternatives: false and true. This is the
# default pair of alternatives, so just syntactic sugar for those who love
# being explicit.
#
# @example
@@ -195,24 +128,32 @@
# color = experiment(:which_blue).choose
def choose
if @playground.collecting?
if active?
identity = identity()
- index = connection.ab_showing(@id, identity)
- unless index
- index = alternative_for(identity)
- if !@playground.using_js?
- # if we have an on_assignment block, call it on new assignments
- if @on_assignment_block
- assignment = alternatives[index.to_i]
- if !connection.ab_seen @id, identity, assignment
- @on_assignment_block.call(Vanity.context, identity, assignment, self)
- end
- end
- connection.ab_add_participant @id, index, identity
- check_completion!
- end
+ index = connection.ab_showing(@id, identity)
+ unless index
+ index = alternative_for(identity)
+ if !@playground.using_js?
+ # if we have an on_assignment block, call it on new assignments
+ if @on_assignment_block
+ assignment = alternatives[index.to_i]
+ if !connection.ab_seen @id, identity, assignment
+ @on_assignment_block.call(Vanity.context, identity, assignment, self)
+ end
+ end
+ # when rebalancing is enabled, count assignments and rebalance once
+ # @rebalance_frequency new assignments have accumulated
+ if @rebalance_frequency
+ @assignments_since_rebalancing += 1
+ if @assignments_since_rebalancing >= @rebalance_frequency
+ @assignments_since_rebalancing = 0
+ rebalance!
+ end
+ end
+ connection.ab_add_participant @id, index, identity
+ check_completion!
+ end
end
else
index = connection.ab_get_outcome(@id) || alternative_for(identity)
end
else
@@ -286,16 +227,24 @@
end
# -- Reporting --
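+ # Scores alternatives using the method configured via #score_method,
+ # falling back to #score when that method is not defined.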
+ def calculate_score
+ if respond_to?(score_method)
+ self.send(score_method)
+ else
+ score
+ end
+ end
+
# Scores alternatives based on the current tracking data. This method
# returns a structure with the following attributes:
# [:alts] Ordered list of alternatives, populated with scoring info.
# [:base] Second best performing alternative.
# [:least] Least performing alternative (but more than zero conversion).
- # [:choice] Choice alterntive, either the outcome or best alternative.
+ # [:choice] Choice alternative, either the outcome or best alternative.
#
# Alternatives returned by this method are populated with the following
# attributes:
# [:z_score] Z-score (relative to the base alternative).
# [:probability] Probability (z-score mapped to 0, 90, 95, 99 or 99.9%).
@@ -327,14 +276,52 @@
end
# best alternative is one with highest conversion rate (best shot).
# choice alternative can only pick best if we have high probability (>90%).
best = sorted.last if sorted.last.measure > 0.0
choice = outcome ? alts[outcome.id] : (best && best.probability >= probability ? best : nil)
- Struct.new(:alts, :best, :base, :least, :choice).new(alts, best, base, least, choice)
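+ # the :method field lets #conclusion tell z-score results apart from
+ # bayes_bandit_score results when phrasing its claims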
+ Struct.new(:alts, :best, :base, :least, :choice, :method).new(alts, best, base, least, choice, :score)
end
- # Use the result of #score to derive a conclusion. Returns an
+ # Scores alternatives based on the current tracking data, using Bayesian
+ # estimates of the best binomial bandit. Based on the R bandit package,
+ # http://cran.r-project.org/web/packages/bandit, which is based on
+ # Steven L. Scott, A modern Bayesian look at the multi-armed bandit,
+ # Appl. Stochastic Models Bus. Ind. 2010; 26:639-658.
+ # (http://www.economics.uci.edu/~ivan/asmb.874.pdf)
+ #
+ # This method returns a structure with the following attributes:
+ # [:alts] Ordered list of alternatives, populated with scoring info.
+ # [:base] Second best performing alternative.
+ # [:least] Least performing alternative (but more than zero conversion).
+ # [:choice] Choice alternative, either the outcome or best alternative.
+ #
+ # Alternatives returned by this method are populated with the following
+ # attributes:
+ # [:probability] Probability (chance that this alternative is the best).
+ # [:difference] Difference from the least performing alternative.
+ #
+ # The choice alternative is set only if its probability is higher or
+ # equal to the specified probability (default is 90%).
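+ #
+ # @example Inspect bandit probabilities (illustrative; assumes the
+ #   "noodle_test" experiment defined under #score_method)
+ #   score = experiment(:noodle_test).bayes_bandit_score
+ #   score.alts.each { |alt| puts "#{alt.name}: #{alt.probability}%" }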
+ def bayes_bandit_score(probability = 90)
+ begin
+ require "backports/1.9.1/kernel/define_singleton_method" if RUBY_VERSION < "1.9"
+ require "integration"
+ require "rubystats"
+ rescue LoadError
+ fail "to use bayes_bandit_score, install integration and rubystats gems"
+ end
+
+ begin
+ require "gsl"
+ rescue LoadError
+ warn "for better integration performance, install gsl gem"
+ end
+
+ BayesianBanditScore.new(alternatives, outcome).calculate!
+ end
+
+ # Use the result of #score or #bayes_bandit_score to derive a conclusion. Returns an
# array of claims.
def conclusion(score = score)
claims = []
participants = score.alts.inject(0) { |t,alt| t + alt.participants }
claims << case participants
@@ -349,20 +336,28 @@
# then alternatives with no conversion.
sorted |= score.alts
# we want a result that's clearly better than 2nd best.
best, second = sorted[0], sorted[1]
if best.measure > second.measure
- diff = ((best.measure - second.measure) / second.measure * 100).round
- better = " (%d%% better than %s)" % [diff, second.name] if diff > 0
- claims << "The best choice is %s: it converted at %.1f%%%s." % [best.name, best.measure * 100, better]
- if best.probability >= 90
- claims << "With %d%% probability this result is statistically significant." % score.best.probability
- else
- claims << "This result is not statistically significant, suggest you continue this experiment."
- end
- sorted.delete best
- end
+ diff = ((best.measure - second.measure) / second.measure * 100).round
+ better = " (%d%% better than %s)" % [diff, second.name] if diff > 0
+ claims << "The best choice is %s: it converted at %.1f%%%s." % [best.name, best.measure * 100, better]
+ if score.method == :bayes_bandit_score
+ if best.probability >= 90
+ claims << "With %d%% probability this result is the best." % score.best.probability
+ else
+ claims << "This result does not have strong confidence behind it, suggest you continue this experiment."
+ end
+ else
+ if best.probability >= 90
+ claims << "With %d%% probability this result is statistically significant." % score.best.probability
+ else
+ claims << "This result is not statistically significant, suggest you continue this experiment."
+ end
+ end
+ sorted.delete best
+ end
sorted.each do |alt|
if alt.measure > 0.0
claims << "%s converted at %.1f%%." % [alt.name.gsub(/^o/, "O"), alt.measure * 100]
else
claims << "%s did not convert." % alt.name.gsub(/^o/, "O")
@@ -373,11 +368,48 @@
end
claims << "#{score.choice.name.gsub(/^o/, "O")} selected as the best alternative." if score.choice
claims
end
+ # -- Unequal probability assignments --
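+ # Sets the assignment probabilities consulted by #alternative_for.
+ # Expects alternatives carrying a probability attribute (0-100), such as
+ # the alts returned by #bayes_bandit_score. For example, probabilities of
+ # 25 and 75 build [[alt_a, 0.25], [alt_b, 1.0]]: a uniform draw below
+ # 0.25 selects alt_a, anything else selects alt_b.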
+ def set_alternative_probabilities(alternative_probabilities)
+ # walk the alternatives, accumulating probability: each entry pairs an
+ # alternative with the upper bound of its bucket in [0,1]
+ cumulative_probability = 0.0
+ new_probabilities = alternative_probabilities.map { |am| [am, (cumulative_probability += am.probability) / 100.0] }
+ @use_probabilities = new_probabilities
+ end
+
+ # -- Experiment rebalancing --
+
+ # Experiment rebalancing allows the app to automatically adjust the
+ # assignment probability for each alternative: when one performs better,
+ # its probability is increased according to Bayesian multi-armed bandit
+ # theory, in order to (eventually) maximize your overall conversions.
+
+ # Sets or returns how often (measured in number of participants
+ # assigned) to rebalance. For example:
+ # ab_test "Simple" do
+ # rebalance_frequency 100
+ # end
+ #
+ # puts "The experiment will automatically rebalance after every " + experiment(:simple).description + " users are assigned."
+ def rebalance_frequency(rf = nil)
+ if rf
+ @assignments_since_rebalancing = 0
+ @rebalance_frequency = rf
+ rebalance!
+ end
+ @rebalance_frequency
+ end
+
+ # Force experiment to rebalance.
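+ # Recomputes bayes_bandit_score and feeds the per-alternative
+ # probabilities into set_alternative_probabilities, so subsequent calls
+ # to #choose favor better-performing alternatives.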
+ def rebalance!
+ return unless @playground.collecting?
+ score_results = bayes_bandit_score
+ if score_results.method == :bayes_bandit_score
+ set_alternative_probabilities score_results.alts
+ end
+ end
+
# -- Completion --
# Defines how the experiment can choose the optimal outcome on completion.
#
# By default, Vanity will take the best alternative (highest conversion
@@ -402,23 +434,26 @@
return unless @playground.collecting?
outcome = connection.ab_get_outcome(@id)
outcome && _alternatives[outcome]
end
- def complete!
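+ # Completes the experiment. A specific outcome (alternative id) can be
+ # passed in; otherwise it is determined by the outcome_is block, if any,
+ # or falls back to the best-scoring alternative.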
+ def complete!(outcome = nil)
return unless @playground.collecting? && active?
super
- if @outcome_is
- begin
- result = @outcome_is.call
- outcome = result.id if Alternative === result && result.experiment == self
- rescue
- warn "Error in AbTest#complete!: #{$!}"
+
+ unless outcome
+ if @outcome_is
+ begin
+ result = @outcome_is.call
+ outcome = result.id if Alternative === result && result.experiment == self
+ rescue
+ warn "Error in AbTest#complete!: #{$!}"
+ end
+ else
+ best = score.best
+ outcome = best.id if best
end
- else
- best = score.best
- outcome = best.id if best
end
# TODO: logging
connection.ab_set_outcome @id, outcome || 0
end
@@ -482,10 +517,18 @@
# Chooses an alternative for the identity and returns its index. This
# method always returns the same alternative for a given experiment and
# identity, and randomly distributed alternatives for each identity (in the
# same experiment).
def alternative_for(identity)
- Digest::MD5.hexdigest("#{name}/#{identity}").to_i(17) % @alternatives.size
+ if @use_probabilities
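+ # participants already assigned keep their alternative, even after
+ # probabilities have been rebalanced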
+ existing_assignment = connection.ab_assigned @id, identity
+ return existing_assignment if existing_assignment
+ random_outcome = rand()
+ @use_probabilities.each do |alternative, max_prob|
+ return alternative.id if random_outcome < max_prob
+ end
+ end
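+ # deterministic fallback: hash experiment name and identity (base 17
+ # accepts every hex digit) and map onto one of the alternatives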
+ return Digest::MD5.hexdigest("#{name}/#{identity}").to_i(17) % @alternatives.size
end
begin
a = 50.0
# Returns array of [z-score, percentage]