require "test/test_helper"

# Minimal controller used by the functional tests below to exercise the
# ab_test and track! helpers from an action, an inline view, and a view
# block capture.
class AbTestController < ActionController::Base
  use_vanity :current_user
  attr_accessor :current_user

  # Renders the chosen alternative directly from the action.
  def test_render
    render text: ab_test(:simple)
  end

  # Renders the chosen alternative through the view helper.
  def test_view
    render inline: "<%= ab_test(:simple) %>"
  end

  # Renders the chosen alternative through the block (capture) form of the
  # view helper.
  def test_capture
    render inline: "<% ab_test :simple do |value| %><%= value %><% end %>"
  end

  # Tracks a conversion for the :simple experiment and renders an empty body.
  def track
    track! :simple
    render text: ""
  end
end

# Tests for A/B test experiments: definition, choosing alternatives,
# recording participants and conversions, scoring, conclusion text,
# completion and outcome.
class AbTestTest < ActionController::TestCase
  tests AbTestController

  # -- Experiment definition --

  def test_requires_at_least_two_alternatives_per_experiment
    assert_raises RuntimeError do
      ab_test :none do
        alternatives []
      end
    end
    assert_raises RuntimeError do
      ab_test :one do
        alternatives "foo"
      end
    end
    ab_test :two do
      alternatives "foo", "bar"
    end
  end

  def test_returning_alternative_by_value
    ab_test :abcd do
      alternatives :a, :b, :c, :d
    end
    assert_equal experiment(:abcd).alternatives[1], experiment(:abcd).alternative(:b)
    assert_equal experiment(:abcd).alternatives[3], experiment(:abcd).alternative(:d)
  end

  def test_alternative_name
    ab_test :abcd do
      alternatives :a, :b
    end
    assert_equal "option A", experiment(:abcd).alternative(:a).name
    assert_equal "option B", experiment(:abcd).alternative(:b).name
  end

  # -- Running experiment --

  def test_returns_the_same_alternative_consistently
    ab_test :foobar do
      alternatives "foo", "bar"
      identify { "6e98ec" }
    end
    value = experiment(:foobar).choose
    assert value
    assert_match(/foo|bar/, value)
    1000.times do
      assert_equal value, experiment(:foobar).choose
    end
  end

  def test_returns_different_alternatives_for_each_participant
    ab_test :foobar do
      alternatives "foo", "bar"
      identify { rand }
    end
    alts = Array.new(1000) { experiment(:foobar).choose }
    assert_equal %w{bar foo}, alts.uniq.sort
    # This may fail, such is probability.
    assert_in_delta 500, alts.count("foo"), 100
  end

  def test_records_all_participants_in_each_alternative
    # 200 distinct identities, each showing up five times.
    ids = (Array.new(200) { |i| i } * 5).shuffle
    ab_test :foobar do
      alternatives "foo", "bar"
      identify { ids.pop }
    end
    1000.times { experiment(:foobar).choose }
    alts = experiment(:foobar).alternatives
    assert_equal 200, alts.map(&:participants).sum
    assert_in_delta 100, alts.first.participants, 20
  end

  def test_records_each_converted_participant_only_once
    ids = ((1..100).map { |i| [i, i] } * 5).shuffle.flatten # 3,3,1,1,7,7 etc
    ab_test :foobar do
      alternatives "foo", "bar"
      identify { ids.pop }
    end
    500.times do
      experiment(:foobar).choose
      experiment(:foobar).track!
    end
    alts = experiment(:foobar).alternatives
    assert_equal 100, alts.map(&:converted).sum
  end

  def test_records_conversion_only_for_participants
    ids = ((1..100).map { |i| [-i, i, i] } * 5).shuffle.flatten # -3,3,3,-1,1,1,-7,7,7 etc
    ab_test :foobar do
      alternatives "foo", "bar"
      identify { ids.pop }
    end
    500.times do
      experiment(:foobar).choose
      experiment(:foobar).track!
      experiment(:foobar).track!
    end
    alts = experiment(:foobar).alternatives
    assert_equal 100, alts.map(&:converted).sum
  end

  def test_destroy_experiment
    ab_test :simple do
      identify { "me" }
      complete_if { alternatives.map(&:converted).sum >= 1 }
      outcome_is { alternative(true) }
    end
    experiment(:simple).choose
    experiment(:simple).track!
    refute experiment(:simple).active?
    assert_equal true, experiment(:simple).outcome.value

    # Destroying the experiment is expected to reset all recorded state.
    experiment(:simple).destroy
    assert experiment(:simple).active?
    assert_nil experiment(:simple).outcome
    assert_nil experiment(:simple).completed_at
    assert_equal 0, experiment(:simple).alternatives.map(&:participants).sum
    assert_equal 0, experiment(:simple).alternatives.map(&:conversions).sum
    assert_equal 0, experiment(:simple).alternatives.map(&:converted).sum
  end

  # -- A/B helper methods --

  def test_fail_if_no_experiment
    assert_raises LoadError do
      get :test_render
    end
  end

  def test_ab_test_chooses_in_render
    ab_test(:simple) { }
    responses = Array.new(100) do
      # Fresh controller/request for every iteration.
      @controller = nil ; setup_controller_request_and_response
      get :test_render
      @response.body
    end
    assert_equal %w{false true}, responses.uniq.sort
  end

  def test_ab_test_chooses_view_helper
    ab_test(:simple) { }
    responses = Array.new(100) do
      @controller = nil ; setup_controller_request_and_response
      get :test_view
      @response.body
    end
    assert_equal %w{false true}, responses.uniq.sort
  end

  def test_ab_test_with_capture
    ab_test(:simple) { }
    responses = Array.new(100) do
      @controller = nil ; setup_controller_request_and_response
      get :test_capture
      @response.body
    end
    assert_equal %w{false true}, responses.map(&:strip).uniq.sort
  end

  # Has no explicit assertions: it only checks that calling the track action
  # repeatedly, with cookies cleared each time, does not raise.
  def test_ab_test_track
    ab_test(:simple) { }
    100.times do
      @controller.send(:cookies).clear
      get :track
    end
  end

  # -- Testing with tests --

  def test_with_given_choice
    ab_test(:simple) { alternatives :a, :b, :c }
    100.times do
      @controller = nil ; setup_controller_request_and_response
      experiment(:simple).chooses(:b)
      get :test_render
      # Was `assert "b", @response.body`, which always passes because the
      # second argument is only the failure message.
      assert_equal "b", @response.body
    end
  end

  def test_which_chooses_non_existent_alternative
    ab_test(:simple) { }
    assert_raises ArgumentError do
      experiment(:simple).chooses(404)
    end
  end

  def test_chooses_cleared_with_nil
    ab_test :simple do
      identify { rand }
      alternatives :a, :b, :c
    end
    responses = Array.new(100) do
      @controller = nil ; setup_controller_request_and_response
      experiment(:simple).chooses(:b)
      experiment(:simple).chooses(nil)
      get :test_render
      @response.body
    end
    assert_equal 3, responses.uniq.size
  end

  # -- Scoring --

  def test_scoring
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    # Columns: participating, conversions, rate, z-score.
    # The z-scores in these comments are relative to the control (:a); the
    # z-scores asserted below come out relative to alternative 1 (:b), which
    # is the one reported by score.base, so the figures differ.
    # Control:     182 35 19.23% N/A
    182.times { |i| experiment(:abcd).send(:count_participant, i, :a) }
    35.times { |i| experiment(:abcd).send(:count_conversion, i, :a) }
    # Treatment A: 180 45 25.00% 1.33
    180.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    45.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }
    # Treatment B: 189 28 14.81% -1.13
    189.times { |i| experiment(:abcd).send(:count_participant, i, :c) }
    28.times { |i| experiment(:abcd).send(:count_conversion, i, :c) }
    # Treatment C: 188 61 32.45% 2.94
    188.times { |i| experiment(:abcd).send(:count_participant, i, :d) }
    61.times { |i| experiment(:abcd).send(:count_conversion, i, :d) }

    z_scores = experiment(:abcd).score.alts.map { |alt| "%.2f" % alt.z_score }
    assert_equal %w{-1.33 0.00 -2.47 1.58}, z_scores
    probabilities = experiment(:abcd).score.alts.map(&:probability)
    assert_equal [90, 0, 99, 90], probabilities

    diff = experiment(:abcd).score.alts.map { |alt| alt.difference && alt.difference.round }
    assert_equal [30, 69, nil, 119], diff
    assert_equal 3, experiment(:abcd).score.best.id
    assert_equal 3, experiment(:abcd).score.choice.id
    assert_equal 1, experiment(:abcd).score.base.id
    assert_equal 2, experiment(:abcd).score.least.id
  end

  def test_scoring_with_no_performers
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    assert experiment(:abcd).score.alts.all? { |alt| alt.z_score.nan? }
    assert experiment(:abcd).score.alts.all? { |alt| alt.probability == 0 }
    assert experiment(:abcd).score.alts.all? { |alt| alt.difference.nil? }
    assert_nil experiment(:abcd).score.best
    assert_nil experiment(:abcd).score.choice
    assert_nil experiment(:abcd).score.least
  end

  def test_scoring_with_one_performer
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    # Only :b has participants (10) and conversions (8).
    10.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    8.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }
    assert experiment(:abcd).score.alts.all? { |alt| alt.z_score.nan? }
    assert experiment(:abcd).score.alts.all? { |alt| alt.probability == 0 }
    assert experiment(:abcd).score.alts.all? { |alt| alt.difference.nil? }
    # These were written as `assert 1, ...`, which can never fail.
    assert_equal 1, experiment(:abcd).score.best.id
    assert_nil experiment(:abcd).score.choice
    # NOTE(review): the next line is still a vacuous assertion (1 is the value
    # under test, the id is only the failure message). It was deliberately NOT
    # converted to assert_equal: every z-score above is NaN, which suggests the
    # base is not alternative 1 here. Confirm the intended base and tighten.
    assert 1, experiment(:abcd).score.base.id
    assert_equal 1, experiment(:abcd).score.least.id
  end

  def test_scoring_with_some_performers
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    # :b converts 8 of 10, :d converts 5 of 12; :a and :c have no data.
    10.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    8.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }
    12.times { |i| experiment(:abcd).send(:count_participant, i, :d) }
    5.times { |i| experiment(:abcd).send(:count_conversion, i, :d) }

    z_scores = experiment(:abcd).score.alts.map { |alt| "%.2f" % alt.z_score }
    assert_equal %w{NaN 2.01 NaN 0.00}, z_scores
    probabilities = experiment(:abcd).score.alts.map(&:probability)
    assert_equal [0, 95, 0, 0], probabilities

    diff = experiment(:abcd).score.alts.map { |alt| alt.difference && alt.difference.round }
    assert_equal [nil, 92, nil, nil], diff
    assert_equal 1, experiment(:abcd).score.best.id
    assert_equal 1, experiment(:abcd).score.choice.id
    assert_equal 3, experiment(:abcd).score.base.id
    assert_equal 3, experiment(:abcd).score.least.id
  end

  def test_scoring_with_different_probability
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    10.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    8.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }
    12.times { |i| experiment(:abcd).send(:count_participant, i, :d) }
    5.times { |i| experiment(:abcd).send(:count_conversion, i, :d) }

    # The best alternative is chosen at the 90 and 95 thresholds but not at 99.
    assert_equal 1, experiment(:abcd).score(90).choice.id
    assert_equal 1, experiment(:abcd).score(95).choice.id
    assert_nil experiment(:abcd).score(99).choice
  end

  # -- Conclusion --
  #
  # The heredoc bodies below are deliberately flush with column 0: <<-TEXT
  # keeps leading whitespace, and each expected line must match a claim
  # exactly.

  def test_conclusion
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    # Columns: participating, conversions, rate, z-score (vs. control).
    # Control:     182 35 19.23% N/A
    182.times { |i| experiment(:abcd).send(:count_participant, i, :a) }
    35.times { |i| experiment(:abcd).send(:count_conversion, i, :a) }
    # Treatment A: 180 45 25.00% 1.33
    180.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    45.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }
    # Treatment B: 189 28 14.81% -1.13
    189.times { |i| experiment(:abcd).send(:count_participant, i, :c) }
    28.times { |i| experiment(:abcd).send(:count_conversion, i, :c) }
    # Treatment C: 188 61 32.45% 2.94
    188.times { |i| experiment(:abcd).send(:count_participant, i, :d) }
    61.times { |i| experiment(:abcd).send(:count_conversion, i, :d) }

    assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
There are 739 participants in this experiment.
The best choice is option D: it converted at 32.4% (30% better than option B).
With 90% probability this result is statistically significant.
Option B converted at 25.0%.
Option A converted at 19.2%.
Option C converted at 14.8%.
Option D selected as the best alternative.
    TEXT
  end

  def test_conclusion_with_some_performers
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    # Option B: 180 participants, 45 conversions (25.00%)
    180.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    45.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }
    # Option D: 188 participants, 61 conversions (32.45%)
    188.times { |i| experiment(:abcd).send(:count_participant, i, :d) }
    61.times { |i| experiment(:abcd).send(:count_conversion, i, :d) }

    assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
There are 368 participants in this experiment.
The best choice is option D: it converted at 32.4% (30% better than option B).
With 90% probability this result is statistically significant.
Option B converted at 25.0%.
Option A did not convert.
Option C did not convert.
Option D selected as the best alternative.
    TEXT
  end

  def test_conclusion_without_clear_winner
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    # Option B: 180 participants, 58 conversions (32.22%)
    180.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    58.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }
    # Option D: 188 participants, 61 conversions (32.45%)
    188.times { |i| experiment(:abcd).send(:count_participant, i, :d) }
    61.times { |i| experiment(:abcd).send(:count_conversion, i, :d) }

    assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
There are 368 participants in this experiment.
The best choice is option D: it converted at 32.4% (1% better than option B).
This result is not statistically significant, suggest you continue this experiment.
Option B converted at 32.2%.
Option A did not convert.
Option C did not convert.
    TEXT
  end

  def test_conclusion_without_close_performers
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    # Option B: 186 participants, 60 conversions (32.26%)
    186.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    60.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }
    # Option D: 188 participants, 61 conversions (32.45%)
    188.times { |i| experiment(:abcd).send(:count_participant, i, :d) }
    61.times { |i| experiment(:abcd).send(:count_conversion, i, :d) }

    assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
There are 374 participants in this experiment.
The best choice is option D: it converted at 32.4%.
This result is not statistically significant, suggest you continue this experiment.
Option B converted at 32.3%.
Option A did not convert.
Option C did not convert.
    TEXT
  end

  def test_conclusion_without_equal_performers
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    # Option B: 188 participants, 61 conversions (32.45%)
    188.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    61.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }
    # Option D: 188 participants, 61 conversions (32.45%)
    188.times { |i| experiment(:abcd).send(:count_participant, i, :d) }
    61.times { |i| experiment(:abcd).send(:count_conversion, i, :d) }

    assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
There are 376 participants in this experiment.
Option D converted at 32.4%.
Option B converted at 32.4%.
Option A did not convert.
Option C did not convert.
    TEXT
  end

  def test_conclusion_with_one_performers
    ab_test(:abcd) { alternatives :a, :b, :c, :d }
    # Option B: 180 participants, 45 conversions (25.00%)
    180.times { |i| experiment(:abcd).send(:count_participant, i, :b) }
    45.times { |i| experiment(:abcd).send(:count_conversion, i, :b) }

    assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
There are 180 participants in this experiment.
This experiment did not run long enough to find a clear winner.
    TEXT
  end

  def test_conclusion_with_no_performers
    ab_test(:abcd) { alternatives :a, :b, :c, :d }

    assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
There are no participants in this experiment yet.
This experiment did not run long enough to find a clear winner.
    TEXT
  end

  # -- Completion --

  def test_completion_if
    ab_test :simple do
      identify { rand }
      complete_if { true }
    end
    experiment(:simple).choose
    refute experiment(:simple).active?
  end

  # A complete_if block that raises must leave the experiment active.
  def test_completion_if_fails
    ab_test :simple do
      identify { rand }
      complete_if { fail }
    end
    experiment(:simple).choose
    assert experiment(:simple).active?
  end

  def test_completion
    ids = Array.new(100) { |i| i.to_s }.shuffle
    ab_test :simple do
      identify { ids.pop }
      complete_if { alternatives.map(&:participants).sum >= 100 }
    end
    99.times do
      experiment(:simple).choose
      assert experiment(:simple).active?
    end

    # The 100th participant satisfies complete_if.
    experiment(:simple).choose
    refute experiment(:simple).active?
  end

  def test_ab_methods_after_completion
    ids = Array.new(200) { |i| [i, i] }.shuffle.flatten
    ab_test :simple do
      identify { ids.pop }
      complete_if { alternatives.map(&:participants).sum >= 100 }
      outcome_is { alternatives[1] }
    end
    # Run experiment to completion (100 participants)
    results = Set.new
    100.times do
      results << experiment(:simple).choose
      experiment(:simple).track!
    end
    assert results.include?(true) && results.include?(false)
    refute experiment(:simple).active?

    # Test that we always get the same choice (true)
    100.times do
      assert_equal true, experiment(:simple).choose
      experiment(:simple).track!
    end
    # We don't get to count the 100th participant's conversion, but that's ok.
    assert_equal 99, experiment(:simple).alternatives.map(&:converted).sum
    assert_equal 99, experiment(:simple).alternatives.map(&:conversions).sum
  end

  # -- Outcome --

  def test_completion_outcome
    ab_test :quick do
      outcome_is { alternatives[1] }
    end
    experiment(:quick).complete!
    assert_equal experiment(:quick).alternatives[1], experiment(:quick).outcome
  end

  # When outcome_is yields nil, something that is not an alternative, or
  # raises, the outcome is expected to be the first alternative.
  def test_outcome_is_returns_nil
    ab_test :quick do
      outcome_is { nil }
    end
    experiment(:quick).complete!
    assert_equal experiment(:quick).alternatives.first, experiment(:quick).outcome
  end

  def test_outcome_is_returns_something_else
    ab_test :quick do
      outcome_is { "error" }
    end
    experiment(:quick).complete!
    assert_equal experiment(:quick).alternatives.first, experiment(:quick).outcome
  end

  def test_outcome_is_fails
    ab_test :quick do
      outcome_is { fail }
    end
    experiment(:quick).complete!
    assert_equal experiment(:quick).alternatives.first, experiment(:quick).outcome
  end

  def test_outcome_choosing_best_alternative
    ab_test :quick do
    end
    2.times { |i| experiment(:quick).send(:count_participant, i, false) }
    10.times { |i| experiment(:quick).send(:count_participant, i, true).send(:count_conversion, i, true) }
    experiment(:quick).complete!
    assert_equal experiment(:quick).alternative(true), experiment(:quick).outcome
  end

  def test_outcome_only_performing_alternative
    ab_test :quick do
    end
    2.times { |i| experiment(:quick).send(:count_participant, i, true).send(:count_conversion, i, true) }
    experiment(:quick).complete!
    assert_equal experiment(:quick).alternative(true), experiment(:quick).outcome
  end

  def test_outcome_choosing_equal_alternatives
    ab_test :quick do
    end
    8.times { |i| experiment(:quick).send(:count_participant, i, false).send(:count_conversion, i, false) }
    8.times { |i| experiment(:quick).send(:count_participant, i, true).send(:count_conversion, i, true) }
    experiment(:quick).complete!
    assert_equal experiment(:quick).alternative(true), experiment(:quick).outcome
  end

  # Defines an experiment of type :ab_test named +name+ on the Vanity
  # playground, configuring it with the given block.
  def ab_test(name, &block)
    Vanity.playground.define name, :ab_test, &block
  end
end