forked from WinVector/WinVector.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrateTestExample.html
More file actions
83 lines (76 loc) · 7.75 KB
/
rateTestExample.html
File metadata and controls
83 lines (76 loc) · 7.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># R example working a rate significance test related to the problem stated in http://blog.sumall.com/journal/optimizely-got-me-fired.html</span>
<span class="co"># See: http://www.win-vector.com/blog/2014/05/a-clear-picture-of-power-and-significance-in-ab-tests/ for more writing on this</span>
<span class="co"># Note: we are using exact tail probabilities from the binomial distribution. This is easy</span>
<span class="co"># to do (as it is a built in function in many languages) and more faithful to the underlying</span>
<span class="co"># assumed model than using a normal approximation (though a normal approximation is certainly good enough).</span>
<span class="co"># to rebuild</span>
<span class="co"># echo "library(knitr); knit('rateTestExample.Rmd')" | R --vanilla ; pandoc rateTestExample.md -o rateTestExample.html</span>
<span class="co"># type in data</span>
d <-<span class="st"> </span><span class="kw">data.frame</span>(<span class="dt">name=</span><span class="kw">c</span>(<span class="st">'variation#1'</span>,<span class="st">'variation#2'</span>),<span class="dt">visitors=</span><span class="kw">c</span>(<span class="dv">3920</span>,<span class="dv">3999</span>),<span class="dt">conversions=</span><span class="kw">c</span>(<span class="dv">721</span>,<span class="dv">623</span>))
d$rates <-<span class="st"> </span>d$conversions/d$visitors
<span class="kw">print</span>(d)</code></pre>
<pre><code>## name visitors conversions rates
## 1 variation#1 3920 721 0.1839
## 2 variation#2 3999 623 0.1558</code></pre>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># For our null-hypothesis assume the two rates are identical.</span>
<span class="co"># under this hypothesis we can pool the data to get an estimate of what</span>
<span class="co"># common rate we are looking at. Note we don't actually know the common</span>
<span class="co"># rate, so using a single number from the data is a bit of a short-cut.</span>
baseRate <-<span class="st"> </span><span class="kw">sum</span>(d$conversions)/<span class="kw">sum</span>(d$visitors)
<span class="kw">print</span>(baseRate)</code></pre>
<pre><code>## [1] 0.1697</code></pre>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Write down how far the observed counts are from the expected values.</span>
d$expectation <-<span class="st"> </span>d$visitors*baseRate
d$difference <-<span class="st"> </span>d$conversions-d$expectation
<span class="co"># Compute the one and two-sided significances of this from a Binomial model.</span>
<span class="co"># return p( lowConversions <= conversions <= highConversions | visitors,rate)</span>
pInterval <-<span class="st"> </span>function(visitors,rate,lowConversions,highConversions) {
<span class="co"># pbinom(obs,total,rate) = P[X <= obs] where X ~ Binomial(total,rate)</span>
<span class="kw">pbinom</span>(highConversions,visitors,rate) -<span class="st"> </span>
<span class="st"> </span><span class="kw">pbinom</span>(lowConversions<span class="dv">-1</span>,visitors,rate)
}
d$pAtLeastAbsDiff <-<span class="st"> </span><span class="dv">1</span> -<span class="st"> </span><span class="kw">pInterval</span>(d$visitors,baseRate,
d$expectation-(<span class="kw">abs</span>(d$difference)-<span class="dv">1</span>),
d$expectation+(<span class="kw">abs</span>(d$difference)-<span class="dv">1</span>))
<span class="co"># Also show estimate of typical deviation and z-like score.</span>
d$expectedDeviation <-<span class="st"> </span><span class="kw">sqrt</span>(baseRate*(<span class="dv">1</span>-baseRate)*d$visitors)
d$Z <-<span class="st"> </span><span class="kw">abs</span>(d$difference)/d$expectedDeviation
<span class="kw">print</span>(d)</code></pre>
<pre><code>## name visitors conversions rates expectation difference
## 1 variation#1 3920 721 0.1839 665.3 55.7
## 2 variation#2 3999 623 0.1558 678.7 -55.7
## pAtLeastAbsDiff expectedDeviation Z
## 1 0.01821 23.50 2.370
## 2 0.02051 23.74 2.347</code></pre>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Plot pooled rate null-hypothesis</span>
<span class="kw">library</span>(ggplot2)
<span class="kw">library</span>(reshape2)
plotD <-<span class="st"> </span><span class="kw">data.frame</span>(<span class="dt">conversions=</span>
(<span class="kw">floor</span>(<span class="kw">min</span>(d$expectation) -<span class="st"> </span><span class="dv">3</span>*<span class="kw">max</span>(d$expectedDeviation))):
<span class="st"> </span>(<span class="kw">ceiling</span>(<span class="kw">max</span>(d$expectation) +<span class="st"> </span><span class="dv">3</span>*<span class="kw">max</span>(d$expectedDeviation))))
plotD[,<span class="kw">as.character</span>(d$name[<span class="dv">1</span>])] <-<span class="st"> </span><span class="kw">dbinom</span>(plotD$conversions,d$visitors[<span class="dv">1</span>],baseRate)
plotD[,<span class="kw">as.character</span>(d$name[<span class="dv">2</span>])] <-<span class="st"> </span><span class="kw">dbinom</span>(plotD$conversions,d$visitors[<span class="dv">2</span>],baseRate)
thinD <-<span class="st"> </span><span class="kw">melt</span>(plotD,<span class="dt">id.vars=</span><span class="kw">c</span>(<span class="st">'conversions'</span>),
<span class="dt">variable.name=</span><span class="st">'assumedNullDistribution'</span>,<span class="dt">value.name=</span><span class="st">'probability'</span>)
<span class="co"># In this plot the two distributions are assumed to have the same</span>
<span class="co"># conversion rate, so the only distributional difference is from the</span>
<span class="co"># different total number of visitors. The vertical lines are the</span>
<span class="co"># observed conversion counts for each group.</span>
<span class="kw">ggplot</span>(<span class="dt">data=</span>thinD,<span class="kw">aes</span>(<span class="dt">x=</span>conversions,<span class="dt">y=</span>probability,<span class="dt">color=</span>assumedNullDistribution)) +<span class="st"> </span>
<span class="st"> </span><span class="kw">geom_line</span>() +<span class="st"> </span><span class="kw">geom_vline</span>(<span class="dt">data=</span>d,<span class="kw">aes</span>(<span class="dt">xintercept=</span>conversions,<span class="dt">color=</span>name))</code></pre>
<div class="figure">
<img src="figure/rateExample.png" alt="plot of chunk rateExample" /><p class="caption">plot of chunk rateExample</p>
</div>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># The important thing to remember is your exact</span>
<span class="co"># significances/probabilities are a function of the unknown true rates,</span>
<span class="co"># your data, and your modeling assumptions. The usual advice is to</span>
<span class="co"># control the undesirable dependence on modeling assumptions by using only "brand</span>
<span class="co"># name tests." I actually prefer using ad-hoc tests, but discussing what</span>
<span class="co"># is assumed in them (one-sided/two-sided, pooled data for null, and so</span>
<span class="co"># on). You definitely can't assume away a thumb on the scale.</span>
<span class="co">#</span>
<span class="co"># Also this calculation is not compensating for any multiple trial or</span>
<span class="co"># early stopping effect. It (rightly or wrongly) assumes this is the only</span>
<span class="co"># experiment run and it was stopped without looking at the rates.</span>
<span class="co">#</span>
<span class="co"># This may look like a lot of code, but the code doesn't change over different data.</span></code></pre>