Steve Howard
7/28/2014
(But this could vary greatly with context)
Imagine a sequence of experiments on a single page over one million total visitors.
Consume 5,000 visitors during "implementation" of the new treatment
import math

from scipy.special import betaln as log_beta  # one plausible log_beta (assumed; not shown in the original)

def loop_posterior(baseline_alpha, baseline_beta,
                   treatment_alpha, treatment_beta):
    # Sum the closed-form series for P(treatment rate > baseline rate),
    # one term at a time in pure Python.
    sum_result = 0
    for i in xrange(treatment_alpha):
        sum_result += math.exp(
            log_beta(baseline_alpha + i, treatment_beta + baseline_beta)
            - log_beta(1 + i, treatment_beta)
            - log_beta(baseline_alpha, baseline_beta)
        ) / (treatment_beta + i)
    return sum_result
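For example — with uniform Beta(1, 1) priors and made-up visitor counts (both assumptions for illustration only, and still assuming log_beta is the log of the Beta function as above) — the posterior probability that the treatment's conversion rate exceeds the baseline's could be computed like this:

# Hypothetical counts; with Beta(1, 1) priors, alpha = 1 + conversions
# and beta = 1 + non-conversions.
baseline_conversions, baseline_visitors = 200, 10000
treatment_conversions, treatment_visitors = 230, 10000

probability = loop_posterior(
    1 + baseline_conversions,
    1 + baseline_visitors - baseline_conversions,
    1 + treatment_conversions,
    1 + treatment_visitors - treatment_conversions)
print('P(treatment beats baseline) = %.3f' % probability)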
def vectorized_posterior(baseline_alpha, baseline_beta,
treatment_alpha, treatment_beta):
i_values = numpy.arange(treatment_alpha)
return numpy.sum(
numpy.exp(
log_beta(baseline_alpha + i_values, treatment_beta + baseline_beta)
- log_beta(1 + i_values, treatment_beta)
- log_beta(baseline_alpha, baseline_beta)
) / (treatment_beta + i_values)
)
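The benchmark script itself isn't reproduced here; a minimal sketch of what a harness like benchmark_bayesian_posterior.py might do — timing both functions above with timeit, using arbitrary posterior parameters — is:

import timeit

# Assumes loop_posterior and vectorized_posterior (defined above) are in scope.
for samples in (10, 100, 1000, 10000, 100000):
    # "samples" stands in for treatment_alpha, which sets how many terms
    # the sum has; the other parameters are arbitrary here.
    args = (samples, samples, samples, samples)
    slow = timeit.timeit(lambda: loop_posterior(*args), number=10) / 10
    fast = timeit.timeit(lambda: vectorized_posterior(*args), number=10) / 10
    print('%7d  slow=%.6fs  fast=%.6fs  ratio=%.1fx'
          % (samples, slow, fast, slow / fast))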
$ python benchmark_bayesian_posterior.py
Samples       Slow      Fast  Ratio
     10    0.032us   0.033us   1.0x
    100    0.208us   0.041us   5.1x
   1000    2.006us   0.101us  19.8x
  10000   20.282us   0.601us  33.7x
 100000  206.210us   6.160us  33.5x