import matplotlib.pyplot as plt
import numpy as np
import pyro
import pyro.distributions as dist
import seaborn as sns
import torch
from pyro.infer import MCMC, NUTS
from scipy.stats import beta
sns.set_palette("Set2")
# We skip connecting to the Reddit API and hard-code the (upvote, downvote) counts instead
votes = torch.tensor([[32, 3], [1918, 101], [79, 17], [33, 6]])
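# For reference (a small added sketch, not in the original): the raw upvote
# ratios these counts imply, before any Bayesian treatment
print(votes[:, 0] / votes.sum(dim=1))  # tensor([0.9143, 0.9500, 0.8229, 0.8462])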
def voting_model(up, down):
    # Uniform(0, 1) prior on the post's true upvote rate
    up_rate = pyro.sample("up_rate", dist.Uniform(0, 1))
    # Binomial likelihood: `up` upvotes observed out of `up + down` total votes
    pyro.sample("rating", dist.Binomial(total_count=up + down, probs=up_rate), obs=up)
nuts_kernel = NUTS(voting_model)
# 200 warmup steps to adapt the step size, then 1000 posterior samples per run
mcmc = MCMC(nuts_kernel, num_samples=1000, warmup_steps=200)
results = []
for up, down in votes:
    mcmc.run(up, down)
    results.append(mcmc.get_samples()["up_rate"].detach().cpu().numpy())
Sample: 100%|██████████| 1200/1200 [00:03, 380.68it/s, step size=8.41e-01, acc. prob=0.894]
Sample: 100%|██████████| 1200/1200 [00:04, 299.38it/s, step size=1.03e+00, acc. prob=0.940]
Sample: 100%|██████████| 1200/1200 [00:03, 345.89it/s, step size=1.25e+00, acc. prob=0.908]
Sample: 100%|██████████| 1200/1200 [00:03, 356.30it/s, step size=1.17e+00, acc. prob=0.907]
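# A short added summary (assuming the `results` list above): posterior mean and a
# 95% credible interval for each post, computed straight from the MCMC samples
for (up, down), r in zip(votes, results):
    lo, hi = np.percentile(r, [2.5, 97.5])
    print(f"{int(up)} up / {int(down)} down: mean={r.mean():.3f}, 95% CI=({lo:.3f}, {hi:.3f})")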
fig, ax = plt.subplots()
for r in results:
    ax.hist(r, bins=40, range=(0.5, 1.0), alpha=0.7)
ax.set_xlim((0.5, 1))
plt.show()
# The TensorFlow version of this tutorial uses these vote counts instead
votes_tf = torch.tensor([[45, 6], [612, 35], [335, 10], [162, 20]])
results_tf = []
for up, down in votes_tf:
    mcmc.run(up, down)
    results_tf.append(mcmc.get_samples()["up_rate"].detach().cpu().numpy())
Sample: 100%|██████████| 1200/1200 [00:03, 343.41it/s, step size=1.00e+00, acc. prob=0.883]
Sample: 100%|██████████| 1200/1200 [00:03, 344.96it/s, step size=1.31e+00, acc. prob=0.924]
Sample: 100%|██████████| 1200/1200 [00:03, 385.46it/s, step size=1.44e+00, acc. prob=0.889]
Sample: 100%|██████████| 1200/1200 [00:03, 339.03it/s, step size=1.25e+00, acc. prob=0.906]
fig, ax = plt.subplots()
for r in results_tf:
    ax.hist(r, bins=40, range=(0.7, 1.0), alpha=0.7)
ax.set_xlim((0.7, 1))
plt.show()
# Our Uniform(0, 1) prior is a Beta(1, 1), which is conjugate to the Binomial likelihood,
# so the exact posterior for each post is simply Beta(1 + up, 1 + down)
posterior_parameters = 1 + votes_tf.numpy()
fig, ax = plt.subplots()
for i, r in enumerate(results_tf):
    ax.hist(r, bins=40, range=(0.7, 1.0), alpha=0.3, color=sns.color_palette()[i], density=True)
xx = np.linspace(0.5, 1.0, 100)
# Evaluate the four analytic Beta posteriors on the same grid via broadcasting
yy = beta.pdf(xx[:, None], posterior_parameters[:, 0], posterior_parameters[:, 1]).T
for i, y in enumerate(yy):
    ax.plot(xx, y, color=sns.color_palette()[i])
ax.set_xlim((0.7, 1))
plt.show()
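# A minimal closing check (added, not in the original): the mean of a Beta(a, b)
# posterior is a / (a + b); the MCMC sample means should match to ~2 decimal places
a, b = posterior_parameters[:, 0], posterior_parameters[:, 1]
print(a / (a + b))
print(np.array([r.mean() for r in results_tf]))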