Notes: Bayes' Law¶
In which we will
- see how the concept of a generative model and our background in the calculus of probability allow us to infer properties of a model from data
- work through a simple example of Bayesian inference
- think about prior distributions
# get a bunch of imports out of the way
import matplotlib.pyplot as plt
plt.rc('text', usetex=True)
plt.rcParams['xtick.labelsize'] = 'x-large'
plt.rcParams['ytick.labelsize'] = 'x-large'
import numpy as np
import scipy.stats as st
%matplotlib inline
Bayesian inference¶
You've seen in the generative models notes how writing down a model with enough specificity that we could produce mock data is equivalent to writing down a joint probability distribution for all the model's parameters and the resulting data. Let's abbreviate this as
$P(\mathrm{params},\mathrm{data}|\mathrm{model})$.
Here I've explicitly conditioned on a choice of model, with any assumptions that come along with it. Much of the time this doesn't need to be explicitly written, e.g. if the choice of model is not really in question and we're only interested in constraining the values of the parameters ("params") of the model, which is our most common use case. But, for now, let's leave the "model" there.
Using the definition of conditional probability, we can immediately expand this as
$P(\mathrm{params},\mathrm{data}|\mathrm{model}) = P(\mathrm{params}|\mathrm{model})\,P(\mathrm{data}|\mathrm{params},\mathrm{model}) = P(\mathrm{data}|\mathrm{model})\,P(\mathrm{params}|\mathrm{data},\mathrm{model})$,
and, through the power of division, conclude that
$P(\mathrm{params}|\mathrm{data},\mathrm{model}) = \frac{P(\mathrm{params}|\mathrm{model})\,P(\mathrm{data}|\mathrm{params},\mathrm{model})}{P(\mathrm{data}|\mathrm{model})}$
In this form, the expression is known as Bayes' Law. Note that the numerator of the RHS is the factorization of the joint probability that our generative models encode.
Aside¶
Bayes' Law is also frequently referred to as Bayes' Theorem or Bayes' Rule. One could argue that "theorem" is inappropriate, since a theorem is a statement that has been mathematically proven, whereas all we did was apply one of the axioms of probability. Of course, this apparent simplicity is due to several hundred years of development of probability as a branch of mathematics (including said axioms and the definition of conditioning). In any case, the importance of Bayes' Law is not in the impressiveness of its derivation, but in the insight that the formal language of probability is the natural (and correct) way to describe the knowledge we acquire from data.
It's also worth noting that, like many equations that have a name attached to them, Bayes' Law was neither fully appreciated by nor uniquely discovered by Bayes. He was apparently the first to write it down, though he didn't really do anything with it or even publish the derivation (it was published posthumously). Later, and independently, Laplace developed and applied the approach that we now call Bayesian analysis. Of course, a number of others contributed along the way to our modern formalism (Jeffreys, Cox and Jaynes being probably the most salient names).
Anyway, we'll call the law "Bayes" because everyone else does.
Components of Bayes' Law¶
The above equation hopefully makes sense, at least as a mathematical abstraction. It will take time to gain intuition as to what it actually means, but we can start with the interpretation of each factor.
The prior distribution¶
$P(\mathrm{params}|\mathrm{model})$ appears to be the marginal distribution of our model parameters, irrespective of the data. We interpret this to be a probability distribution encoding what we know about the parameters before accounting for the information gained from the data, hence the name "prior" distribution. We saw this quantity already in the generative model context where, correctly, it had no dependence on the data (since choosing parameter values is the first step of the generative process that ends with producing mock data).
Exactly what this distribution should be will vary on a case by case basis, but it's usually best to think of the choice of prior as part of the specification of the model (or, alternatively, of the problem you're trying to solve). We'll return to this later.
The sampling distribution¶
$P(\mathrm{data}|\mathrm{params},\mathrm{model})$ we have also seen in the generative context - this is the probability of a particular set of parameter values producing a given data set. The name "sampling distribution" makes sense if we think of our data as samples from a larger set of possible measurements, an analogy that works better for sociology or games of chance than for astronomy.
Recall that, in inference, our data are constants and the parameter values are unknown. It's natural, therefore, to think about the sampling distribution as a function of model parameters, with the data fixed. In classical statistics, this is exactly what's known as the likelihood function, and we will inevitably be sloppy and use that terminology interchangeably with "sampling distribution".
However, don't fall into the trap of thinking that the sampling distribution is normalized over the space of parameter values - it isn't. For a given set of fixed parameter values, it's normalized over the space of possible data sets.
The evidence¶
The normalizing constant on the RHS of Bayes' Law, $P(\mathrm{data}|\mathrm{model})$, is called the evidence. Mathematically, it is the marginal probability of the observed data, or equivalently
$P(\mathrm{data}|\mathrm{model}) = \int d(\mathrm{params}) \, P(\mathrm{params}|\mathrm{model}) \, P(\mathrm{data}|\mathrm{params},\mathrm{model})$.
In the circumstances where we're only interested in learning about the model parameters, having already chosen a model, we won't normally need to evaluate the evidence (we'll see why later on). Its utility arises in the context of choosing among competing models. To do so we would need to apply Bayes' Law again, having already marginalized over the parameters of the competing models:
$P(\mathrm{model}|\mathrm{data}) = \frac{P(\mathrm{model})\,P(\mathrm{data}|\mathrm{model})}{P(\mathrm{data})}$.
In this meta-invocation of Bayes' Law, the evidence plays the role of the sampling distribution/likelihood, and we correspondingly have prior and posterior distributions over possible models. (There is also a new normalizing constant, $P(\mathrm{data})$, marginalized over possible models ... which we normally will not need to evaluate.)
To begin with, we'll focus on the task of using data to constrain the parameters of a given model, so if the above was confusing you can safely forget about the evidence for a little while.
The posterior distribution¶
$P(\mathrm{params}|\mathrm{data},\mathrm{model})$ is a distribution over parameter values encoding what we know about the parameters after accounting for the information in the data. This is the end result of our inference - a function telling us the relative probability of different parameter values in light of the gathered data.
Example: does a positive test mean positive?¶
This example is ostensibly not astrophysical, but it's the example generally used to first demonstrate how Bayesian inference works.
Imagine you're being screened for an uncommon disease or condition using a test that is known to produce false positives only 1% of the time and false negatives only 0.01% of the time, and your test is positive. Naively, a false positive rate of 1% would seem to mean that 99% of the test's positive results are correct and only 1% of them are spurious, so you're probably sick, right?
The answer is it depends. The test result alone is not enough information for us to draw a conclusion. Let's set up the inference problem.
First, because we absorbed the pointed lessons from the Generative Models notes, we will carefully define a model to explain the data. In this case, it might be very simple, consisting of the following quantities:
- $\theta$: a variable encoding whether we are sick (1) or well (0),
- $r_\mathrm{fp}$: the false positive rate of the test, which we are given as 0.01,
- $r_\mathrm{fn}$: the false negative rate of the test, which we are given as 0.0001, and
- $T$: the outcome of the test, another 0/1 variable encoding positive (1) or negative (0); this is our data.
The PGM could be sketched like this:
And the model expressed as:
- $\theta \sim$ Bernoulli prior
- $r_\mathrm{fp} = 0.01$
- $r_\mathrm{fn} = 0.0001$
- $q = \theta(1-r_\mathrm{fn}) + (1-\theta)r_\mathrm{fp}$
- $T \sim \mathrm{Bernoulli}(q)$
Here I've introduced the intermediate quantity $q$, the probability of the test returning positive, which is equal to $(1-r_\mathrm{fn})$ if you are actually sick and $r_\mathrm{fp}$ if not. Note that $\theta$ is defined as 1 or 0, depending on whether we're sick or not, not as the probability that we're sick. The latter we will just call $P(\theta=1)$, which specifies a Bernoulli prior for $\theta$ (the only distribution that makes sense in this case).
Next, given that our data are $T=1$, we can write down the sampling distribution:
$P(T=1|\theta) = q(\theta,r_\mathrm{fp},r_\mathrm{fn}) = \theta(1-r_\mathrm{fn}) + (1-\theta)r_\mathrm{fp}$.
Finally, we can write down the posterior for $\theta$:
$P(\theta|T=1) = \frac{P(\theta)P(T=1|\theta)}{P(T=1)} = \frac{P(\theta)P(T=1|\theta)}{P(\theta=0)P(T=1|\theta=0) + P(\theta=1)P(T=1|\theta=1)}$.
Ok... that's a whole lot of equations, and we still don't have an answer. In particular, the expression above depends on the prior, $P(\theta)$, and we still haven't said what that is. And that's the point - to draw a conclusion from the test, we need to know the a priori probability that you were sick, not just the test outcome itself. And this information wasn't even really given in the description of the problem. Quite simply, we have no business drawing a conclusion from the test unless we can specify the model further.
Consider the scenario where this test is routinely given to everyone once they reach a certain age. Then $P(\theta)$ would be the prevalence of this disease in the whole population (at that age), which is knowable. For the purposes of the example, we might have $P(\theta=1) = 10^{-5}$, or something similarly small. Now we have enough information to work out the posterior for $\theta$. Let's do so, following the model pseudo-code above.
P_theta1 = 1e-5
r_fp = 0.01
r_fn = 0.0001
def q(theta):
    return theta*(1.0-r_fn) + (1.0-theta)*r_fp
# unnormalized posterior (numerator of Bayes' Law rhs)
unp0 = (1.0 - P_theta1) * q(0.0) # proportional to P(theta=0|T=1)
unp1 = P_theta1 * q(1.0) # proportional to P(theta=1|T=1)
# normalize the posterior
evidence = unp0 + unp1
post0 = unp0 / evidence
post1 = unp1 / evidence
print("posterior:\n P(theta=0|T=1) =", post0)
print(" P(theta=1|T=1) =", post1)
print("\nevidence:\n P(T=1) =", evidence)
posterior:
 P(theta=0|T=1) = 0.9990010888221749
 P(theta=1|T=1) = 0.000998911177825071

evidence:
 P(T=1) = 0.010009899000000001
We can see that, in this case, the odds are very much against illness even with a positive test.
On the other hand, consider a scenario where the rate of illness in the population is much higher, say 1%. Alternatively, you could imagine that the test is not a routine screening, but one a doctor recommended based on some reported symptoms - then we should condition everything in the problem on those symptoms, and the conditional prior $P(\theta=1|\mathrm{symptoms})$ could be much higher than the marginal $P(\theta=1)$. Either way, let's repeat the calculation for that case:
P_theta1 = 1e-2
# unnormalized posterior (numerator of Bayes' Law rhs)
unp0 = (1.0 - P_theta1) * q(0.0) # proportional to P(theta=0|T=1)
unp1 = P_theta1 * q(1.0) # proportional to P(theta=1|T=1)
# normalize the posterior
evidence = unp0 + unp1
post0 = unp0 / evidence
post1 = unp1 / evidence
print("posterior:\n P(theta=0|T=1) =", post0)
print(" P(theta=1|T=1) =", post1)
print("\nevidence:\n P(T=1) =", evidence)
posterior:
 P(theta=0|T=1) = 0.49751243781094534
 P(theta=1|T=1) = 0.5024875621890548

evidence:
 P(T=1) = 0.019899
Now, for the same test and outcome, the probability of illness has jumped to 50-50! Bottom line: we have to specify a model fully and work out the posterior distribution to draw conclusions from the test, rather than trying to intuit something from the test alone.
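To see how sensitive this conclusion is to the assumed prevalence, here is a small extension of the example (my own sketch, not part of the original calculation): scan over values of $P(\theta=1)$ and recompute the posterior with the same machinery.
# scan over the assumed prevalence P(theta=1) and recompute the posterior probability of being sick
for prevalence in [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]:
    unnorm_well = (1.0 - prevalence) * q(0.0) # proportional to P(theta=0|T=1)
    unnorm_sick = prevalence * q(1.0)         # proportional to P(theta=1|T=1)
    print("P(theta=1) =", prevalence, "-> P(theta=1|T=1) =", unnorm_sick / (unnorm_well + unnorm_sick))
The positive test only becomes compelling once the prior probability of illness is comparable to or larger than the false positive rate.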
Note that the evidence, $P(T=1)$, was fairly small in both cases. That is, the probability of getting our data, irrespective of whether we were sick, was just 1-2%, which seems pretty unlikely. Importantly, this fact has no impact on our conclusion. There is a lesson here: unlikely things happen, and since our data are constants, just saying that we were lucky or unlucky to get them means nothing on its own. What matters is the probability of the model (or parameters) given the data, which, via Bayes' Law, accounts for the space of models (or parameter values) being considered as alternatives.
Finally, notice that, even though the generative model still plays a key role in our inference, we are no longer actually generating anything. Our probabilistic calculations consist of evaluating PMFs or PDFs, not drawing random numbers from distributions (for now... eventually we will end up using Monte Carlo methods to draw from posterior distributions that we can't easily evaluate everywhere).
Example: measuring the flux of a source¶
Let's now work through a real, if simple, problem with a clearer link to astronomy.
Specifically, let's take our model for the brightness measured for some source in the sky from the Generative Models notes, and simplify it even more. We'll do away with the multiple pixels in this model, and just say that we've measured a total number of counts. This might be the case if our source is so small and distant that it's effectively point-like, and we just integrate the measured signal over the point-spread function of our telescope.
We know from first principles that the number of counts we measure will be Poisson-distributed, and that the average of that Poisson distribution is related to the average flux of the source by various multiplicative constants, such as the integration time. So let's boil this down to the simplest possible version of the experiment: we measured a number of counts and want to infer the mean of the Poisson distribution that generated the data. (We could always convert this to a flux later on.)
In other words, our model is now equivalent to the example in "Return to the (aside)$^2$" in the Generative Models notes, with this PGM:
In this case, $N$ is our data (a measured number of counts) and $\mu$ is the parameter we want to infer. The corresponding expressions for the model are
- $\mu \sim \mathrm{prior}$,
- $N \sim \mathrm{Poisson}(\mu)$.
Let's go ahead and explicitly define the latter in code:
def sampling_distribution(N, mu):
    return st.poisson.pmf(N, mu)
You may have seen the Poisson distribution before, but let's go ahead and visualize it for a few values of $\mu$ anyway.
N = np.arange(16)
plt.plot(N, sampling_distribution(N, mu=0.5), 'o', label=r'$\mu=0.5$');
plt.plot(N, sampling_distribution(N, mu=2.0), 'x', label=r'$\mu=2.0$');
plt.plot(N, sampling_distribution(N, mu=7.0), '+', label=r'$\mu=7.0$');
plt.xlabel(r'$N$', fontsize='x-large');
plt.ylabel(r'$P(N|\mu)$', fontsize='x-large');
plt.legend(fontsize='x-large');
Again, the sampling distribution is normalized over $N$ - each of these series would sum to 1.0 if we cared to check.
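If we did care to check, a couple of lines suffice (the sums fall slightly short of 1 only because the $N$ grid above stops at 15):
# each sampling distribution should sum to ~1 over N (up to truncation of the grid at N=15)
for m in [0.5, 2.0, 7.0]:
    print("mu =", m, ": sum over N =", sampling_distribution(N, m).sum())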
As a function of $\mu$ - the likelihood function - it looks like this:
mu = np.linspace(0.0, 15.0, 100)
plt.plot(mu, sampling_distribution(0.0, mu), '-', label=r'$N=0$');
plt.plot(mu, sampling_distribution(2.0, mu), '-', label=r'$N=2$');
plt.plot(mu, sampling_distribution(7.0, mu), '-', label=r'$N=7$');
plt.xlabel(r'$\mu$', fontsize='x-large');
plt.ylabel(r'$P(N|\mu)$', fontsize='x-large');
plt.legend(fontsize='x-large');
It so happens that $P(N|\mu)$, for fixed $N$, is also normalized as a function of $\mu$ in this particular case, but this will not be true in general.
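We can check this numerically as well (approximately, since our $\mu$ grid is truncated at 15):
# integrate the likelihood over mu for a few fixed N; in this Poisson case the results are close to 1
for n in [0, 2, 7]:
    print("N =", n, ": integral over mu =", np.trapz(sampling_distribution(n, mu), mu))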
To reach our goal of evaluating the posterior distribution, we'll need to specify the prior distribution for $\mu$. For the purpose of carrying on the example let's take $p(\mu)$ to be uniform over non-negative real numbers.
$\mu \sim \mathrm{Uniform}(0, \infty)$, or, equivalently, $p(\mu) = \mathrm{Uniform}(\mu|0, \infty)$.
On its face, this is a staggeringly stupid choice. If we claimed to know, a priori, that the source's flux is not infinite, it's hard to imagine anyone complaining about that assumption. If we tried to directly implement a uniform prior over this range in code, we would find that both the prior and, therefore, the evidence evaluate to zero, so that the posterior is NaN. Clearly what we mean is for the upper bound to just be a really big but finite number, and even then, in practice, there must be some values so implausibly large that no one would mind ruling them out to begin with.
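To see the NaN problem concretely, here is a quick demonstration (my own sketch, not part of the original notes):
# a literal Uniform(0, infinity) density is zero everywhere, so the numerator of Bayes' Law and the evidence are both zero
literal_prior = np.zeros(100)                                      # 1/infinity = 0 for every mu
unnorm = literal_prior * sampling_distribution(5, np.linspace(0.0, 15.0, 100))
print(unnorm.sum(), unnorm / unnorm.sum())                         # 0.0 and an array of nan (with a RuntimeWarning)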
When a PDF cannot be normalized, as in this case, it's called improper. Formally speaking, an improper prior is not necessarily a problem for us - it's possible (though not certain) for the posterior to be proper even if the prior is not.
Despite its seeming stupidity, this would not be an unusual choice of prior, and arguably is not unreasonable in practice. We'll see why, and discuss some equally reasonable alternatives, later in these notes. And so, we press onward.
def prior_distribution(mu):
    big = 1.0e10 # can't deal with actual infinities here...
    # The construction below allows mu to be scalar or an array. It's equivalent to (for scalar mu)
    # if mu>=0.0 and mu<big:
    #     return 1.0/big
    # else:
    #     return 0.0
    return np.where(np.logical_and(mu>=0.0, mu<big), 1.0/big, 0.0)
We've now implemented 2 of the 3 terms on the RHS of Bayes' Law. The last is the evidence, which in general is challenging to compute. Thankfully we normally don't have to compute it explicitly if we just want to infer parameter values; for a given choice of model, we saw above that the evidence doesn't depend on parameter values so it serves only as an overall normalization. Working with an unnormalized version of the posterior is usually good enough, and we can always normalize it by brute force if we need to. In fact, we will often lazily use "posterior" to refer to just the numerator of Bayes' Law.
So, we now have everything we need to calculate $p(\mu)\,P(N|\mu) \propto p(\mu|N)$.
Say we measure $N=5$. Now what?
Broadly speaking, we have 3 options for computing the posterior.
- We could calculate it over a grid in parameter space. This is the brute-force approach: straightforward, but expensive in many dimensions. (In the previous example, there was a single parameter that could only take on 2 values, so it was no big deal.)
- If we work it out analytically, we might end up with an expression that happens to be a standard PDF with a bunch of known properties. This is super-efficient as far as our computers are concerned, but of course is only possible for certain forms of the likelihood and prior.
- We could produce random samples from the posterior distribution using Monte Carlo methods. In a sense, this is another brute-force method, but a more intelligent (less brutish?) one than looping over a many-dimensional grid.
Option 2 is available only for special cases, although we'll see later that some complex problems can be broken down into a series of special cases. Evaluation over a grid is a respectable solution for one- or two-dimensional parameter spaces. Usually once the number of parameters exceeds 2, it's best to go with Monte Carlo.
As we have a single parameter in this example, we'll stick with the first two options for now.
Numerical (on a grid) solution¶
In this 1-parameter, computationally light problem, evaluating the posterior (including its normalization) over a grid hardly presents a problem.
def posterior_distribution(mu, N):
    # NB - This assumes mu is an equally spaced array covering the range where the posterior will be significantly
    # different from zero.
    post = prior_distribution(mu) * sampling_distribution(N, mu)
    return post / (np.sum(post) * (mu[1]-mu[0])) # bother to normalize, so that the comparison to option 2 works below
Below, we compare the prior, likelihood and posterior for $N=5$. (Recall that the likelihood is not normalized over $\mu$ in general, though the prior and posterior are.)
data_N = 5
mu = np.linspace(0.0, 15.0, 100)
plt.plot(mu, prior_distribution(mu), '.', label=r'$p(\mu)$');
plt.plot(mu, sampling_distribution(data_N, mu), '-', label=r'$p(N|\mu)$');
plt.plot(mu, posterior_distribution(mu, data_N), '.', label=r'$p(\mu|N)$');
plt.xlabel(r'$\mu$', fontsize='x-large');
plt.legend(fontsize='x-large');
This may seem like a lot of trouble to go to just to multiply the likelihood by a constant prior, when we allegedly don't care about the normalization of their product anyway. This is true. But, of course, the procedure would have worked just as well if we had chosen some other prior distribution for $\mu$.
Aside: the fact that $p(\mu|N)=P(N|\mu)$ exactly (as a function of $\mu$ for fixed $N$) follows from the fact that the Poisson $P(N|\mu)$ happens to be normalized over $\mu$ and our choice of a wide uniform prior. If both functions are normalized and differ by a constant factor, then that factor must be 1, of course. Again, this is not a general feature.
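As a quick numerical confirmation of this aside:
# with the wide uniform prior, the normalized posterior and the likelihood curve should coincide to grid precision
print(np.abs(posterior_distribution(mu, data_N) - sampling_distribution(data_N, mu)).max())
# this prints a small number, limited by the grid resolution and the truncation of mu at 15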
Analytical solution¶
If both the prior and likelihood are built from standard statistical distributions, we can sometimes take advantage of conjugacy.
Conjugate distributions are like eigenfunctions of Bayes' Theorem. These are special cases for which the form of the posterior is the same as the form of the prior, for a specific sampling distribution. Naturally, the parameters of the posterior distribution are different from the parameters of the prior, and will depend somehow on the data.
In math, let $f$ be the form of the prior distribution, $g$ be the form of the sampling distribution, $x$ and $\theta$ stand for the data and model parameters of interest, and $\phi$ stand for the parameters specifying the prior distribution (these are often called hyperparameters). In this example, $\phi$ would be the endpoints of the Uniform prior, $(0,\infty)$. Then, if $f$ and $g$ are conjugate,
$f(\theta|\phi) \, g(x|\theta) \propto f\left[\theta\left|\phi'(\phi,x)\right.\right]$,
where $\phi'(\phi,x)$ is a function of the hyperparameters and data, specific to the conjugate pair, that we could work out or look up, and the constant of proportionality is the evidence. (Up to now, we haven't been explicitly writing the prior as conditioned on its hyperparameters, because the hyperparameters have been fixed, but there's no reason we can't do so.)
Let's see how this works for the flux measurement example. Our Poisson sampling distribution is conjugate to the Gamma distribution, which has parameters $\alpha$ and $\beta$ and the PDF
$\mathrm{Gamma}(x|\alpha,\beta) = \frac{1}{\Gamma(\alpha)}\beta^\alpha x^{\alpha-1} e^{-\beta x}$ for $x\geq0$.
We could take this conjugacy as established fact (the Wikipedia says so!), but let's work it through this one time. For our problem, if we take a generic Gamma prior, we have
$p(\mu|\alpha,\beta) \, P(N|\mu) = \left[\frac{\beta^\alpha}{\Gamma(\alpha)} \mu^{\alpha-1} e^{-\beta \mu}\right] \left[\frac{\mu^N e^{-\mu}}{N!}\right]$.
Collecting the terms that contain $\mu$ together, we have
$\quad = \frac{\beta^\alpha}{\Gamma(\alpha)N!} \, \underbrace{ \mu^{\alpha+N-1} e^{-(\beta+1) \mu} }$.
Comparing to the equation above, we can see that this has the form of a Gamma distribution over $\mu$. Now recall that we neglected to divide by the evidence so far, so this is not yet the posterior. Since we know that the posterior will work out to be a normalized PDF over $\mu$, we could legitimately conclude at this point that the posterior will be a Gamma distribution, with parameters $\alpha' = \alpha+N$ and $\beta' = \beta+1$. Alternatively, we could integrate the latest expression above to evaluate the evidence explicitly. Here it's convenient to multiply and divide in the constant that would make the Gamma PDF complete (the first term in the underbrace below),
$\quad = \frac{\beta^\alpha}{\Gamma(\alpha)N!} \frac{\Gamma(\alpha+N)}{(\beta+1)^{\alpha+N}} \, \underbrace{ \frac{(\beta+1)^{\alpha+N}}{\Gamma(\alpha+N)} \mu^{(\alpha+N)-1} e^{-(\beta+1) \mu} }$.
The collected terms above are now a normalized PDF, so that when we integrate over $\mu$ we will be left with only the coefficients out front. So, we've just proved that the evidence is
$P(N) = \frac{\beta^\alpha}{\Gamma(\alpha)N!} \frac{\Gamma(\alpha+N)}{(\beta+1)^{\alpha+N}}$,
and, indeed, if we had evaluated the full posterior, we would be left with
$p(\mu|N) = \frac{p(\mu|\alpha,\beta)\,P(N|\mu)}{P(N)} = \frac{(\beta+1)^{\alpha+N}}{\Gamma(\alpha+N)} \mu^{(\alpha+N)-1} e^{-(\beta+1) \mu} = \mathrm{Gamma}(\mu|\alpha+N, \beta+1)$.
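Before moving on, here is a quick numerical check of this result (a sketch of my own; the specific $\alpha$, $\beta$ and $N$ values are arbitrary choices for illustration): integrating the Gamma prior times the Poisson likelihood over a fine grid should reproduce the evidence formula above, and the normalized product should match $\mathrm{Gamma}(\mu|\alpha+N,\beta+1)$.
# numerical check of the Gamma-Poisson conjugacy result
from scipy.special import gammaln
a, b, N_obs = 2.0, 1.0, 5                      # arbitrary example values
mu_grid = np.linspace(0.0, 40.0, 4001)
numer = st.gamma.pdf(mu_grid, a, scale=1.0/b) * st.poisson.pmf(N_obs, mu_grid)
evidence_grid = np.trapz(numer, mu_grid)
# P(N) = beta^alpha Gamma(alpha+N) / [Gamma(alpha) N! (beta+1)^(alpha+N)], evaluated in log space
evidence_analytic = np.exp(a*np.log(b) + gammaln(a+N_obs) - gammaln(a)
                           - gammaln(N_obs+1.0) - (a+N_obs)*np.log(b+1.0))
print(evidence_grid, evidence_analytic)        # these agree to several decimal places
# the normalized numerator should match the Gamma(a+N, b+1) PDF
print(np.abs(numer/evidence_grid - st.gamma.pdf(mu_grid, a+N_obs, scale=1.0/(b+1.0))).max())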
One wrinkle though: we were planning to use a uniform prior for $\mu$. Generally, we shouldn't limit ourselves to choosing conjugate priors just because it's mathematically convenient. In this case, however, it so happens that the uniform prior we wanted is a limiting case of Gamma (with $\alpha\rightarrow1$ and $\beta\rightarrow0$), so we can take advantage of conjugacy after all.
Let's go ahead and update our PGM to explicitly show the hyperparameters in this case.
We can quickly verify that the analytic solution agrees with our grid calculation. (scipy.stats only implements the $\alpha$ parameter of the Gamma distribution in the intuitive way, making the inverse of $\beta$ a generic "scale" parameter, hence the acrobatics below.)
alpha = 1.0
beta = 0.0001 # putting in exactly zero here will get us into numerical trouble
plt.plot(mu, prior_distribution(mu), '.', label=r'grid $p(\mu)$');
plt.plot(mu, st.gamma.pdf(mu, alpha, scale=1.0/beta), '-', label=r'Gamma $p(\mu)$');
plt.plot(mu, posterior_distribution(mu, data_N), '.', label=r'grid $p(\mu|N)$');
plt.plot(mu, st.gamma.pdf(mu, alpha+data_N, scale=1.0/(beta+1.0)), '-', label=r'Gamma $p(\mu|N)$');
plt.xlabel(r'$\mu$', fontsize='x-large');
plt.legend(fontsize='x-large');
Choosing a prior¶
We've put it off for long enough: it's time to talk in depth about choosing a prior distribution.
Recall that the prior represents what we know before accounting for the data that we're analyzing at the moment. Depending on the circumstances we might choose a prior to
- encode the results of some previous experiment,
- encode a requirement of the theory underlying our model,
- encode an estimate of the uncertainty in the parameter (e.g. from theoretical considerations), or
- be "minimally informative" or to have minimal influence on the posterior.
The first of these is in some ways the most straightforward and least controversial. Suppose, hypothetically, we had acquired some data that measure the average density of matter in the Universe. Just to be difficult, cosmologists like to write this as $\rho_\mathrm{m} \propto \Omega_\mathrm{m} h^2$, where $\Omega_\mathrm{m}$ is the ratio of the matter density to the critical density, and $h$ is the Hubble constant in dimensionless form. If we expect our measurement to have comparable uncertainty to what has been done previously, then it might make sense to combine the two results. That is, we would set our prior on $\rho_\mathrm{m}$ equal to the posterior distribution of a previous (independent) experiment and use Bayes' Law to update our knowledge in light of the new data.
On the other hand, say our goal with the analysis is to use our data in conjunction with someone else's constraints on $h$ in order to determine $\Omega_\mathrm{m}$. In this context, we would call $h$ a nuisance parameter, i.e. a parameter of the model that we have to care about (our data depend on it, in the generative sense), but which we are not interested in. We might then adopt the prior $h \sim \mathrm{Normal}(0.72, 0.08)$ on the basis of the Hubble Key Project (Freedman et al. 2001). Our posterior distribution for $h$ would simply reproduce this prior, but we would be able to obtain constraints on $\Omega_\mathrm{m}$. (In the previous case, where only $\Omega_\mathrm{m} h^2$ is constrained by the prior or data, we would have a constraint on that combination, but the marginal distribution of either $\Omega_\mathrm{m}$ or $h$ would extend to infinity.)
The second and third options above are not functionally very different from the first, except that the prior information comes from theoretical considerations rather than a previous experiment. For example, in a cosmology analysis, we might require that the age of the Universe implied by the various parameters be greater than the age of the oldest stars in the Galaxy. (One could argue that this is still based on data, but it could nevertheless be a very hand-wavy bound.) Alternatively, a typical way to account for uncertainty in the gain of some instrument, if we're told by the designers that it's good to $\approx5\%$, would be to introduce a nuisance parameter representing this uncertainty, and place a $\mathrm{Normal}(1.0, 0.05)$ prior on it.
The last option is the one that people tend to have hang-ups about. What do we do if there is no previous measurement of our model parameters? What if there is no consensus about the theoretical prior expectation on a parameter? What if we want to show what our data accomplish, without that interpretation depending heavily on prior data or expectations? Even though there are arguably always prior expectations of one kind or another, this latter case usually applies to any parameters our data can measure (i.e. not nuisance parameters) - philosophy aside, we want to see how constraining our data are.
In this case, the solution would be to choose what is incorrectly known as an uninformative prior.
As a practical matter, we could call a prior uninformative if it is significantly wider than, and consistent with, the posterior distribution, as in the worked example above. But note two very important things about the solution above:
- There is, in the information theoretic sense, no such thing as a prior distribution that contains no information. All PDFs are somewhat informative.
- Consequently, there is still a choice to be made, since there is no uniquely zero-information prior.
The good news is that, given our practical definition of uninformative, it often doesn't matter exactly what prior we choose. As long as the likelihood function is "skinny" compared with the prior, their product will largely resemble the likelihood.
Things become stickier when the data simply aren't all that constraining compared with the reasonable range of parameter values. Of course, "reasonable range" is a prior expectation, but in this case, the shape of the prior PDF over such a range could well make a noticeable difference to the posterior. (For a given value of "noticeable".) What to do then?
Well, getting better data would be a good idea, if possible. But, in the meantime, all you can do is choose a prior PDF that you're willing to defend as reasonable, and see where that takes you, while acknowledging that it has some impact on the results. (Having "some impact" on the results is practically a tautology, but such statements have been known to mollify critics.)
Let's unpack the statement that there are no truly uninformative PDFs a bit more. Consider the flux measurement example above, with the context that, once we account for the exposure time, distance, etc., the luminosity of the star we observed is $L=\mu\,L_\odot$, with $L_\odot$ the Solar luminosity (just to keep things simple). So, for $N=5$, we think the star is probably a few times brighter than the Sun.
Now consider the following true statements about our uniform, semi-infinite prior:
- The a priori probability of $L$ being between 0 and 1$L_\odot$ is the same as it being between 1 and 2$L_\odot$.
- The a priori probability of $L$ being between 0 and 1$L_\odot$ is the same as it being between 10 and 11$L_\odot$.
- The a priori probability of $L$ being between 0 and 1$L_\odot$ is the same as it being between 1000000 and 1000001$L_\odot$.
- The a priori probability of $L$ being between 1 and 100$L_\odot$ is 99 times larger than it being between 0 and 1$L_\odot$.
Now, I am far from an expert on stars, but I do know that faint stars are vastly more numerous than bright stars, and our Sun is vaguely near the average. Technically, we would say that the luminosity function (number as a function of luminosity) of stars is decreasing. So, absent any other information about the star we observed, all of these statements appear dubious. The target should be less likely to be in the 1-2 $L_\odot$ range than 0-1 $L_\odot$, and much less likely to be in the 10-11 $L_\odot$ range. I'm pretty sure 1000000$L_\odot$ stars don't exist [citation needed]. And it's probably still more likely to be 0-1 $L_\odot$ than 1-100$L_\odot$, certainly not two orders of magnitude less likely.
In situations like this, the uniform distribution (loosely translated as "ignorance about the value of the parameter") might be less appropriate than a prior that's uniform in the log ("ignorance about the order of magnitude of the parameter"), that is $p(\mu) \propto \mu^{-1}$ (if $\ln\mu$ is uniformly distributed, the change of variables formula gives $p(\mu) = p(\ln\mu)\,|d\ln\mu/d\mu| \propto \mu^{-1}$). We could make the following true statements about the uniform-in-log prior:
- The a priori probability of $L$ being between 0.01 and 1$L_\odot$ is $\approx7$ times larger than it being between 1 and 2$L_\odot$.
- The a priori probability of $L$ being between 0.01 and 1$L_\odot$ is $\approx48$ times larger than it being between 10 and 11$L_\odot$.
- The a priori probability of $L$ being between 0.01 and 1$L_\odot$ is $\approx5$ million times larger than it being between 1000000 and 1000001$L_\odot$.
- The a priori probability of $L$ being between 1 and 100$L_\odot$ is the same as it being between 0.01 and 1$L_\odot$.
Qualitatively, this is a better match to our physical understanding of stars. You'll notice that it's become impossible to speak of zero luminosity, although that might not pose a problem (if we knew about the source ahead of time, it's likely that it occasionally emits a photon).
Naturally, there's also a case to be made for using a theoretical or observed luminosity function as the basis of the prior, in this situation. In fact, if we really needed the most accurate inference of the luminosity of this source, and the data were not very constraining, it would be a very, very good idea to do so.$^1$ But, as we're just illustrating a point, let's see how the uniform prior over $\log \mu$ changes our results. (Note that $1/x$ is also a limiting case of the Gamma distribution, so we can use the analytic machinery above once again.)
# uniform prior
alpha = 1.0
beta = 0.0001 # putting in exactly zero here will get us into trouble
plt.plot(mu, 300*st.gamma.pdf(mu, alpha, scale=1.0/beta), '-', label=r'$p(\mu)\propto\mathrm{const}$')
plt.plot(mu, st.gamma.pdf(mu, alpha+data_N, scale=1.0/(beta+1.0)), '-', label=r'$p(\mu|N)$');
# uniform-in-log prior
alpha = 0.0001 # putting in exactly zero here will (also) get us into trouble
beta = 0.0001 # putting in exactly zero here will (still) get us into trouble
plt.plot(mu, 300*st.gamma.pdf(mu, alpha, scale=1.0/beta), '-', label=r'$p(\mu)\propto \mu^{-1}$')
plt.plot(mu, st.gamma.pdf(mu, alpha+data_N, scale=1.0/(beta+1.0)), '-', label=r'$p(\mu|N)$');
plt.xlabel(r'$\mu$', fontsize='x-large');
plt.legend(fontsize='x-large');
I multiplied both priors by a constant above so that it would be easier to see the difference in their shapes. The choice of prior does make some difference in this case. (You can play around with different priors or values of data_N to see how the picture changes.)
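One way to quantify the difference (a quick sketch; recall that the mean of a $\mathrm{Gamma}(\alpha,\beta)$ distribution is $\alpha/\beta$):
# posterior means under the two priors, using the Gamma(alpha+N, beta+1) results from above
print("uniform prior:        posterior mean =", (1.0 + data_N) / (0.0001 + 1.0))
print("uniform-in-log prior: posterior mean =", (0.0001 + data_N) / (0.0001 + 1.0))
With only a single measured count, the two priors pull the posterior mean apart by about one count; with more data, the difference would shrink.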
In practice, the uniform and uniform-in-log distributions, with endpoints in the regime where the likelihood is essentially zero, are by far the most common choices when we want to be "uninformed". There are also strategies for choosing minimally informative priors, in some mathematical sense:
- The Jeffreys prior is proportional to the square root of the determinant of the Fisher information for a given problem, a construction that makes it invariant under reparametrization of the model; loosely, it places more prior weight where the data are expected to be more constraining.
- Priors with maximal entropy for a given problem can be defined.
These are not identical, and in either case their functional form depends on the form of the likelihood. Again, these are not often used (they rapidly become unwieldy as generative models become more complex), but you may run into them every so often.
It's worth repeating that there is no genuinely uninformative option. Your job is not to magically produce a fully "objective" prior (whatever that means), nor is it even to choose a prior that has the smallest possible influence on the result (again, whatever that means). It's just to choose a sensible and defensible option. If the decision really isn't obvious, yet has a noticeable impact on the results, then say so.
As we'll return to later, there are always some who interpret the need to make a decision about priors as some sort of disaster. It isn't. It simply means that, if the data are so inadequate that our choice of prior really matters, our job as analysts is to clearly state and defend our assumptions, and show their influence on the results. These are things that good scientists should be willing to do anyway.
Bayes' Law as a model for accumulating information¶
As alluded to in the discussion of prior distributions, above, Bayes' Law provides a seamless way to understand the refinement of knowledge as multiple data sets are accounted for. If we had not one data set but two ($x_1$ and $x_2$), we could write the posterior distribution for the parameters ($\theta$) as
$p(\theta|x_1,x_2) = \frac{p(\theta)\,p(x_1,x_2|\theta)}{p(x_1,x_2)}$.
If the two data sets are conditionally independent of one another (given $\theta$), the sampling distribution factors, and, writing $p(x_1,x_2)=p(x_1)\,p(x_2|x_1)$, we have
$p(\theta|x_1,x_2) = \frac{p(\theta)\,p(x_1|\theta)\,p(x_2|\theta)}{p(x_1)\,p(x_2|x_1)}$
$\quad = \frac{p(\theta)\,p(x_1|\theta)}{p(x_1)} \, \frac{p(x_2|\theta)}{p(x_2|x_1)}$
$\quad = \frac{p(\theta|x_1) \, p(x_2|\theta)}{p(x_2|x_1)}$.
In words, analyzing the two data sets in tandem is equivalent to computing the posterior for $\theta$ based on data $x_1$, and making that the new prior to be updated by data $x_2$. This is one of those completely reasonable and intuitive things that happens effortlessly, thanks to the principled, mathematical framework we're working within. Note that the "original" prior, $p(\theta)$, only appears once (as it should!), so this is not the same as simply multiplying together the posteriors we would get by analyzing each data set separately, starting from that prior each time.
Just for fun, we can illustrate this with the flux measurement example from above, starting from a uniform prior and assuming a series of measurements: 5, 3, 6, 5, 5, 9, 6, 8, 5, 2. Notice how each subsequent posterior distribution becomes skinnier as they converge towards the value $\mu=5.5$ (used to generate these particular data).
data_Ns = [5, 3, 6, 5, 5, 9, 6, 8, 5, 2]
# uniform prior
alpha = 1.0
beta = 0.0001 # putting in exactly zero here will get us into trouble
plt.plot(mu, st.gamma.pdf(mu, alpha, scale=1.0/beta), '-');
# incorporate each datum
for data_N in data_Ns:
    alpha = alpha + data_N
    beta = beta + 1.0
    plt.plot(mu, st.gamma.pdf(mu, alpha, scale=1.0/beta), '-');
plt.xlabel(r'$\mu$', fontsize='x-large');
plt.ylabel(r'$p(\mu|...)$', fontsize='x-large');
Exercise for the reader: in this particular case, thanks to the additivity of the Poisson distribution, we could also have just compressed the list of $N$s to a single datum and jumped right to an answer using the results from earlier and a little attention to the transformation of random variables. But where's the fun in that?
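Spoiling that exercise slightly, here is a minimal check (my own addition): the sum of ten independent $\mathrm{Poisson}(\mu)$ draws is distributed as $\mathrm{Poisson}(10\mu)$, so a single update using the total counts should land on the same $\mathrm{Gamma}(\alpha_0+\sum_i N_i, \beta_0+10)$ posterior as the loop above.
# compress the ten measurements into their total and update the prior just once
alpha0, beta0 = 1.0, 0.0001
N_tot = np.sum(data_Ns)                              # 54 counts in total
print(alpha0 + N_tot, beta0 + len(data_Ns))          # should equal the final alpha, beta from the loop
print(np.allclose([alpha, beta], [alpha0 + N_tot, beta0 + len(data_Ns)]))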
Finally, recall that the two data sets needed to be conditionally independent for the simple expressions above to work. If not, then $p(x_1,x_2|\theta) \neq p(x_1|\theta) p(x_2|\theta)$. This happens in cosmology, for example, when $x_1$ and $x_2$ are two different tracers of the same underlying density field. In this case, assuming $\theta$ refers to global cosmological parameters, $x_1$ and $x_2$ would not be conditionally independent - they each depend on local conditions like density that are implicitly marginalized over when writing $p(x_1,x_2|\theta)$. We could expand the model to explicitly incorporate all this local information (think of a plate over many cells in position, $x,y,z$). More likely, we would just do a new, joint analysis of $x_1$ and $x_2$ when we obtained the second data set. This lack of conditional independence is why you might hear the word "covariances" thrown around a lot in cosmology circles.
Previewing a couple of topics to come later:
- In practice, we are much more likely to have samples from a posterior distribution than a simple-to-evaluate expression as above. The equivalent of analytically incorporating an independent measurement is accomplished by importance weighting. That is, in any calculation involving the list of samples, we would weight each sample by its probability according to this second sampling distribution.
- One would normally want to check that the constraints from two data sets are consistent with one another before combining them. This is touched on in the "How to Avoid Fooling Ourselves" notes.
Posterior predictions¶
A quick way to test whether our fit to the data makes sense is to look at posterior predictions, something we will come back to with more rigor later. For now, we will just want to look at a posteriori predictions for our measurements, that is, predictions for what we might measure (based on the sampling distribution) for parameter values drawn from the posterior distribution. This may sound circular, and it is - that's the point! If the predictions we make from the posterior don't resemble the data that went into determining that posterior to begin with, that would be a good indication that our model assumptions have failed somewhere.
Let's do this for the latest incarnation of the example, in which we have a posterior distribution based on a whole list of measured $N$'s. What we would do is draw many possible values of $\mu$ from the posterior distribution, and, for each of those, draw one or more values of $N$ from the sampling distribution. From this we can build a histogram of predicted measurements (the posterior predictive distribution) to compare with the histogram of actual measurements, as below. (You may not be shocked to learn that, sometimes, the posterior predictive distribution is again a standard distribution with its own name. But we'll code up the random draws here anyway.)
nmc = 10000 # number of posterior predictions to make
bins = np.arange(0, 18, 2);
plt.hist(st.poisson.rvs(st.gamma.rvs(alpha, scale=1.0/beta, size=nmc)), density=True, label='PPD', bins=bins);
plt.hist(data_Ns, bins=bins, density=True, histtype='step', label='data');
plt.xlabel(r'$N$', fontsize='x-large');
plt.legend(fontsize='x-large');
Qualitatively, these two distributions look basically compatible. Making this kind of comparison quantitative is something we'll come back to, but, in the meantime, making simple comparisons of the data with posterior predictions is a very good habit to get into. Being older and wiser than a person who does not do this, you will, of course, do so.
Just about everything is (or could be) an inference¶
We've mostly focused on one task so far, constraining parameters within a model, but this framework applies any time we want to draw a conclusion from data. We've already mentioned the task of selecting among competing models, i.e. inferring which model better explains the data. In addition, there's
- source detection: a particular case of model selection (no source vs a source, or various possible sources, or various numbers of sources...)
- interpolation and extrapolation: inference about the value of a function
- prediction: inference about something not yet measured
- and more
The next sets of notes will discuss how to quantitatively summarize posterior distributions, and put Bayesian analysis in context with other methodologies.
Endnotes¶
Note 1¶
I don't know if this is still done, but I used to hear the term "deboosting" thrown around in the context of galaxy cluster masses. The mass function of clusters is steeply decreasing, and masses are incredibly hard to measure precisely. Hence, the mass estimate one gets from using a uniform prior tends to be greater than the best estimate one could make by accounting for the mass function. This is exacerbated by selection bias, which we will get to eventually. Something similar happens in redshift estimation of sources of all kinds, given that the number of sources tends to be a strong function of redshift. The point is: one could go through some kind of convoluted "deboosting" procedure, or one could just choose a sensible prior and use the tools that Bayes, Laplace et al. gave us.