Tutorial: Essential Probability¶
In this notebook, we will get some practice with simple probabilistic manipulations and calculations. None of it is terribly complicated in the grand scheme of things, either conceptually or numerically. On the other hand, we will be speaking the language of probability and doing calculations similar to these throughout the course, so it's worth spending a little time to make sure we have the fundamentals down. In particular, if you are already proficient with Python and NumPy, the mechanical aspects may seem very easy. Conversely, if you feel at sea working through the steps below, this is an indication that the programming learning curve later on will be a real challenge.
Below, you will
- implement functions that evaluate the marginal and conditional distributions of a multivariate Gaussian
- numerically approximate those distributions on a grid, and compare
- approximate the same distributions using random samples, and compare
from os import getcwd
from yaml import safe_load
import sys
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
thisTutorial = 'essential_probability'
if getcwd() == '/content':
    # assume we are in Colab, and the user's data directory is linked to their drive/Physics267_data
    from google.colab import drive
    drive.mount('/content/drive')
    datapath = '/content/drive/MyDrive/Physics267_data/' + thisTutorial + '/'
else:
    # assume we are running locally somewhere and have the data under ./data/
    datapath = 'data/'
Background¶
As you have seen in the notes, the univariate Gaussian or normal PDF is
$\mathrm{Normal}(x|\mu,\sigma) \equiv p_\mathrm{Normal}(x|\mu,\sigma) = \frac{1}{\sqrt{2\pi}\sigma}\exp \left[-\frac{(x-\mu)^2}{2\sigma^2}\right]$.
The multivariate version of this distribution is
$\mathrm{Normal}_n({\bf x}|{\bf \mu},{\bf \Sigma}) = \frac{1}{\sqrt{(2\pi)^n|{\bf \Sigma}|}}\exp \left[-\frac{1}{2}({\bf x}-{\bf \mu})^{\mathrm{T}}{\bf \Sigma}^{-1}({\bf x}-{\bf \mu})\right]$,$^1$
where ${\bf x}$ and ${\bf \mu}$ are now vectors of length $n$, and ${\bf \Sigma}$ is an $n \times n$ positive definite matrix. In the $n=2$ case, we have ${\bf x}=(x,y)$ and ${\bf \mu}=(\mu_x,\mu_y)$, and ${\bf \Sigma}$ is usually decomposed as
${\bf \Sigma} = \left( \begin{array}{cc} \sigma^2_x & \rho\sigma_x\sigma_y \\ \rho\sigma_x\sigma_y & \sigma^2_y \end{array} \right)$.
With a little patience, you can expand the PDF into this form
$\mathrm{Normal}_2({\bf x}|{\bf \mu},{\bf \Sigma}) = \frac{1}{2\pi\sigma_x\sigma_y\sqrt{1-\rho^2}} \exp\left\{-\frac{1}{2(1-\rho^2)}\left[ \left(\frac{x-\mu_x}{\sigma_x}\right)^2 + \left(\frac{y-\mu_y}{\sigma_y}\right)^2 - 2\rho \left(\frac{x-\mu_x}{\sigma_x}\right)\left(\frac{y-\mu_y}{\sigma_y}\right) \right]\right\}$.
With slightly more patience, and some algebra, it's also possible$^2$ to rearrange the PDF into a factored form
$\mathrm{Normal}_2({\bf x}|{\bf \mu},{\bf \Sigma}) = \mathrm{Normal}\left(x|\mu_x,\sigma_x\right) \, \mathrm{Normal}\left[y\left| \mu_y+\frac{\rho\sigma_y}{\sigma_x}(x-\mu_x), \sigma_y\sqrt{1-\rho^2} \right.\right] = \mathrm{Normal}\left(y|\mu_y,\sigma_y\right) \, \mathrm{Normal}\left[x\left| \mu_x+\frac{\rho\sigma_x}{\sigma_y}(y-\mu_y), \sigma_x\sqrt{1-\rho^2} \right.\right]$.$^3$
Implementing analytically derived PDFs¶
Although we won't need it until the next section, our first task is to implement a function evaluating the bivariate Gaussian PDF, as a function of the parameters in the last equation.
In this notebook, please implement everything straightforwardly from (any of) the equations above rather than calling pre-packaged functions from scipy
or elsewhere.$^4$
def p_xy(x, y, mu_x, mu_y, sigma_x, sigma_y, rho):
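For reference, here is one minimal sketch that transcribes the expanded $\mathrm{Normal}_2$ expression above directly into NumPy; yours may be organized differently.
def p_xy(x, y, mu_x, mu_y, sigma_x, sigma_y, rho):
    # standardized offsets from the means
    u = (x - mu_x) / sigma_x
    v = (y - mu_y) / sigma_y
    # prefactor and exponent of the expanded bivariate normal PDF
    norm = 2.0 * np.pi * sigma_x * sigma_y * np.sqrt(1.0 - rho**2)
    return np.exp(-0.5 * (u**2 + v**2 - 2.0 * rho * u * v) / (1.0 - rho**2)) / norm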
Next, let's determine the marginal distribution of $x$, $p(x)$, and the conditional distribution of $y$, $p(y|x)$. We won't bother with $p(y)$ and $p(x|y)$, given the evident symmetry of the expressions.$^3$ Recall that, by definition,
$p(x) = \int dy \, p(x,y)$
(where $p(x,y)$ is shorthand for the bivariate normal density as a function of $\mu_x$, $\mu_y$, etc.), and the marginal and conditional distributions jointly satisfy
$p(x,y) = p(x) \, p(y|x)$.
Based on the previous section, write down $p(x)$ and $p(y|x)$. Both of these are (spoiler) Gaussians, and should therefore respectively be put in the simplified forms $\mathrm{Normal}(x|\ldots)$ and $\mathrm{Normal}(y|\ldots)$. This can (and should!) be accomplished by inspection,$^3$ although you can work things through if you have any doubts. Refer to the "most convenient mathematical shortcuts ever" section in the Essential Probability notes if this seems weird.
Space for your analytical derivations/arguments
Note: Equations in markdown cells are coded in LaTeX, between $'s. Examine the cells above in edit mode (double click them) to see. If you're not yet a LaTeX magician, don't worry; you'll probably see everything you need for the moment as we go. When in doubt, the appendices of this guide list many helpful math commands.
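For reference, once you've had a go yourself: reading the first factored form in the Background section against the definition $p(x,y) = p(x)\,p(y|x)$ gives $p(x) = \mathrm{Normal}(x|\mu_x,\sigma_x)$ and $p(y|x) = \mathrm{Normal}\left[y\left|\,\mu_y+\frac{\rho\sigma_y}{\sigma_x}(x-\mu_x), \sigma_y\sqrt{1-\rho^2}\right.\right]$.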
Implement Python functions for $p(x)$ and $p(y|x)$, analogous to the one for $p(x,y)$ above.
def p_x(x, mu_x, mu_y, sigma_x, sigma_y, rho):
# note that `y' is not an argument of this function!
def p_y_given_x(y, x, mu_x, mu_y, sigma_x, sigma_y, rho):
# note the order of `y' and `x' above!
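Again for reference, minimal sketches consistent with the univariate Gaussian definition at the top of the notebook and the marginal/conditional parameters read off above (not the only sensible way to organize the code):
def p_x(x, mu_x, mu_y, sigma_x, sigma_y, rho):
    # marginal of x: univariate Gaussian with mean mu_x and standard deviation sigma_x
    return np.exp(-0.5 * ((x - mu_x) / sigma_x)**2) / (np.sqrt(2.0 * np.pi) * sigma_x)

def p_y_given_x(y, x, mu_x, mu_y, sigma_x, sigma_y, rho):
    # conditional of y given x: Gaussian with a shifted mean and reduced width
    mean = mu_y + rho * sigma_y / sigma_x * (x - mu_x)
    sd = sigma_y * np.sqrt(1.0 - rho**2)
    return np.exp(-0.5 * ((y - mean) / sd)**2) / (np.sqrt(2.0 * np.pi) * sd)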
In the remaining sections, we'll compare these functions to numerical approximations for a particular choice of ${\bf \mu}$ and ${\bf \Sigma}$, and a value of $x$ to condition on. Let's find out what they are!
test_values = safe_load(open(datapath+'data.yaml', 'r').read())
params = test_values['params']
params
{'mu_x': -0.6622828704260115, 'mu_y': 2.3888101527152896, 'rho': -0.7217394324343991, 'sigma_x': 0.9267964821658732, 'sigma_y': 1.0079054376556293}
fixed_x = test_values['fixed_x']
fixed_x
-1.86
While we're here, we may as well check that your functions satisfy the joint definition of marginal and conditional probabilities for these parameter values and some arbitrary choices of $x$ and $y$.
assert np.isclose(p_xy(0.0, 0.0, **params), p_x(0.0, **params) * p_y_given_x(0.0, 0.0, **params))
assert np.isclose(p_xy(1.0, -1.0, **params), p_x(1.0, **params) * p_y_given_x(-1.0, 1.0, **params))
Python trivia: The **params
in the function calls above passes the entries of the params
dictionary as named arguments of the function, i.e. it is equivalent to the very tedious mu_x=params['mu_x'], mu_y=params['mu_y'], sigma_x=params['sigma_x'], ...
. This is a nice convenience, allowing us to write functions with meaningfully named arguments, while also keeping and passing sets of parameter values in organized structures. We'll use this functionality often in these notebooks.
Numerical comparison: on a grid¶
In this section we'll check your analytical functions against numerical calculations on a grid, the latter being a reasonable way to proceed in some other 2-dimensional case that didn't happen to have simple analytic solutions.
Of course, a grid-based calculation will always be somewhat approximate, since
- the grid can't extend to infinity in $x$ and $y$, and therefore can't account for 100% of the probability in $p(x,y)$, and
- the grid's finite spacing can't perfectly reproduce the shape of $p(x,y)$.
Nevertheless, it's possible for grid calculations to be extremely accurate, for sensible choices of bounds and spacing. Go ahead and make some choices below, with the aim of
- containing most of the probability within the grid bounds, and
- resolving the shape of $p(x,y)$ reasonably with the grid spacing.
You will be able to come back and adjust these if they don't seem to be doing the job, of course.
Tip: We recommend defining things such that the number of grid points in $x$ will be different from the number of grid points in $y$. There is no deep reason for this, but it makes it much harder to get confused about which array axis corresponds to which variable.
# x bounds and spacing
# xmin = ...
# xmax = ...
# dx = ...
# y bounds and spacing
# ymin = ...
# ymax = ...
# dy = ...
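For concreteness, here is one plausible choice. It is a sketch only: the specific multipliers below are assumptions, and any bounds/spacing that comfortably contain the bright part of the PDF and resolve its shape will do. The slightly different spacings in $x$ and $y$ give the two grid axes different lengths, per the tip above.
# x bounds and spacing
xmin = params['mu_x'] - 5.0 * params['sigma_x']
xmax = params['mu_x'] + 5.0 * params['sigma_x']
dx = params['sigma_x'] / 25.0
# y bounds and spacing
ymin = params['mu_y'] - 5.0 * params['sigma_y']
ymax = params['mu_y'] + 5.0 * params['sigma_y']
dy = params['sigma_y'] / 20.0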
The cell below defines vectors that hold the $x$ and $y$ values that will be gridded, as well as arrays (using the handy numpy.meshgrid
function) where each entry holds the $x$ and $y$ value corresponding to that grid point. (If that was confusing, the plot below should clear it up.)
xvalues = np.arange(xmin, xmax+dx, dx)
yvalues = np.arange(ymin, ymax+dy, dy)
grid_x, grid_y = np.meshgrid(xvalues, yvalues, indexing='ij')
Aside: Note the indexing='ij'
option to meshgrid
. This makes the function return arrays where the first index corresponds to $x$ and the second to $y$ instead of vice versa. The result is a much better convention in terms of generalizing to higher dimensions (and it corresponds to our usual mathematical notation, $x,y$), but it means we will need to transpose arrays before plotting them if we want $x$ to appear on the horizontal axis and $y$ to appear on the vertical axis.
The cell below illustrates the contents of grid_x
and grid_y
.
plt.rcParams['figure.figsize'] = (10.0, 8.0)
fig, ax = plt.subplots(1,2);
ax[0].imshow(grid_x.T, cmap='gray', origin='lower', extent=[xmin, xmax, ymin, ymax]);
ax[0].set_xlabel('x'); ax[0].set_ylabel('y'); ax[0].set_title('grid_x');
ax[1].imshow(grid_y.T, cmap='gray', origin='lower', extent=[xmin, xmax, ymin, ymax]);
ax[1].set_xlabel('x'); ax[1].set_ylabel('y'); ax[1].set_title('grid_y');
Why would we want 2D arrays that just repeat the information we already have in xvalues
and yvalues
? Just for convenience. Because numpy
can do most mathematical operations on arrays transparently, and assuming the functions you wrote above straightforwardly implement the relevant equations, we should be able to pass grid_x
and grid_y
in place of the arguments x
and y
and get sensible results, without explicitly writing a loop over every array element.
Let's see how that works by evaluating $p(x,y)$ on this grid and visualizing it.
grid_p_xy = p_xy(grid_x, grid_y, **params)
plt.rcParams['figure.figsize'] = (7.0, 5.0)
plt.imshow(grid_p_xy.T, cmap='gray', origin='lower', extent=[xmin, xmax, ymin, ymax]);
plt.xlabel('x'); plt.ylabel('y');
If the grid you defined was sensible, the bright part of grid_p_xy
(containing most of the probability) should be comfortably within the array, and should be well resolved (not look too pixelated).
Marginalization¶
Next, we'll get more quantitative and check your function for $p(x)$ against the equivalent numerical calculation using grid_p_xy
. This means numerically carrying out the integral $p(x) = \int dy \, p(x,y)$, so the result (grid_p_x
) should be a 1D array whose values are our numerical approximation of $p(x)$ at the xvalues
. Numerical integration techniques could easily be a course on their own, but in situations like this it frankly isn't worth doing anything more complex than a Riemann sum. After all, at least in this case, it's practically painless for us to re-create the grid with different bounds and/or spacings if we decide that's needed to make the sum accurate. That said, you can perform the integral however you want, provided it only relies on the grid_p_xy
array and the variables defining its bounds/spacing.
# grid_p_x = ...
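A minimal Riemann-sum sketch, assuming the indexing='ij' convention above (so axis 0 of grid_p_xy runs over $x$ and axis 1 runs over $y$):
grid_p_x = grid_p_xy.sum(axis=1) * dy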
Before moving on, check that your numerical approximation to $p(x)$ on a grid is normalized, as it should be; that is, check that $\int dx\, p(x) = 1$. This can be accomplished with essentially the same technique you used to integrate over $y$ above. If the assertion fails, either your integration is suspect, the grid bounds/spacing are insufficient, or something is fishy in your implementation of p_xy
.
# grid_p_x_integral = ...
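# e.g., the same Riemann-sum approach applied over x (a sketch):
grid_p_x_integral = grid_p_x.sum() * dx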
print(grid_p_x_integral)
assert np.isclose(grid_p_x_integral, 1.0)
0.9999992350574406
Good! Now let's compare this with your analytical function for $p(x)$.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
plt.plot(xvalues, grid_p_x, 'b.', label='grid');
plt.plot(xvalues, p_x(xvalues, **params), 'r-', label='analytic');
plt.xlabel('x'); plt.ylabel('p(x)'); plt.legend();
If the plot above looks ok, but the following numerical check fails, your grid calculation might not be quite accurate enough to satisfy the atol
tolerance below. It should be possible to correct this by changing the grid bounds/spacing appropriately. Alternatively, double-check your p_x
function. (Also note that, occasionally, the points can look a little bit offset just because matplotlib
is silly.)
assert np.allclose(grid_p_x, p_x(xvalues, **params), atol=1e-6)
Conditioning¶
Numerically checking $p(y|x)$ is more straightforward, since there is no integration involved. We don't even need a 2D grid, but can instead directly evaluate $p(x,y)$ as a function of $y$, with $x$ fixed at some value. Recall that, for our tests, this value is
fixed_x
-1.86
As a test of your analytical p_y_given_x
, let's numerically evaluate $p(x=$ fixed_x
$,y)$ at $y=$yvalues
. Don't forget that you will also need to divide by $p(x)$ to normalize!
# grid_p_y_given_x = ...
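One possible sketch, normalizing by the analytic p_x evaluated at fixed_x (you could instead normalize by a Riemann sum over $y$):
grid_p_y_given_x = p_xy(fixed_x, yvalues, **params) / p_x(fixed_x, **params)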
Again, let's explicitly verify the normalization by directly integrating grid_p_y_given_x
over $y$.
# grid_p_y_given_x_integral = ...
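# analogous to the marginal normalization check above (a sketch):
grid_p_y_given_x_integral = grid_p_y_given_x.sum() * dy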
print(grid_p_y_given_x_integral)
assert np.isclose(grid_p_y_given_x_integral, 1.0)
0.9999999994211752
We'll also do visual and quantitative comparison with p_y_given_x
, analogously to above.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
plt.plot(yvalues, grid_p_y_given_x, 'b.', label='grid');
plt.plot(yvalues, p_y_given_x(yvalues, fixed_x, **params), 'r-', label='analytic');
plt.xlabel('y'); plt.ylabel('p(y|x='+str(fixed_x)+')'); plt.legend();
assert np.allclose(grid_p_y_given_x, p_y_given_x(yvalues, fixed_x, **params))
Numerical comparison: Monte Carlo¶
To finish this notebook, we'll look at how to perform conditioning and marginalization when we have samples from a PDF rather than a simple analytical expression or an evaluation of the PDF over a grid. This will be our most common mode of operation in the latter part of this course, since analytical and grid-based methods tend to become intractable once we're working in more than a few dimensions.
Randomly sampling from general PDFs is a topic that we will cover later on. For the moment, we're using a simple PDF, so we'll use simple methods. In fact, we can take advantage of the marginal and conditional distributions that you've already worked out to produce samples from our 2D PDF using 2 calls to a univariate random number generator. Namely, below,
- Generate samples of $x$ from its marginal PDF, $p(x)$. Then,
- Generate corresponding samples of $y$ from its conditional PDF, $p(y|x)$.
Since each of these steps samples a 1D Gaussian, you can just use the numpy.random.randn
function; np.random.randn(Nsamples)
will produce an Nsamples
-length array distributed normally with a mean of zero and a standard deviation of 1. To get samples from some other normal distribution, just multiply these by the desired standard deviation, and add the desired mean.
For organization's sake, store these samples in a dictionary with x
and y
keys, i.e. samples={'x':..., 'y':...}
. Corresponding elements of samples['x']
and samples['y']
should then be valid draws from $p(x,y)$.
Nsamples = 100000 # this is more than we will typically have in practice, but should be good for illustrating how things work here
# samples = ... (a dictionary as described above)
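A sketch of the two-step recipe above, using only np.random.randn and the marginal/conditional parameters read off earlier:
samples = {}
# step 1: draw x from its marginal, Normal(mu_x, sigma_x)
samples['x'] = params['mu_x'] + params['sigma_x'] * np.random.randn(Nsamples)
# step 2: draw y from its conditional given each x, with a shifted mean and reduced width
cond_mean = params['mu_y'] + params['rho'] * params['sigma_y'] / params['sigma_x'] * (samples['x'] - params['mu_x'])
cond_sd = params['sigma_y'] * np.sqrt(1.0 - params['rho']**2)
samples['y'] = cond_mean + cond_sd * np.random.randn(Nsamples)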
Let's check visually whether these samples have the correct 2D distribution by plotting some of them over the gridded evaluation of $p(x,y)$.
plt.rcParams['figure.figsize'] = (5.0, 5.0)
plt.imshow(grid_p_xy.T, cmap='gray', origin='lower', extent=[xmin, xmax, ymin, ymax]);
plt.plot(samples['x'][:5000], samples['y'][:5000], 'r,');
plt.xlabel('x'); plt.ylabel('y');
The red points should be densest where the background image is brightest, and their distribution should conspicuously have the same overall orientation. The following sections will give us additional (if still not comprehensive) double-checks that the samples come from the intended distribution.
Marginalization¶
When dealing with samples, marginalization is the simplest operation imaginable. To estimate the distribution $p(x)$, we just... look at the distribution of samples['x']
, paying no attention at all to samples['y']
. Although there are other methods, the simplest way to estimate a distribution from samples is by making a histogram, as we do below. The density
argument to plt.hist
divides the number of samples in each histogram bin by Nsamples
times the bin width, making it the fractional density of samples in the bin (our estimate of the PDF).
plt.rcParams['figure.figsize'] = (6.0, 4.0)
plt.hist(samples['x'], density=True, bins=50);
plt.plot(xvalues, p_x(xvalues, **params), 'r-');
plt.xlabel('x'); plt.ylabel('p(x)');
Although the binning and the finite number of samples can make the comparison less than perfect, the histogram should agree with your analytic $p(x)$ very well in this case. If we use a smaller number of samples to make the histogram while keeping the bin width similar, as below, you'll see the estimate become increasingly noisy. The solution to this, if it matters, is to do some smoothing of the histogram or use wider bins (which is the same thing)... or ideally to get more samples.
plt.rcParams['figure.figsize'] = (12.0, 4.0)
fig, ax = plt.subplots(1,2);
ax[0].hist(samples['x'][:10000], density=True, bins=50);
ax[0].plot(xvalues, p_x(xvalues, **params), 'r-');
ax[0].set_xlabel('x'); ax[0].set_ylabel('p(x)'); ax[0].set_title('10000 samples');
ax[1].hist(samples['x'][:1000], density=True, bins=50);
ax[1].plot(xvalues, p_x(xvalues, **params), 'r-');
ax[1].set_xlabel('x'); ax[1].set_ylabel('p(x)'); ax[1].set_title('1000 samples');
Conditioning¶
In principle, conditioning with samples is also completely straightforward. We simply select the subset of samples that satisfy the condition, in our case $x=$ fixed_x
, and make a histogram of the corresponding $y$ values. In practice, however, the number of samples that satisfy the condition exactly is probably zero. We will need to compromise, and instead, for example, select those samples where $x$ is within some distance of fixed_x
(i.e., we will have to relax the condition). The numpy.flatnonzero
function can accomplish this; below we use it to find the indices for which |samples['x']-fixed_x|
is less than a tenth of the standard deviation of samples['x']
. This is not necessarily a smart definition of "close to fixed_x
" in general, but at least it means "close relative to the overall variation in $x$". You are encouraged to play around with this selection to see how it changes things.
j = np.flatnonzero(np.abs(samples['x'] - fixed_x)<0.1*samples['x'].std())
An obvious downside to this approach is that we are throwing away many, probably most, of the original samples, which in general might have been very difficult to produce in the first place. Let's see how many are left.
len(j)
3435
Presuming that this is a small fraction of Nsamples
, we can also expect the resulting histogram to be noisy, although the resemblance to the analytic result should still be clear.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
plt.hist(samples['y'][j], density=True, bins=50);
plt.plot(yvalues, p_y_given_x(yvalues, fixed_x, **params), 'r-');
plt.xlabel('y'); plt.ylabel('p(y|x='+str(fixed_x)+')');
So, while it's indeed possible to condition samples in principle, in practice it's often better to simply generate samples that satisfy the desired condition by construction. Again, this is straightforward in this case using your analytic results. Below, generate Nsamples
samples of $y$ for $x=$ fixed_x
using numpy.random.randn
modulated by the appropriate mean and standard deviation.
# samples_y_given_x = ...
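For example (a sketch; this reuses the conditional mean and width from before, now evaluated at fixed_x):
cond_mean = params['mu_y'] + params['rho'] * params['sigma_y'] / params['sigma_x'] * (fixed_x - params['mu_x'])
cond_sd = params['sigma_y'] * np.sqrt(1.0 - params['rho']**2)
samples_y_given_x = cond_mean + cond_sd * np.random.randn(Nsamples)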
The histogram of these samples should look much better compared with the analytic curve, mainly because there are more of them, and partly because they satisfy the desired condition exactly rather than only approximately.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
plt.hist(samples_y_given_x, density=True, bins=50);
plt.plot(yvalues, p_y_given_x(yvalues, fixed_x, **params), 'r-');
plt.xlabel('y'); plt.ylabel('p(y|x='+str(fixed_x)+')');
Parting thoughts¶
In this tutorial, you've practiced a little bit of probabilistic reasoning, and worked through some of the common mathematical operations in a simplified setting. While we may not see precisely these implementations again, we'll be building on the concepts used here for the remainder of the course.
Endnotes¶
Note 1¶
About the notation: It's a little weird that in the multivariate case we write $p(x|\mu,\Sigma)$, with the covariance $\Sigma$ as a parameter, while in the univariate case we write $p(x|\mu,\sigma)$, with the standard deviation $\sigma$ rather than the variance $\sigma^2$. In statistics literature, it's more common to be consistent, i.e. to write the univariate PDF as $p(x|\mu,\sigma^2)$. However, most code implementations of the univariate normal distribution (including scipy
) take the standard deviation as an argument rather than the variance, so we'll stick with our odd, inconsistent notation.
Note 2¶
And highly recommended practice! But also a bit tedious.
Note 3¶
You're welcome.
Note 4¶
numpy
functions are fine. We mean it about scipy
, though.
assert('scipy' not in sys.modules)