Tutorial: MCMC Diagnostics¶
For this notebook, you should already have generated and saved Markov chains in the Redshift Distribution tutorial using two algorithms, conjugate Gibbs and Metropolis. Here, we'll work through the process of diagnosing whether these Markov chains are usefully sampling the posterior distribution, in particular assessing:
- their convergence
- their autocorrelation
- the effective number of independent samples, and how this impacts the usual reported values and credible intervals
The diagnostics discussed below include both qualitative and quantitative checks. We don't particularly think it's all that instructive to write the code that does the quantitative calculations - though there is surely room for improvement or expansion if you're interested - so instead we will demonstrate how to use functions provided by the incredible and pandas packages.
from os import getcwd
from os.path import exists as file_exists
from yaml import safe_load
from glob import glob
import numpy as np
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
%matplotlib inline
import incredible as cr
thisTutorial = 'clredshift' # NB we are reading data from a previous tutorial!
if getcwd() == '/content':
# assume we are in Colab, and the user's data directory is linked to their drive/Physics267_data
from google.colab import drive
drive.mount('/content/drive')
datapath = '/content/drive/MyDrive/Physics267_data/' + thisTutorial + '/'
else:
# assume we are running locally somewhere and have the data under ./data/
datapath = 'data/'
Gibbs samples¶
It's nice to start with the Gibbs sampled chains, since (for this problem) they almost certainly look nicer. First, read them in. The way we did things in the previous tutorial, each entry in the chains list will be an $N_\mathrm{steps} \times N_\mathrm{parameters}$ array. The cell below assumes they are present in the location where the Redshift Distribution tutorial writes them.
chains = [np.loadtxt(f) for f in glob(datapath+'clredshift_gibbs_*.txt.gz')]
param_labels = [r'$z_\mathrm{cl}$', r'$\sigma^2$']
Visual inspection¶
You've already used the most important method of vetting chains: visual inspection. The key questions are:
- Do multiple, independent chains appear to be sampling the same distribution (have they converged to the same distribution)?
- Is there a clear "burn-in" period before convergence that should be eliminated?
- Are the chains highly autocorrelated (taking small steps compared with the width of the posterior)? This is not an issue per se, if the chains are long enough, although it means the sampler is not moving as efficiently as one might hope.
Plot the parameter traces below, and answer these questions (qualitatively) for the conjugate Gibbs sampling chains.
plt.rcParams['figure.figsize'] = (16.0, 6.0)
fig, ax = plt.subplots(len(param_labels), 1);
cr.plot_traces(chains, ax, labels=param_labels, Line2D_kwargs={'markersize':1.0})
Space to answer the 3 key questions
I_have_answered = False # change to True when true
assert I_have_answered
Is there a burn-in that should clearly be removed? If so, do so here by setting the value of burn to a number of steps $>0$. (You shouldn't need to remove very much - if burn=100, for example, doesn't seem incredibly generous in this case, then something went wrong in the previous tutorial.)
# burn =
for i in range(len(chains)):
chains[i] = chains[i][burn:,:]
Gelman-Rubin statistic¶
Recall from the notes that the Gelman-Rubin convergence statistic, $R$, quantitatively tests the similarity of independent chains intended to sample the same PDF. To be meaningful, they should start from different locations and burn-in should be removed.
For a given parameter, $\theta$, the $R$ statistic compares the variance across chains with the variance within a chain. Intuitively, if the chains are random-walking in very different places, i.e. not sampling the same distribution, $R$ will be large.
We'd like to see $R\approx 1$; for example, $R<1.1$ is often used.
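If you're curious what this statistic actually computes, below is a minimal sketch of the classic (non-split) $R$ for a single parameter. This is not the incredible implementation, which may differ in detail, so treat it as illustrative only.
def gelman_rubin_R(chains_1d):
    # chains_1d: list of 1D arrays, one per chain, assumed to have equal length
    x = np.array(chains_1d)              # shape (m, n): m chains of length n
    m, n = x.shape
    W = x.var(axis=1, ddof=1).mean()     # mean within-chain variance
    B = n * x.mean(axis=1).var(ddof=1)   # n times the variance of the chain means
    var_hat = (n - 1) / n * W + B / n    # pooled estimate of the posterior variance
    return np.sqrt(var_hat / W)

# e.g. for z_cl: gelman_rubin_R([ch[:,0] for ch in chains])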
cr.GelmanRubinR(chains)
array([0.99997119, 0.99996812])
If your Gibbs sampler works properly, $R$ for the chains we ran should be very close to 1 (we have differences of order $10^{-5}$).
assert np.allclose(cr.GelmanRubinR(chains), [1.0]*len(param_labels), atol=1e-3)
Autocorrelation¶
Similarly, the autocorrelation of a sequence, as a function of lag, $k$, can be quantified:
$\rho_k = \frac{\mathrm{Cov}_i\left(\theta_i,\theta_{i+k}\right)}{\mathrm{Var}(\theta)}$
The larger lag one needs to get a small autocorrelation, the less informative individual samples are.
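For concreteness, here is a minimal numpy sketch of this estimator at a single lag (a hypothetical helper, not part of incredible or pandas):
def autocorr(theta, k):
    # theta: 1D array of samples; k: lag (k >= 1)
    d = np.asarray(theta) - np.mean(theta)
    return np.mean(d[:-k] * d[k:]) / np.var(theta)   # rho_k

# e.g. autocorr(chains[0][:,0], 10)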
The pandas function plotting.autocorrelation_plot() is useful for this. Note that seemingly random oscillations basically tell us the level of noise due to the finite chain length. A coherent drop as a function of lag indicates a genuine autocorrelation, and the lag at which it drops to within the noise (roughly indicated by the horizontal, gray lines) is an approximate autocorrelation length. If we needed to thin the chains to conserve disk space, this would be a reasonable factor to thin by.
plt.rcParams['figure.figsize'] = (16.0, 6.0)
fig, ax = plt.subplots(len(param_labels), 1);
for j,lab in enumerate(param_labels):
pd.plotting.autocorrelation_plot(chains[0][:,j], ax=ax[j]);
ax[j].set_ylabel(lab+' autocorrelation')
Again, for this problem, the Gibbs chains should be very well behaved. Our autocorrelation plots look like noise at almost all lags.
Yup_that_checks_out = False # change to True if, yup, that checks out
assert Yup_that_checks_out
Effective number of independent samples¶
From $m$ chains of length $n$, we can also estimate the "effective number of independent samples" as
$n_\mathrm{eff} = \frac{mn}{1+2\sum_{t=1}^\infty \hat{\rho}_t}$, with
$\hat{\rho}_t = 1 - \frac{V_t}{2V}$ ($V$ as in the Gelman-Rubin calculation), and
$V_t = \frac{1}{m(n-t)} \sum_{j=1}^m \sum_{i=t+1}^n (\theta_{i,j} - \theta_{i-t,j})^2$.
In practice, the sum in $n_\mathrm{eff}$ is cut off when the estimates $\hat{\rho}_t$ become "too noisy", e.g. when the sum of two successive values $\hat{\rho}_t$ and $\hat{\rho}_{t+1}$ is negative. Roughly speaking, this should occur when the lag is of the order of the autocorrelation length.
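To make the recipe concrete, here is a minimal sketch of this calculation for one parameter. It approximates $V$ by the overall sample variance rather than the full Gelman-Rubin estimate, so it will not exactly reproduce the incredible function used below.
def neff_sketch(chain_list, j=0, maxlag=500):
    # chain_list: list of (Nsteps, Nparams) arrays of equal length; j: parameter index
    x = np.array([ch[:,j] for ch in chain_list])    # shape (m, n)
    m, n = x.shape
    V = x.var()                                     # crude stand-in for V (see note above)
    rho_sum = 0.0
    for t in range(1, maxlag, 2):
        rho_t  = 1.0 - np.mean((x[:,t:]   - x[:,:-t])**2)     / (2.0*V)
        rho_t1 = 1.0 - np.mean((x[:,t+1:] - x[:,:-(t+1)])**2) / (2.0*V)
        if rho_t + rho_t1 < 0.0:                    # estimates too noisy; truncate the sum here
            break
        rho_sum += rho_t + rho_t1
    return m*n / (1.0 + 2.0*rho_sum)

# e.g. neff_sketch(chains, j=0), neff_sketch(chains, j=1)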
The effective_samples function below allows you to pass a guess at this maximum lag, since doing the calculation to arbitrarily long lags becomes very expensive. It will issue a warning if it thinks this maximum lag is too small, according to the criterion above.
maxlag = 500
cr.effective_samples(chains, maxlag=maxlag) # `maxlag` might be something you need to play with, in practice
array([39603.76123811, 36826.36833789])
Since we started with 4 chains of length 10,000 (possibly minus a little burn-in), $n_\mathrm{eff}$ values close to 40,000 are telling us that the autocorrelation is indeed quite small, in agreement with the plots above. Let's remember that this need not be true for every problem; Gibbs sampling is not independence sampling and produces a Markov chain that in principle could be highly correlated, depending on the model and data involved.
assert np.all(cr.effective_samples(chains, maxlag=maxlag) > 30e3)
Note that, as with the Gelman-Rubin statistic, this is a case where one might be interested in seeing the effective number of samples for the most degenerate linear combinations of parameters, rather than the parameters themselves.
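For instance, one way to do that (purely illustrative, not part of the exercise, and assuming cr.effective_samples is happy with any list of Nsamples $\times$ Nparams arrays, per the note at the end of this notebook) would be to project the chains onto the eigenvectors of the pooled parameter covariance and compute the effective samples along those directions:
pooled = np.concatenate(chains, axis=0)                            # all samples, just for the covariance
evals, evecs = np.linalg.eigh(np.cov(pooled, rowvar=False))        # principal axes of the parameter covariance
rotated = [(ch - pooled.mean(axis=0)) @ evecs for ch in chains]    # each chain in the rotated basis
cr.effective_samples(rotated, maxlag=maxlag)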
Something to do¶
By now you are probably bored. Don't worry. Here is some work for you to do.
Let's get a sense of how many samples are really needed to, e.g., determine 1D credible intervals (as opposed to making the whole posterior look nice). Remember that the effective number of samples is less than the total.
At this point, we're done comparing the individual chains, so we can lump them all together into one massive list of MCMC samples.
chain = np.concatenate(chains, axis=0)
For reference, the total number of samples is:
chain.shape[0]
39960
We'll tag the results from this full chain with 40k, in the expectation that its length is still close to 40,000 steps, even after removing burn-in.
Let's have a look at the credible interval calculation for $z_{\mathrm{cl}}$.
print(chain.shape[0], 'samples')
plt.rcParams['figure.figsize'] = (12.0, 4.0)
fig, ax = plt.subplots(1, 2);
h40k = cr.whist(chain[:,0], plot=ax[0])
ci40k = cr.whist_ci(h40k, plot=ax[1]);
ax[0].set_xlabel(param_labels[0]);
ax[1].set_xlabel(param_labels[0]);
ci40k
39960 samples
{'mode': np.float64(1.9781141777429876), 'level': array([0.68268949, 0.95449974]), 'prob': array([0.68446821, 0.95472974]), 'density': array([134.31573489, 27.90110838]), 'min': array([1.97622887, 1.97443782]), 'max': array([1.97968527, 1.98150774]), 'low': array([-0.00188531, -0.00367636]), 'high': array([0.00157109, 0.00339356]), 'center': array([1.97795707, 1.97797278]), 'width': array([0.0017282 , 0.00353496])}
The PDF estimate should look pretty reliable with so many samples. The question is, if we're going to reduce this to a statement like $z_{\mathrm{cl}}=X^{+Y}_{-Z}$, keeping only up to the leading significant figure of $Y$ and $Z$, how many did we actually need to keep?
Thin the chain by factors of 4, 40, and 400 (to produce chains of length about 10000, 1000 and 100), and see how the endpoints of the 68.3% credible intervals compare. We're looking at the endpoints rather than the values of $Y$ and $Z$ above because the latter are more volatile (depending also on the estimate of $X$).
Remember that thinning by a factor of 4 means that we keep only every 4th entry in the chain, not that we simply select the first 25% of samples. So we're not answering how long we needed to bother running the chain to begin with - that's a slightly different question. We're finding out how redundant our samples are, not just in the "effective independence" sense, but for the specific purpose of quantifying this credible interval.
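To be explicit about the slicing involved (variable names below are just for illustration):
thin4 = chain[::4, :]                       # keep every 4th sample: ~10k rows spread over the whole chain
first_quarter = chain[:chain.shape[0]//4]   # the first 25% of rows - NOT the same thing
print(thin4.shape, first_quarter.shape)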
# No clues here, but it's pretty much cut and paste.
# Analogous to the cell above, save the output of `whist` in h10k, h1k, h100, and the output of
# whist_ci in ci10k, ci1k and ci100. This is so we can plot them all together later.
print('40k:', ci40k['min'][0], ci40k['max'][0])
print('10k:', ci10k['min'][0], ci10k['max'][0])
print(' 1k:', ci1k['min'][0], ci1k['max'][0])
print('100:', ci100['min'][0], ci100['max'][0])
40k: 1.9762288650313484 1.9796852716693534
10k: 1.9762328043301312 1.9797232683318629
 1k: 1.9761655262325257 1.9797539892774076
100: 1.9765343165947464 1.980246564087866
Your mileage may vary slightly, of course, but we see differences of order $10^{-4}$, several times smaller than the width of the interval.
... which is a little surprising, honestly, even though we knew the autocorrelation was quite low in this case. But here's a slightly different question: which of the possible results would you be confident enough to put in a paper, having looked at the marginalized PDFs? (The cell below compares them on the same plot, along with the 1 and 2$\sigma$ CIs.) And, relatedly, which one would you actually want to show in a figure?
plt.rcParams['figure.figsize'] = (14.0, 5.0)
fig, ax = plt.subplots(1, 2);
ax[0].plot(h40k['x'], h40k['density'], '-', label='40k');
ax[0].plot(h10k['x'], h10k['density'], '-', label='10k');
ax[0].plot(h1k['x'], h1k['density'], '-', label='1k');
ax[0].plot(h100['x'], h100['density'], '-', label='100');
ax[0].legend();
ax[0].set_xlabel(param_labels[0]);
ax[1].plot(0.0, ci40k['mode'], 'o', color='C0', label='40k');
ax[1].plot([0.0]*2, [ci40k['min'][0],ci40k['max'][0]], '-', color='C0', linewidth=3);
ax[1].plot([0.0]*2, [ci40k['min'][1],ci40k['max'][1]], '--', color='C0');
ax[1].plot(1.0, ci10k['mode'], 'o', color='C1', label='10k');
ax[1].plot([1.0]*2, [ci10k['min'][0],ci10k['max'][0]], '-', color='C1', linewidth=3);
ax[1].plot([1.0]*2, [ci10k['min'][1],ci10k['max'][1]], '--', color='C1');
ax[1].plot(2.0, ci1k['mode'], 'o', color='C2', label='1k');
ax[1].plot([2.0]*2, [ci1k['min'][0],ci1k['max'][0]], '-', color='C2', linewidth=3);
ax[1].plot([2.0]*2, [ci1k['min'][1],ci1k['max'][1]], '--', color='C2');
ax[1].plot(3.0, ci100['mode'], 'o', color='C3', label='100');
ax[1].plot([3.0]*2, [ci100['min'][0],ci100['max'][0]], '-', color='C3', linewidth=3);
ax[1].plot([3.0]*2, [ci100['min'][1],ci100['max'][1]], '--', color='C3');
ax[1].legend();
ax[1].set_ylabel(param_labels[0]);
Just to belabor the point, here are the mode and -low +high CIs for the 4 cases, at the precision we would actually report (NB because everyone's data set is somewhat random, you may need to change the number of digits to round to). In our solutions, the only non-trivial difference is in the 100-sample case.
roundto = 4
print('~40k samples:', (np.round(ci40k['mode'], roundto)), (np.round(ci40k['low'][0], roundto)), '+'+str((np.round(ci40k['high'][0], roundto))))
print('~10k samples:', (np.round(ci10k['mode'], roundto)), (np.round(ci10k['low'][0], roundto)), '+'+str((np.round(ci10k['high'][0], roundto))))
print(' ~1k samples:', (np.round(ci1k['mode'], roundto)), (np.round(ci1k['low'][0], roundto)), '+'+str((np.round(ci1k['high'][0], roundto))))
print('~100 samples:', (np.round(ci100['mode'], roundto)), (np.round(ci100['low'][0], roundto)), '+'+str((np.round(ci100['high'][0], roundto))))
~40k samples: 1.9781 -0.0019 +0.0016
~10k samples: 1.9781 -0.0019 +0.0016
 ~1k samples: 1.9781 -0.0019 +0.0017
~100 samples: 1.9788 -0.0023 +0.0014
The bottom line is that summarizing a posterior with 1D best values and CIs is not, in fact, all that demanding in terms of the number of samples... provided that those samples are close to independent. Making a visual of the PDF in 1D or 2D that looks like it isn't noise dominated is often significantly more demanding. The upshot is that, if we have enough samples for the posterior plots to look not-noise-dominated, we can usually be pretty confident about numbers we report from them. Keep in mind also that we used the "easier" of the two parameters in this problem for the example; typically the posterior for $\sigma^2$ is less symmetric, and the samples more correlated.
Metropolis samples¶
Now, read in the Metropolis chains and perform the same checks. Use the chains in solutions/ if you didn't work through the Metropolis notebook yourself.
chains = [np.loadtxt(f) for f in glob(datapath+'clredshift_metro_*.txt.gz')]
Below we plot the traces.
plt.rcParams['figure.figsize'] = (16.0, 6.0)
fig, ax = plt.subplots(len(param_labels), 1);
cr.plot_traces(chains, ax, labels=param_labels, Line2D_kwargs={'markersize':1.0})
On the basis of the traces above, choose a burn-in length to remove from the beginning of each chain.
# mburn =
for i in range(len(chains)):
chains[i] = chains[i][mburn:,:]
Depending on how the burn-in period looked, the remaining traces might be clearer.
plt.rcParams['figure.figsize'] = (16.0, 6.0)
fig, ax = plt.subplots(len(param_labels), 1);
cr.plot_traces(chains, ax, labels=param_labels, Line2D_kwargs={'markersize':1.0})
Now that you have hopefully had a good look at these chains, answer the same key questions from the first "Visual inspection" section.
Space to answer the 3 key questions
Compare the chains from the two methods in these terms (again, 3 parts to this answer).
Space to answer the 3 key questions, comparing the MCMC algorithms
I_have_answered_everything = False # change to True when it is true
assert I_have_answered_everything
Here we compute the G-R criterion:
cr.GelmanRubinR(chains)
array([1.00055024, 1.00004532])
Your results will depend on the proposal distribution you used, but in this problem it's possible to get $R$ values very close to 1 again, though possibly a bit larger than in the Gibbs case.
assert np.allclose(cr.GelmanRubinR(chains), [1.0]*len(param_labels), atol=1e-2)
Next, we'll look at the autocorrelation plot:
plt.rcParams['figure.figsize'] = (16.0, 6.0)
fig, ax = plt.subplots(len(param_labels), 1);
for j,lab in enumerate(param_labels):
pd.plotting.autocorrelation_plot(chains[0][:,j], ax=ax[j]);
ax[j].set_ylabel(lab+' autocorrelation')
This will likely look qualitatively different from the Gibbs case, in that there is a clear sign of correlation at small lags.
Next, the effective number of samples, increasing maxlag if needed.
maxlag = 500
cr.effective_samples(chains, maxlag=maxlag)
array([3164.2126463 , 4620.66829709])
This will likely be much smaller than it was for the Gibbs chains, due to higher autocorrelation, but we should still aim to have $n_\mathrm{eff}>1000$.
assert np.all(cr.effective_samples(chains, maxlag=maxlag) > 1e3)
That being the case, we won't bother repeating the thinning exercise above. However, it's worth comparing the marginalized constraints we infer from this method to the other. Here they are for $z_\mathrm{cl}$:
chain = np.concatenate(chains, axis=0)
print('Total samples:', chain.shape[0])
plt.rcParams['figure.figsize'] = (12.0, 4.0)
fig, ax = plt.subplots(1, 2);
hmetro = cr.whist(chain[:,0], plot=ax[0])
cimetro = cr.whist_ci(hmetro, plot=ax[1]);
ax[0].set_xlabel(param_labels[0]);
ax[1].set_xlabel(param_labels[0]);
cimetro
Total samples: 38000
{'mode': np.float64(1.9781765699674), 'level': array([0.68268949, 0.95449974]), 'prob': array([0.68398875, 0.95523014]), 'density': array([132.29231941, 28.05091835]), 'min': array([1.97617317, 1.97428425]), 'max': array([1.97975067, 1.98149649]), 'low': array([-0.0020034 , -0.00389232]), 'high': array([0.0015741 , 0.00331992]), 'center': array([1.97796192, 1.97789037]), 'width': array([0.00178875, 0.00360612])}
You can/should compare them in detail (for both parameters). For our solutions, the 68.3% CI for $z_\mathrm{cl}$ is essentially identical:
print('Gibbs:', round(ci40k['min'][0], roundto), round(ci40k['max'][0], roundto))
print('Metro:', round(cimetro['min'][0], roundto), round(cimetro['max'][0], roundto))
Gibbs: 1.9762 1.9797
Metro: 1.9762 1.9798
In other words, all that extra correlation doesn't really matter, as long as we have enough effectively independent samples at the end of the day. Provided, of course, that the extra time spent producing them is not a barrier.
Parting thoughts¶
Hopefully this notebook has given you some practice with evaluating MCMC results and deciding how trustworthy they are. We do want to emphasize again that any conclusions about the relative performance of the two samplers involved are not generalizable. Conjugate Gibbs is often, but not always, a very nice option when the model permits it. On the other hand, we're not seeing Metropolis at its best; it can be enhanced in ways that improve its performance beyond what we've seen here (we'll cover some in later notes). Combined with the fact that it can be used with any likelihood and prior, Metropolis sampling is an essential tool.
Parting technical note¶
Should you be inclined to use incredible beyond the provided code in this course (and why wouldn't you?), note that all of its functions that take multiple chains as arguments require them to be in one of the following formats:
- a list of Nsamples $\times$ Nparams arrays, as in this notebook
- a 3D Nchains $\times$ Nsamples $\times$ Nparams array
In particular, passing a 3D array with some other axis order will probably not crash anything, but will produce nonsense.
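For concreteness, here is what the two accepted layouts look like for the chains in this notebook (a sketch, assuming all chains have equal length so that np.stack works):
chain_list = chains                     # a list of (Nsamples, Nparams) arrays, as used throughout
chain_3d = np.stack(chains, axis=0)     # a (Nchains, Nsamples, Nparams) array
# If an array has some other axis order, e.g. (Nsamples, Nchains, Nparams),
# rearrange it first, e.g. chain_3d = np.moveaxis(arr, 1, 0)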