Tutorial: Credible Intervals/Regions¶
Here we'll compare different conventions for defining "best values" and credible intervals (CI's) to summarize posterior PDFs.
You will
- compare conventions that report the posterior mode and highest density region; median and quantiles of the posterior; and mean and standard deviation of the posterior (the latter being highly discouraged)
- compute the above from samples from a PDF rather than from an analytic function, since, going forward, samples will usually be the only characterization of the posterior that we have
- use these methods to produce credible intervals (CI's) for a single parameter, and credible regions (CR's) for 2 parameters
In principle, the internal calculations involved in doing these things are not difficult, but we also don't think they're terribly illuminating. On the other hand, you'll need to know how to produce CI's and CR's to summarize the results of pretty much every inference from now on. The upshot is that this tutorial resembles a walkthrough of particular software much more than most.
from os import getcwd
from yaml import safe_load
import numpy as np
import scipy.stats as st
from pygtc import plotGTC
import matplotlib.pyplot as plt
%matplotlib inline
thisTutorial = 'credible_regions'
if getcwd() == '/content':
    # assume we are in Colab, and the user's data directory is linked to their drive/Physics267_data
    from google.colab import drive
    drive.mount('/content/drive')
    datapath = '/content/drive/MyDrive/Physics267_data/' + thisTutorial + '/'
else:
    # assume we are running locally somewhere and have the data under ./data/
    datapath = 'data/'
Generate samples to analyze¶
Normally getting samples from the posterior would be the most challenging part of the problem, but for now we're focusing on what happens afterwards. For our first example, we'll use the distribution that was the posterior in the Bayes' Law tutorial, though instead of using those particular parameters again we'll just assign you new ones for variety.
beta_pars = safe_load(open(datapath+'beta.yaml', 'r').read())
beta_pars
{'alpha': 3.632234756199539, 'beta': 6.527326933635796}
Let's generate a good number of samples from this PDF. (Yes, this is a PDF that we know and so we could compute things directly from its functional form, but in general this won't be the case, so we will practice working with samples.)
theta1 = st.beta.rvs(beta_pars['alpha'], beta_pars['beta'], size=100000)
Here's what they look like:
plt.rcParams['figure.figsize'] = (4.0, 3.0)
plt.hist(theta1, bins=100, density=True, histtype='step');
plt.xlabel(r'$\theta_1$');
To give ourselves a funkier second example, we'll also generate samples from a bimodal distribution, made from mixing 2 Gaussians.
theta2 = np.concatenate((st.norm.rvs(loc=-2.0, size=60000), st.norm.rvs(loc=2.0, size=40000)))
plt.hist(theta2, bins=100, density=True, histtype='step');
plt.xlabel(r'$\theta_2$');
You'll find 1D "best values" and CI's for each of these, and afterward turn to the case of a joint 2D CR for parameters $\theta_1$ and $\theta_2$.
In particular, you'll find intervals corresponding to "$1\sigma$" and "$2\sigma$" equivalent probability. Recall that these are $\approx$0.683 and 0.954, or more precisely:
st.chi2.cdf([1.0, 4.0], df=1)
array([0.68268949, 0.95449974])
We reiterate that nothing you do below should depend on knowledge of the PDFs that these samples came from. In general, we won't be able to write marginalized posteriors down simply, nor would it be straightforward to compute anything of interest analytically. Instead, we're practicing using samples to characterize the distribution they were generated from.
1D intervals¶
Let's start with the 1D best value and CI for `theta1`.
First, we'll find the mode and highest posterior density CI's using the only package we know of that does the latter correctly.
import incredible as cr
With this package, there are actually two steps. The first converts the samples into a histogram, optionally doing some smoothing, and optionally accounting for non-uniform weights (a feature of some samplers we will see later). After that, there is a second function call to find the mode and CI's. This is so that the second function can also be used in cases where we have, e.g., direct evaluations of the posterior on a grid (we would just have to coerce those into the same format as the histogram that gets made from samples).
A simple call of the first function, `whist`, produces the following:
h1 = cr.whist(theta1, plot=plt);
The returned object is a simple dictionary that holds the abscissae (`h1['x']`) and estimated density (`h1['density']`) of the PDF.
Note that, by default, this function does some smoothing, specifically using a kernel density estimate method. This is why the blue curve (which is what is actually returned) is smoother than the raw histogram shown in the background. This means that the mode of the blue curve may not be identical to the $x$ value where the raw histogram is highest. It's often true that the mode of an unsmoothed histogram will be thrown off by Monte Carlo noise (this is one reason for smoothing in the first place). In general, one should smooth until noisy features that you're pretty sure shouldn't be there are gone, and no more than that - assuming you're confident enough to make that call (if not, you may need to get more samples). Hence, this is a plot that one would want to look at with real eyeballs, if only briefly, to ensure that results based on the smoothed or unsmoothed histogram are reasonable.
There may be times when you want to not smooth, or to have finer control over the smoothing (e.g., if you believe the identified mode is still off due to remaining noise). See the docstring for details, but for example one could instead do no smoothing with a specified number of bins:
h1_bin20 = cr.whist(theta1, plot=plt, bins=20, smooth=False);
or smoothing on a fixed scale with more bins:
cr.whist(theta1, plot=plt, bins=200, smooth=1);
Next, assuming this looks reasonable (we'll use the first call to `whist` above), we feed the output into `whist_ci`:
ci1 = cr.whist_ci(h1, plot=plt);
This plot should look familiar from the notes. It shows the mode and the $1\sigma$ and $2\sigma$ intervals, whose bounding probabilities are indicated by the horizontal lines. Here you should look at the intersection of the blue/solid and green/dashed lines with the black/solid PDF - if they aren't all coming together in the same place, then the underlying histogram needs to be binned more finely. For example, the following, with `bins=20`, will probably be too chunky.
ci1_bin20 = cr.whist_ci(h1_bin20, plot=plt);
The return value contains a number of useful (and some redundant) values. Of most interest are the `mode` and interval `min` and `max` values. `low` and `high` are the (signed) offsets of `min` and `max` from the mode. Finally, `center` is the average of `min` and `max`, and `width` is half the difference between `min` and `max`. These are convenient for cases where the CI is symmetric about the mode (at the precision we would be reporting them), or if we think the mode has been thrown off center due to insufficient smoothing but the interval really should be symmetric.
ci1
{'mode': np.float64(0.3301256127274579), 'level': array([0.68268949, 0.95449974]), 'prob': array([0.68492636, 0.9545425 ]), 'density': array([1.68843824, 0.46965163]), 'min': array([0.1875834 , 0.09079547]), 'max': array([0.48498629, 0.64336653]), 'low': array([-0.14254221, -0.23933014]), 'high': array([0.15486068, 0.31324092]), 'center': array([0.33628484, 0.367081 ]), 'width': array([0.14870145, 0.27628553])}
For completeness, `level` contains the probability defining the interval that each entry belongs to, `prob` is the probability contained within each entry (these are identical when an interval is simply connected; we'll see the distinction below), and `density` is the height of the green lines in the figure above.
In text, we would normally report the constraint as $\mathrm{mode}^{+\mathrm{high}}_{-\mathrm{low}}$, or $\mathrm{center}\pm\mathrm{width}$ if the CI is symmetric.
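For instance, here is a minimal sketch of extracting these numbers for the $1\sigma$ entry of the `ci1` dictionary above (note that `low` is stored with its sign, i.e. as a negative offset from the mode):
print("theta1 = {:.3f} {:+.3f} / {:+.3f} (68.3%)".format(ci1['mode'], ci1['high'][0], ci1['low'][0]))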
Next, compute the alternative "best fit" and CI's using the median/percentiles method from the notes. Store the median as a scalar and the intervals as shape (2,) arrays holding the endpoints of each interval.
# med1 = ...
# quant1_1 = ...
# quant1_2 = ...
print(med1, quant1_1, quant1_2)
0.34860023670053947 [0.20877855 0.50825784] [0.10700324 0.66362543]
If you have any doubts, you can check these results against the `median` and `ppf` functions of `st.beta`. They won't be identical, since you're working with a finite set of samples, but they should be very close (better than 0.01). This won't catch you if you're plugging the wrong probabilities into both functions, so be careful with that.
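If you want to run that check, here is a minimal sketch (possible only because we happen to know which PDF the samples came from; it assumes the symmetric-quantile convention from the notes):
p1, p2 = st.chi2.cdf([1.0, 4.0], df=1)  # ~0.683 and ~0.954
print(st.beta.median(beta_pars['alpha'], beta_pars['beta']))
print(st.beta.ppf([0.5*(1.0-p1), 0.5*(1.0+p1)], beta_pars['alpha'], beta_pars['beta']))
print(st.beta.ppf([0.5*(1.0-p2), 0.5*(1.0+p2)], beta_pars['alpha'], beta_pars['beta']))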
Even though their use is not recommended, we'll also compare to the mean/standard deviation. The naively defined $1\sigma$ and $2\sigma$ intervals would be `mea1 +/- std1` and `mea1 +/- 2*std1`.
mea1 = np.mean(theta1)
std1 = np.std(theta1)
# print the intervals in the same format as above, for ease of comparison
print(mea1, [mea1-std1, mea1+std1], [mea1-2*std1, mea1+2*std1])
0.35798922505860237 [np.float64(0.2140627767072179), np.float64(0.5019156734099868)] [np.float64(0.07013632835583344), np.float64(0.6458421217613712)]
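As a quick, optional illustration (not part of the standard workflow), we can also ask what fraction of the samples actually falls inside these naive intervals:
print(np.mean((theta1 >= mea1 - std1) & (theta1 <= mea1 + std1)),
      np.mean((theta1 >= mea1 - 2*std1) & (theta1 <= mea1 + 2*std1)))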
Here we'll overlay the $1\sigma$ and $2\sigma$ median/quantile (orange) and mean/standard deviation (green) summaries on the plot from earlier (at arbitrary y-axis values). You can see that none of them quite agree with one another, and that the mean/standard deviation $2\sigma$ interval includes negative values, where the PDF is zero!
ci1 = cr.whist_ci(h1, plot=plt, plot_levels=False);
plt.plot(quant1_2, [2.,2.], color='C1')
plt.plot(quant1_1, [2.05,2.05], color='C1')
plt.plot(med1, 2.05, 'o', color='C1', label='median/quantiles')
plt.plot(mea1, 2.55, 'o', color='C2',label='mean/stdev')
plt.plot([mea1-std1, mea1+std1], [2.55,2.55], color='C2')
plt.plot([mea1-2*std1, mea1+2*std1], [2.5,2.5], color='C2');
plt.legend();
Go through the same procedure for `theta2` next.
h2 = cr.whist(theta2, plot=plt);
# med2 = ...
# quant2_1 = ...
# quant2_2 = ...
print(med2, quant2_1, quant2_2)
-1.0287997290749193 [-2.63096527 2.26699112] [-3.77443435 3.56618376]
mea2 = np.mean(theta2)
std2 = np.std(theta2)
print(mea2, [mea2-std2, mea2+std2], [mea2-2*std2, mea2+2*std2])
-0.39894475380363664 [np.float64(-2.5989941492167046), np.float64(1.8011046416094314)] [np.float64(-4.799043544629773), np.float64(4.001154037022499)]
ci2 = cr.whist_ci(h2, plot=plt, plot_levels=False);
plt.plot(quant2_2, [0.2,0.2], color='C1')
plt.plot(quant2_1, [0.205,0.205], color='C1')
plt.plot(med2, 0.205, 'o', color='C1', label='median/quantiles')
plt.plot(mea2, 0.155, 'o', color='C2', label='mean/stdev')
plt.plot([mea2-std2, mea2+std2], [0.155,0.155], color='C2')
plt.plot([mea2-2*std2, mea2+2*std2], [0.15,0.15], color='C2')
plt.legend();
ci2
{'mode': np.float64(-1.9902856892671315), 'level': array([0.68268949, 0.68268949, 0.95449974]), 'prob': array([0.45989172, 0.22306093, 0.95512198]), 'density': array([0.11722269, 0.11722269, 0.04573012]), 'min': array([-3.18154835, 1.24314153, -3.83795838]), 'max': array([-0.77471155, 2.77476494, 3.57704388]), 'low': array([-1.19126266, 3.23342722, -1.84767269]), 'high': array([1.21557414, 4.76505063, 5.56732957]), 'center': array([-1.97812995, 2.00895324, -0.13045725]), 'width': array([1.2034184 , 0.76581171, 3.70750113])}
Interesting... this is an example of a multimodal PDF where the $1\sigma$ highest-density CI is multiply connected (i.e. not contiguous). Note that `ci2` contains two entries for `level` 0.683 to reflect this. The corresponding values in `prob` show the integrated probability in each of them, if that's useful.
How would one summarize the $1\sigma$ highest density CI in text? The short answer is: honestly, and that means using more characters than $x \pm \sigma_x$. One could simply say that the CI consists of two disjoint intervals, and give those intervals. At the same time, the existence of a multiply connected CI for one parameter suggests that there may be similar structure in the marginalized posteriors of other parameters, or pairs of parameters, so we'd want to look carefully at all of them to see if that's the case. And, of course, in a real situation this structure might actually tell us something interesting about the model that should be commented on.
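If it helps with reporting, here is a minimal sketch (based on the `ci2` dictionary above) that pulls out the disjoint pieces of the $1\sigma$ interval and the probability each contains:
sel = np.isclose(ci2['level'], st.chi2.cdf(1.0, df=1))  # entries belonging to the ~0.683 interval
for lo, hi, pr in zip(ci2['min'][sel], ci2['max'][sel], ci2['prob'][sel]):
    print("[{:.2f}, {:.2f}] contains {:.1%} of the probability".format(lo, hi, pr))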
2D regions¶
Next we have the case of 2D credible regions (CR's), where we'll take `theta1` and `theta2` to be samples of two parameters from the same model.
To find the HPD regions, we have functions analogous to the ones used above. However, `whist2d` does not have a KDE smoothing option, so in general you will want to manually adjust the number of bins (in each parameter) and the size of the smoothing kernel (in units of bins). For example, compare the following:
h3 = cr.whist2d(theta1, theta2, bins=25, smooth=0, plot=True);
h3 = cr.whist2d(theta1, theta2, bins=50, smooth=1, plot=True);
The return value of this function is a dictionary with `x`, `y` and `z` entries, corresponding to the two parameter values at the bin centers, and the density in each bin. These have dimensionality 1, 1, and 2, respectively.
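As a trivial sanity check of that description, we can look at the array shapes returned by the `bins=50` call above:
print(np.shape(h3['x']), np.shape(h3['y']), np.shape(h3['z']))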
To find the CR's, we call `whist2d_ci`:
ci3 = cr.whist2d_ci(h3, plot=plt);
This returns the mode (blue point above) and the list of points making up the contours in the above plot in a slightly complicated dictionary that you can examine if you're interested.
One should test whether the amount of smoothing used is too much. (This is important in the 1D case also, but the KDE method with default settings is more likely to undersmooth.) Overlaying smoothed and less smoothed contours is a useful way of doing this.
h3a = cr.whist2d(theta1, theta2, bins=100, smooth=0);
ci3a = cr.whist2d_ci(h3a, mode_fmt=None, contour_color='r', plot=plt);
h3 = cr.whist2d(theta1, theta2, bins=50, smooth=1);
ci3 = cr.whist2d_ci(h3, mode_fmt=None, plot=plt);
In this comparison, the more smoothed contours should look like... a smoothed version of the squiggly ones, as opposed to being much broader.
For contrast, here is what that comparison would look like if we used coarser binning and more smoothing.
# this is how you plot contours from whist2d_ci without having to repeat the calculations
cr.ci2D_plot(ci3a, plt, Line2D_kwargs={'color':'r'});
h3b = cr.whist2d(theta1, theta2, bins=25, smooth=2); # too much smoothing!
ci3b = cr.whist2d_ci(h3b, mode_fmt=None, plot=plt);
There is no 2D analog of the median/quantile convention for CI's, but we can compare with the equivalent (still not recommended) regions defined via the parameter means and covariance.
cr.ci2D_plot(ci3, plt);
cv = np.cov(theta1, theta2)
cr.cov_ellipse(cv, center=[mea1,mea2], level=0.68268949, plot=plt, fmt='r--');
cr.cov_ellipse(cv, center=[mea1,mea2], level=0.95449974, plot=plt, fmt='r--');
Note that the stand-alone contour plotting function, `ci2D_plot`, adds a single contour level at a time, so it was called twice.
Options to the various lower-level `matplotlib` functions that are called can be used to customize things, e.g.:
# plotting contours for different levels with different colors is clunky currently,
# requiring a separate call for each (specified by `which`)
cr.ci2D_plot(ci3, plt, which=[1], fill=True, fill_kwargs={'color':str(0.954)});
cr.ci2D_plot(ci3, plt, which=[0], fill=True, fill_kwargs={'color':str(0.683)});
An increasingly common way to visualize multiple 1D and 2D marginalized PDFs from a high-dimensional posterior distribution is the so-called "triangle plot" that has the 1D PDFs on its diagonal and each corresponding 2D PDF off the diagonal, in a triangular matrix of plots. For example, this call finds all of the CI's and CR's.
tri = cr.whist_triangle(np.array([theta1, theta2]).T, bins=50, smooth2D=1);
(Under the hood, `tri` is a list-of-lists in lower-triangular matrix format, holding the various histogram and contour outputs we looked at above, so its contents can be manually filled in/replaced if you are tweaking everything to be beautiful.)
We can display the triangle thusly:
cr.whist_triangle_plot(tri, paramNames=[r'$x$', r'$y$']);
Again, lots of options can be customized, and other things can be added to any of the panels via the returned array of `matplotlib` axes. For example,
f, a = cr.whist_triangle_plot(tri, paramNames=[r'$x$', r'$y$'], linecolor1D='b',
                              fillcolors2D=[(0.026, 0.818, 1.000), (0.714, 0.936, 1.000)]);
cr.cov_ellipse(cv, center=[mea1,mea2], level=0.68268949, plot=a[1][0], fmt='r--');
cr.cov_ellipse(cv, center=[mea1,mea2], level=0.95449974, plot=a[1][0], fmt='r--');
Alternative packages¶
There are other packages that can be better for getting a quick look at plots like those above, in particular `pygtc` and `corner`. We'll use these in later tutorials. They have the advantage that you get the whole triangle plot from a single function call. The downside is that all the calculations are redone each time you call the function, and it isn't so straightforward to do the sanity checks mentioned above. So we recommend using one of those packages only for quickly visualizing samples from a PDF. In contrast, with `incredible` it's straightforward to do the hard calculations once, and then tweak the display to your heart's content, so it might be more suitable for publication graphics (in addition to providing the CI calculations). To summarize the options as of this writing:
- `pygtc`: looks nicer than `corner`; can put multiple posteriors on the same plot
- `corner`: arguably a little easier to use than `pygtc`, but uglier; downgraded because the default 2D probability levels shown are not the conventional ones
- `incredible`: provides fine-tuned control over CR calculations, and the ability to save results rather than just a plot, but needs some hand-holding
- `arviz`: a relatively new addition that looks like it may become the one-stop shop for CRs and MCMC diagnostics eventually; steeper learning curve, but worth a look
For comparison with the plots above, here is an example of a basic call to `pygtc`:
plotGTC(np.array([theta1, theta2]).T, paramNames=[r'$x$', r'$y$'],
        figureSize=5, customLabelFont={'size':12}, customTickFont={'size':12}, customLegendFont={'size':16});
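For completeness, `corner` can produce a similar quick-look plot from a single call. A minimal sketch, assuming the package is installed (and keeping in mind the caveat above about its default 2D contour levels):
import corner
# default contour levels differ from the conventional 1 and 2 sigma shown elsewhere in this tutorial
corner.corner(np.array([theta1, theta2]).T, labels=[r'$x$', r'$y$']);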