Tutorial: Cluster Membership Fractions of redMaPPer Galaxies¶
Gaussian Mixtures and Mixed Sampling Methods¶
In this notebook, we will reproduce part of, and slightly expand on, the analysis of Myles et al. (2021), using data kindly provided by the first author. In the course of doing so, you will
- design and implement a Gaussian Mixture Model (GMM);
- fit the GMM to a subset of the data using brute-force MCMC;
- perform the same fit using conjugate Gibbs sampling, introducing latent group membership parameters along the way;
- fit the full data set, including hierarchical modeling aspects, using multiple sampling approaches within the same analysis.
# !pip install incredible lmc pygtc
from copy import deepcopy
from os import getcwd
from os.path import exists as file_exists
from yaml import safe_load
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
%matplotlib inline
import incredible as cr
import lmc
from pygtc import plotGTC
thisTutorial = 'cl_membership'
if getcwd() == '/content':
# assume we are in Colab, and the user's data directory is linked to their drive/Physics267_data
from google.colab import drive
drive.mount('/content/drive')
datapath = '/content/drive/MyDrive/Physics267_data/' + thisTutorial + '/'
else:
# assume we are running locally somewhere and have the data under ./data/
datapath = 'data/'
Background¶
Galaxies that fall into massive accumulations of dark matter and hot gas (these are, inaccurately, known as "clusters of galaxies") rapidly have their gas stripped and cease forming stars. Hence, if one has an optical photometric survey, one way of finding galaxy clusters is to look for... clusters of galaxies that have similar colors, corresponding to red/non-star-forming spectra at some common redshift.
The thing is, along any line of sight there are loads of galaxies of different spectral types with different redshifts, so color is not a reliable way of saying that a particular galaxy is in a cluster. Optical photometric cluster finders can only identify sight-lines where there is probably a cluster, and estimate what its redshift would be. They also provide a "richness", loosely defined as the number of intrinsically red galaxies brighter than some threshold that the cluster is supposed to contain. But, again, we can't really be sure whether any given galaxy is actually a member or not from photometric data alone. RedMaPPer, the main cluster-finder used by the Dark Energy Survey and (soon) Rubin/LSST, addresses this by assigning a membership probability to each galaxy near an identified cluster; the richness is then the sum of these probabilities, such that it should statistically mean something like the number of bright, red galaxies in the cluster.
So far so good. But this approach is only good to the extent that the membership probabilities are accurate - or, at least, similarly accurate as a function of cluster mass. This is worth checking, which we could do using redshifts measured for a large number of potential cluster members from spectroscopy. The redshifts (strictly speaking, the implied velocities) for cluster members should be distributed in a tight Gaussian distribution centered on the redshift of the bright central galaxy (BCG). We would expect to see a wider distribution of redshifts from galaxies that are not in the cluster, despite having similar colors to cluster members.
(Analogously to the photometric color problem, it's technically possible for a galaxy at a different cosmological redshift to have such a large peculiar velocity that its redshift is consistent with the cluster's, but redshifts can be measured so precisely, and the cluster distribution is so narrow, that this should be rare. The model we'll build below will account for it, in any case.)
Our goal, following the paper, will be to measure the fraction of galaxies that redMaPPer assigns to a cluster that are, in fact, not cluster members, as a function of richness. As described more below, we will go a little bit farther by modeling the width of the cluster velocity distribution to be a simple function of richness, which makes physical sense (as both correlate with mass).
Data¶
The data we'll be working with ultimately come from the Sloan Digital Sky Survey. Myles et al. have compiled redshifts from Sloan spectroscopy for potential members of redMaPPer clusters with estimated redshifts of 0.08-0.12 (this is relatively low, but it is the only sample for which archival spectroscopy would be reasonably complete). Various fascinating cuts, which you can read the paper to learn about if you want, have been applied, and the data have been grouped into 6 bins according to the reported richness of the cluster each galaxy is supposed to belong to, with bins 0, 1, ..., 5 corresponding to increasing richness ranges (nominally increasing cluster mass).
For each bin, we simply have a list of values: $x_i = \frac{z_i - z_\mathrm{BCG}}{1 + z_\mathrm{BCG}}$. This quantity is directly related to the velocity implied by the galaxy's redshift, assuming it does belong to the cluster. We will eschew the conversion to velocity units in the tutorial for simplicity, and because it gives us an excuse to use the word eschew.
Below, we read the data for each bin into a corresponding entry of the list `data`. Note that the public solutions use the real data, while, as usual, you have been given mock data produced from a model with potentially different parameters than what we find.
data = [np.loadtxt(datapath+'bin'+str(i)+'.txt.gz') for i in range(5)]
richness = np.array([9., 24., 33., 42., 61.])
To reiterate, the different entries in `data` refer to bins in cluster richness, each containing between a few and a few thousand clusters' worth of galaxies. This is already a bit of a compromise from the data we might like to have, with the galaxies for each cluster kept separate, and each cluster's specific richness known. But, without reproducing a bunch of catalog wrangling, this is what we have. The entries in the `richness` array above correspond to something like the median richness for clusters in a given bin, estimated from plots in the paper. In effect, we will just treat each bin as if it were a single cluster with the specified richness, for the purpose of measuring the corresponding velocity dispersion and fraction of non-members. This simplification is not ideal, but shouldn't be too bad as long as there are a reasonable number of clusters in each bin. (Those of you who do the outside reading might notice that we have removed the richest bin from the published analysis, on the basis that it contains only 7 clusters over a much wider richness range than the other bins.)
Let's have a look at histograms of $x$ for each bin:
plt.rcParams['figure.figsize'] = (18.0, 8.0)
fig, ax = plt.subplots(2, 3);
for i in range(len(data)):
ii = np.unravel_index(i, ax.shape)
ax[ii].hist(data[i], bins=100, label='bin '+str(i));
ax[ii].legend();
ax[ii].set_xlim(-0.1, 0.1);
ax[ii].set_xlabel('x');
ax[1,2].axis(False);
Already we can clearly see what might be a sharp spike consisting of cluster members. The wider distribution of non-members is harder to make out on a linear scale, however, so let's replot the histograms with a log-scaled y axis:
plt.rcParams['figure.figsize'] = (18.0, 8.0)
fig, ax = plt.subplots(2, 3);
for i in range(len(data)):
ii = np.unravel_index(i, ax.shape)
ax[ii].hist(data[i], bins=100, label='bin '+str(i), log=True);
ax[ii].legend();
ax[ii].set_xlim(-0.1, 0.1);
ax[ii].set_xlabel('x');
ax[1,2].axis(False);
Recall that the log of a Gaussian is an upside-down parabola - it already looks like a couple of these might describe the data reasonably.
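Explicitly, $\ln \mathrm{Normal}(x|\mu,\sigma) = -\frac{(x-\mu)^2}{2\sigma^2} - \ln\left(\sigma\sqrt{2\pi}\right)$, a downward-opening parabola in $x$ whose width is set by $\sigma$, so a narrow component plus a broad component is a plausible reading of the shapes above.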
Model¶
So, what model will we fit to these distributions of $x$ values? The distribution for cluster members is thermodynamically expected to be a Gaussian, but it isn't obvious what we should expect for the non-members - it would depend on the cosmic large scale structure as well as the redMaPPer selection of these potential members in the first place. On the other hand, the wider distributions above look approximately Gaussian, and we can always build a model for them out of multiple Gaussians if need be. In other words, this might be a good place to use a Gaussian mixture model, describing the data for a single bin as
$p(x) = (1-f_\mathrm{b})\cdot\mathrm{Normal}(x|0,\sigma_\mathrm{cl}) + f_\mathrm{b}\cdot\mathrm{Normal}(x|\mu_\mathrm{b},\sigma_\mathrm{b})$.
Here, we've written the simplest mixture we might try, with 1 Gaussian component for cluster members (cl) and just 1 Gaussian component for the background (b). We've also baked in the expectation that the cluster component has a mean of zero (i.e., is centered on the BCG's $x$ value). The parameter $f_\mathrm{b}$ is defined in the sense of being the a priori probability (i.e. the fraction) of redMaPPer galaxies that are actually in the background component. As mentioned above, this could be generalized to include multiple Gaussians for the background, but it doesn't really seem to be necessary in practice, and our lives will be simpler with a mixture of just 2.
If we (perhaps unfairly) neglect measurement uncertainty in the redshift measurements, then our sampling distribution for the data in a given bin will resemble the expression above. Each bin is allowed to have a different value of $f_\mathrm{b}$ and $\sigma_\mathrm{cl}$, but we will follow Myles et al. by assuming that a single background distribution applies to all bins. (If you want, you can imagine that we're doing a follow-up study, with the appropriateness of that assumption already being established.) To be explicit, then, we have
$p(x_{ij}) = (1-f_{\mathrm{b},i})\cdot\mathrm{Normal}(x_{ij}|0,\sigma_{\mathrm{cl},i}) + f_{\mathrm{b},i}\cdot\mathrm{Normal}(x_{ij}|\mu_\mathrm{b},\sigma_\mathrm{b})$,
where $i$ indexes bins and $j$ indexes galaxies within each bin.
To begin with, we will fit the data for a single bin, independent of the others, in order to get key parts of our analysis up and running. After that, we will fit all the bins together, hierarchically modeling the cluster velocity dispersions as a power law,
$\sigma_{\mathrm{cl},i} = A_\sigma \, \left(\frac{\lambda_i}{10}\right)^{\alpha_\sigma}$,
where $\lambda_i$ is the median richness for the $i$th bin. We chose to normalize the power law at richness 10 because it's (a) a round number, and (b) close to the richness of the lowest bin, which has the largest number of clusters (and galaxies) contributing to it. Based on the published analysis, this $\sigma_{\mathrm{cl}}$ will be much better constrained than the others, so normalizing the function here should make the degeneracy between $A_\sigma$ and $\alpha_\sigma$ relatively small.
The full list of model parameters is thus, for $i=0,1,\ldots,4$,
- $f_{\mathrm{b},i}$: the fraction of redMaPPer-selected galaxies that are not actually in the associated cluster, and
- $\sigma_{\mathrm{cl},i}$: the standard deviation of the cluster Gaussian over $x$ in bin $i$,
as well as global parameters
- $A_\sigma$: normalization of the scaling relation for $\sigma_{\mathrm{cl}}$,
- $\alpha_\sigma$: slope of the scaling relation for $\sigma_{\mathrm{cl}}$,
- $\mu_\mathrm{b}$: the mean of the background Gaussian over $x$, and
- $\sigma_\mathrm{b}$: the standard deviation of the background Gaussian over $x$.
As usual, we will collectively decide on priors in class; because we'll use conjugate Gibbs sampling below, some of these will need to be compatible with conjugate distributions. Also as usual, your next task is to draw the PGMs and write out the conditional probability expressions that define the models. Don't forget that there are two cases: a single-bin analysis, and a hierarchical analysis of all the bins.
Space for your generative model
In addition, we will discuss and decide on a strategy for randomly initializing the Markov chains that we run in this notebook (i.e. what distributions to draw each parameter from). This could be as simple as "from the prior", unless, of course, the prior distribution is improper.
Notes on what we decided
Visualizing the model¶
Before we launch into things, it will be useful to have a way to quickly compare a model with the data. We can directly plot $p(x)$ over the histograms above, but the tails, where there are 0 or 1 measured values in a histogram bin, are tricky to judge. Therefore, we will look at comparisons of both the model PDF and CDF, the latter comparing to the empirical CDF of the data.
Below, fill in functions to evaluate $p(x)$ and its CDF.
def p_of_x(x, fb, sc, mb, sb):
"""
Return the sampling distribution PDF, p(x), for a single bin, modeled by the given background fraction (fb), cluster standard
deviation (sc), and background mean and standard deviation (mb, sb).
Should work if `x` is an array.
"""
# YOUR CODE HERE
raise NotImplementedError()
def cdf_of_x(x, fb, sc, mb, sb):
"""
Return the sampling distribution CDF for a single bin, modeled by the given background fraction (fb), cluster standard
deviation (sc), and background mean and standard deviation (mb, sb).
Should work if `x` is an array.
"""
# YOUR CODE HERE
raise NotImplementedError()
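If you want a cross-check, here is a minimal sketch of the kind of thing these functions need to do, written directly from the mixture expression above using `scipy.stats`. The `_sketch` names are just illustrative, and your own implementations may of course differ in style.

def p_of_x_sketch(x, fb, sc, mb, sb):
    # fb-weighted mixture of the cluster (mean 0, width sc) and background (mean mb, width sb) Gaussians
    return (1-fb)*st.norm.pdf(x, loc=0.0, scale=sc) + fb*st.norm.pdf(x, loc=mb, scale=sb)

def cdf_of_x_sketch(x, fb, sc, mb, sb):
    # the CDF of a mixture is the same weighted combination of the component CDFs
    return (1-fb)*st.norm.cdf(x, loc=0.0, scale=sc) + fb*st.norm.cdf(x, loc=mb, scale=sb)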
Here's a function to make the described comparison plots:
def compare(x, *args, **kwargs):
sortx = np.sort(x)
xx = np.linspace(sortx[0], sortx[-1], 1000)
plt.rcParams['figure.figsize'] = (10.0, 3.0)
fig, ax = plt.subplots(1, 2);
ax[0].hist(sortx, bins=100, log=True, density=True, label='data');
ax[0].plot(xx, p_of_x(xx, *args, **kwargs), label='model');
ax[0].set_ylim(1e-1);
ax[0].legend(); ax[0].set_xlabel('x'); ax[0].set_ylabel('p(x)');
ax[1].plot(sortx, np.arange(len(x))/len(x), '.', label='data');
ax[1].plot(xx, cdf_of_x(xx, *args, **kwargs), label='model');
ax[1].legend(); ax[1].set_xlabel('x'); ax[1].set_ylabel('CDF(x)');
Next, iteratively fill in guessed parameter values below until you have something that looks like a broadly reasonable description of the bin 1 data. You could, of course, repeat this for all the bins if you wanted to.
# guess = {'fb':..., 'sc':..., 'mb':..., 'sb':...}
# YOUR CODE HERE
raise NotImplementedError()
compare(data[1], **guess)
Fitting a single bin¶
Before launching into anything too complex, we will first play with fitting the data from a single bin (specifically, $i=1$). We'll first use a standard MCMC approach, and then use conjugate Gibbs sampling. This is not just to demonstrate again that in certain circumstances conjugate Gibbs can be much more efficient, but because the more complex analysis you'll do later is one where using both sampling methods together is actually a nice option. Getting each of them working separately on a subset of the data will make that final leap more straightforward.
Below is the enumeration of parameters that we'll use in this section.
paramnames = ['fb1', 'sigma_cl1', 'mu_b', 'sigma_b']
param_labels = [r'$f_\mathrm{b,1}$', r'$\sigma_\mathrm{cl,1}$', r'$\mu_\mathrm{b}$', r'$\sigma_\mathrm{b}$']
Brute-force MCMC¶
With the above plan in mind, we will be using the `lmc` package in this part, as it can be relatively easily modified to accommodate mixed Gibbs and non-Gibbs updates later on. `lmc` has the peculiar feature that the log-posterior function we provide it takes a completely arbitrary argument of our choice. It could be a vector of parameter values, or the data, or a structure containing both, or `None`. In this case, let's make that argument the data vector for a single bin; we'll only work with bin 1 in this section, but in principle you could plug in any of them using the same code. This means that we will be getting the current parameter values within the function from global scope, which is simultaneously terrible coding style and entirely functional. We take full responsibility for this, and therefore provide the corresponding part of the function for you below.
def lnpost(x):
"""
`x` is the data vector for a single bin
Return the log-posterior (don't forget the prior!)
"""
f_b1,sigma_cl1,mu_b,sigma_b = [p() for p in parameter_space] # parameter_space is global, to be defined below
# YOUR CODE HERE
raise NotImplementedError()
Here we define the `lmc.Parameter` objects that the fitter will use; note that the "call" `()` syntax used above is the approved way of extracting the current value of one of these. The `ParameterSpace` is also an `lmc` thing, although the dictionary below is just for our convenience. (Recall that, since this is Python, exactly the same `Parameter` objects are referenced from both `parameter_space` and `parameter_dict`; one does not hold copies of the other.)
parameter_space = lmc.ParameterSpace([lmc.Parameter(name=s) for s in paramnames], lnpost)
parameter_dict = {k:p for k,p in zip(paramnames, parameter_space)}
This will set the value of each `Parameter` to your guess from above, and test that the `lnpost` function returns a finite number:
for k1,k2 in zip(['fb', 'sc', 'mb', 'sb'], paramnames):
parameter_dict[k2].set(guess[k1])
print(lnpost(data[1]))
assert np.isfinite(lnpost(data[1]))
The last thing that would be nice to have is a function that randomly initializes the starting point of our MCMC, following the decision we made in class together. The prototype below shows how to do this in practice. You will also need to set a `width` attribute for each `Parameter`, which serves as an initial guess for the scale of the proposal distribution (this is adapted as the chain runs, but it's good to start with something reasonable). Your function should never return parameters that have zero prior probability.
# def initialize_params():
# parameter_dict['fb1'].set(...)
# parameter_dict['sigma_cl1'].set(...)
# parameter_dict['mu_b'].set(...)
# parameter_dict['sigma_b'].set(...)
# parameter_dict['fb1'].width = ...
# parameter_dict['sigma_cl1'].width = ...
# parameter_dict['mu_b'].width = ...
# parameter_dict['sigma_b'].width = ...
# YOUR CODE HERE
raise NotImplementedError()
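In case it's helpful, here is a purely illustrative sketch of such an initializer. The distributions and numerical ranges below are hypothetical (chosen only to be broadly compatible with the $x$ ranges in the histograms above), not necessarily the priors we agreed on, and the `_sketch` name is just to distinguish it from your own function.

def initialize_params_sketch():
    # draw starting values from broad ranges (hypothetical; replace with draws consistent with our priors)
    parameter_dict['fb1'].set(st.uniform.rvs(0.1, 0.8))            # background fraction somewhere in (0.1, 0.9)
    parameter_dict['sigma_cl1'].set(st.uniform.rvs(0.001, 0.009))  # narrow cluster component
    parameter_dict['mu_b'].set(st.uniform.rvs(-0.01, 0.02))        # background mean near zero
    parameter_dict['sigma_b'].set(st.uniform.rvs(0.01, 0.04))      # background much wider than the cluster
    # rough proposal scales for each parameter
    parameter_dict['fb1'].width = 0.05
    parameter_dict['sigma_cl1'].width = 0.001
    parameter_dict['mu_b'].width = 0.005
    parameter_dict['sigma_b'].width = 0.005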
Let's just verify that this function works:
initialize_params()
print([p() for p in parameter_space])
assert np.isfinite( lnpost(data[1]) )
Hint: If the assertion fails, do make sure you're taking a sum of logs rather than a log of a product.
While it doesn't turn out to be an issue in this case, the hint above gets to a weakness of brute-force posterior calculations when using mixture models. Namely, because part of the posterior involves a sum of PDFs, we do actually have to evaluate those PDFs, rather than exclusively working with log-PDFs. This opens a door to numerical over/underflows that we might not need to worry about otherwise.
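If over/underflow ever does bite, one standard trick is to work with the component log-PDFs and combine them with `scipy.special.logsumexp`, which never forms the (possibly underflowing) PDF values explicitly. A sketch of the idea, not required for this notebook:

from scipy.special import logsumexp

def ln_mixture_pdf(x, fb, sc, mb, sb):
    # log[ (1-fb)*N(x|0,sc) + fb*N(x|mb,sb) ], evaluated entirely in log space
    lnp_cl = np.log(1.0-fb) + st.norm.logpdf(x, loc=0.0, scale=sc)
    lnp_b = np.log(fb) + st.norm.logpdf(x, loc=mb, scale=sb)
    return logsumexp([lnp_cl, lnp_b], axis=0)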
Ok, finally we are ready to run chains. We'll just run several in series, since `lmc`'s parallelization is not really compatible with being in a notebook. We strongly recommend that you test this out first with a single, short chain, before re-running with settings more like those given below. As always, there will be diagnostic tests to pass afterward.

If you're wondering, the `lmc` defaults here employ adaptive slice sampling, so you shouldn't see the perfect repetition of samples that Metropolis produces, although each sample takes slightly longer to generate.
%%time
nchains = 4
nsamples = 3000
chains = [lmc.dictBackend() for i in range(nchains)]
for chain in chains:
initialize_params()
vehicle = lmc.Vehicle(parameter_space, backend=chain, adapt_every=100, adapt_starting=200)
vehicle(1, nsamples, struct=data[1], scatter=False)
Next we just reformat the results so that `incredible` can digest them. Since `lmc` automatically kept track of the log-posterior for us (`_LOGP_`), we may as well show it along with the other traces.
chains = np.array([np.array([chain[p] for p in paramnames+['_LOGP_']]).T for chain in chains])
fig, ax = plt.subplots(len(paramnames)+1, 1, figsize=(16.0, 2.0*len(paramnames)+2));
cr.plot_traces(chains, ax, labels=param_labels+['_LOGP_']);
Choose a burn-in length that seems about right. This can be iterated with the following cell, which re-displays the traces without the burn-in.
# burn =
# YOUR CODE HERE
raise NotImplementedError()
fig, ax = plt.subplots(len(paramnames)+1, 1, figsize=(16.0, 2.0*len(paramnames)+2));
cr.plot_traces(chains[:,burn:,:], ax, labels=param_labels+['_LOGP_']);
Next we will throw away the burn-in portion defined above (for good!) and test for convergence and length.
chains = chains[:,burn:,:len(paramnames)] # ditch the _LOGP_ column, too
R = cr.GelmanRubinR(chains)
print("R:", R)
assert np.all(R < 1.1)
maxlag = 500 # may need to adjust this
neff = cr.effective_samples(chains, maxlag=maxlag, throw=True)
print("neff:", neff)
assert np.all(neff >= 100) # we'll get better chains from Gibbs, so these don't have to be great
For completeness, let us gaze upon a triangle.
plotGTC(np.concatenate(chains), paramNames=param_labels,
figureSize=6, customLabelFont={'size':12}, customTickFont={'size':12}, customLegendFont={'size':16});
Conjugate Gibbs¶
Hopefully it won't come as a shock to learn that a model composed entirely of Gaussians (or any standard distribution, really) lends itself to conjugacy. In fact, you will be able to re-use functions from the MCMC Sampling tutorial to determine the conditional posterior distributions for the mean and variance of a Gaussian component, and the generalization from 1 Gaussian to a Gaussian mixture is not too much more involved.
On the other hand, exactly how one comes up with a conjugacy relation for a sum of Gaussians may not be obvious. The trick is... we don't need to! Instead, we can get around the question entirely by introducing a set of latent parameters, $g$, where $g_{ij}$ tells us whether galaxy $ij$ belongs to the cluster distribution for bin $i$ ($g_{ij}=0$) or the background distribution ($g_{ij}=1$). You could think of this as re-writing our model PDF as
$p(x_{ij}|g_{ij}) = (1-g_{ij})\cdot\mathrm{Normal}(x_{ij}|0,\sigma_{\mathrm{cl},i}) + g_{ij}\cdot\mathrm{Normal}(x_{ij}|\mu_\mathrm{b},\sigma_\mathrm{b})$.
The $g_{ij}$ parameters appear in exactly the same way as $f_{\mathrm{b},i}$ did, but since $g_{ij}$ is either 0 or 1 for a given galaxy $j$, $p(x_{ij}|g_{ij})$ is always just a single Gaussian. Of course, $f_{\mathrm{b},i}$ is still part of the model, and will play a role in determining the $g_{ij}$ - after all, $f_{\mathrm{b},i}$ is exactly the a priori probability that any $g_{ij}=1$.
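In other words, conditioning on the latent assignments makes the joint density of each datum and its flag a product of simple factors, $p(x_{ij}, g_{ij}|\ldots) = \left[f_{\mathrm{b},i}\,\mathrm{Normal}(x_{ij}|\mu_\mathrm{b},\sigma_\mathrm{b})\right]^{g_{ij}} \left[(1-f_{\mathrm{b},i})\,\mathrm{Normal}(x_{ij}|0,\sigma_{\mathrm{cl},i})\right]^{1-g_{ij}}$, which is exactly the kind of structure that conjugate updates can exploit.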
We'll fill in the details in a moment. But first, it will be very helpful to modify your PGM and expressions to explicitly show the latent $g_{ij}$ parameters. If you're confused about how to connect them to everything else, remember... think generatively! (And then re-read the final clause of the previous paragraph.)
Space for yet another PGM
Conjugate updates¶
Recall that, in conjugate Gibbs sampling, we update one parameter (or bunch of related parameters) at a time, with all the others fixed. The advantage of introducing the group membership parameters is that, at any given moment in our sampling, each galaxy's $x$ is drawn from exactly one Gaussian. Conversely, if we want to update the parameters of a Gaussian, we can use the update rules you worked out in the MCMC Sampling tutorial, as long as only the data for galaxies with the corresponding $g$ value are included in the calculation. Note that we will need to specify hyperparameters of the conjugate prior distributions that give us equivalent priors to what we decided above.
First, let's just paste in the functions you wrote for the MCMC Sampling tutorial that compute the parameters of the conditional posterior distributions for a Gaussian mean and variance. Please note one change of notation: we previously called the Gaussian mean `z_cl`, since it made sense in the context of the earlier notebook. Here, let's just call it `mu`, to be general. Also be careful to keep standard deviations vs variances straight.
# paste your functions from the "clredshift" notebook, but note the z_cl -> mu change of notation
def condpost_Gaussian_mean(data, par, hypar):
"""
`data` is a 1D array of all the data the Gaussian is supposed to describe
`par` is a dictionary {'mu', 'sigma2'} of the Gaussian's current parameters (sigma2 is the variance)
`hypar` is a dictionary {'mu0', 'tau0', 'alpha0', 'beta0'} of the hyperparameters (Gaussian mean and *STANDARD DEVIATION* for mu,
inverse-gamma shape and rate for sigma2)
return a tuple (mean, stdev) encoding the normal distribution from which the new mean (mu) should be drawn
"""
# YOUR CODE HERE
raise NotImplementedError()
def condpost_Gaussian_variance(data, par, hypar):
"""
`data` is a 1D array of all the data the Gaussian is supposed to describe
`par` is a dictionary {'mu', 'sigma2'} of the Gaussian's current parameters (sigma2 is the variance)
`hypar` is a dictionary {'mu0', 'tau0', 'alpha0', 'beta0'} of the hyperparameters (Gaussian mean and *STANDARD DEVIATION* for mu,
inverse-gamma shape and rate for sigma2)
return a tuple (alpha, beta) encoding the inverse-gamma distribution from which the new variance (sigma2) should be drawn
"""
# YOUR CODE HERE
raise NotImplementedError()
Even though these are cut/pasted, let's test them against known solutions just to be sure:
condpost_test = safe_load(open(datapath+'test_condpost.yaml', 'r').read())
assert np.allclose(condpost_Gaussian_mean(np.array(condpost_test['data']), condpost_test['testpar'], condpost_test['testhypar']), condpost_test['mu'])
assert np.allclose(condpost_Gaussian_variance(np.array(condpost_test['data']), condpost_test['testpar'], condpost_test['testhypar']), condpost_test['sigma2'])
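For reference, with the hyperparameter conventions in the docstrings above (a Normal prior on `mu` with mean `mu0` and standard deviation `tau0`, and an inverse-gamma prior on `sigma2` with shape `alpha0` and rate `beta0`), the standard conjugate updates look something like the sketch below; your pasted functions may be written differently, but should be mathematically equivalent. The `_sketch` names are just illustrative.

def condpost_Gaussian_mean_sketch(data, par, hypar):
    # Normal prior N(mu0, tau0^2) on mu, with the variance sigma2 held fixed
    n = len(data)
    post_precision = 1.0/hypar['tau0']**2 + n/par['sigma2']
    post_mean = (hypar['mu0']/hypar['tau0']**2 + np.sum(data)/par['sigma2']) / post_precision
    return post_mean, np.sqrt(1.0/post_precision)

def condpost_Gaussian_variance_sketch(data, par, hypar):
    # inverse-gamma prior (shape alpha0, rate beta0) on sigma2, with the mean mu held fixed
    alpha = hypar['alpha0'] + 0.5*len(data)
    beta = hypar['beta0'] + 0.5*np.sum((data - par['mu'])**2)
    return alpha, beta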
Of course, we will also need rules for updating all of the $g_{ij}$ and $f_{\mathrm{b},i}$ parameters.
Updating $g$¶
To see how to update each of the $g_{ij}$, write down the terms in the posterior where it appears, based on your PGM and equivalent expressions. (Remember, it's the fully conditional posterior that we need to work with. All other parameters are fixed, and so any term not involving $g_{ij}$ can be ignored, as long as we remember to normalize at the end.) Hopefully without giving away too much, you should be left with something like
$p(g_{ij}|\ldots) \propto p(g_{ij}|f_{\mathrm{b},i}) \, p(x_{ij}|g_{ij},\ldots)$.
Since $g_{ij}$ can only take the values 0 and 1, we simply need to evaluate the conditional posterior at both values, normalize them, and then randomly choose a new $g_{ij}$ based on those 2 probabilities.
Consistent with the functions above, complete the following function to return, for each galaxy in a particular bin, the parameter of its conditional posterior: the probability of being in the background, $p(g_{ij}=1|\ldots)$.
def condpost_group_membership(x, f1, par0, par1):
"""
`x` is a 1D array of all the data to be assigned to groups 0 or 1
`f1` is the a priori probability of belonging to group 1
`par0` is a dictionary {'mu', 'sigma2'} of parameters describing the group 0 Gaussian PDF
`par1` is a dictionary {'mu', 'sigma2'} of parameters describing the group 1 Gaussian PDF
return an array (same shape as data) holding the conditional posterior probability for each datum to belong to group 1
"""
# YOUR CODE HERE
raise NotImplementedError()
Here is a quick test that it's working:
assert np.allclose(condpost_group_membership(np.array(condpost_test['data']), condpost_test['testpar']['f'], condpost_test['testpar'], condpost_test['testpar2']), condpost_test['g'])
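In case the bookkeeping is unclear, the calculation is just Bayes' theorem over the two possible values of $g_{ij}$. A sketch using the same dictionary conventions (again with an illustrative `_sketch` name):

def condpost_group_membership_sketch(x, f1, par0, par1):
    # unnormalized weights for g=0 (cluster) and g=1 (background), then normalize
    w0 = (1.0-f1) * st.norm.pdf(x, loc=par0['mu'], scale=np.sqrt(par0['sigma2']))
    w1 = f1 * st.norm.pdf(x, loc=par1['mu'], scale=np.sqrt(par1['sigma2']))
    return w1 / (w0 + w1)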
Updating $f$¶
Following the same approach as above, the conditional posterior for $f_{\mathrm{b},i}$ looks something like
$p(f_{\mathrm{b},i}|\ldots) \propto p(f_{\mathrm{b},i}) \, \prod_j p(g_{ij}|f_{\mathrm{b},i}) = p(f_{\mathrm{b},i}) \, f_{\mathrm{b},i}^{n_{i,1}} (1-f_{\mathrm{b},i})^{n_{i,0}}$,
where $n_{i,k}$ is the number of $g_{ij}$ equal to $k$. These last 2 factors you might recognize as being proportional to the binomial distribution, $p(n|q;N)={N \choose n} q^n (1-q)^{N-n}$. You've already solved this particular conjugacy problem in the Bayes' Law tutorial, but consult Wikipedia or work it out again if in doubt (spoiler: the conjugate distribution is Beta).
Space to note down the Binomial-Beta conjugate update rule
Implement the corresponding calculation of the conditional posterior parameters ($\alpha$ and $\beta$ of the Beta distribution) for one of the $f_{\mathrm{b},i}$.
def condpost_fraction(groups, hypar):
"""
`groups` is a 1D array of group assignments for all data (0 or 1) that this fraction applies to
`hypar` is a dictionary {'alpha', 'beta'} of the Beta distribution hyperparameters
return a tuple (alpha, beta) encoding the Beta distribution from which the new fraction for group 1 should be drawn
"""
# YOUR CODE HERE
raise NotImplementedError()
And a test:
assert np.allclose(condpost_fraction(np.array(condpost_test['testpar']['g']), condpost_test['testhypar']), condpost_test['f'])
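For reference, the Beta-Binomial conjugacy boils down to adding the group counts to the prior hyperparameters; a sketch with the same conventions (illustrative `_sketch` name):

def condpost_fraction_sketch(groups, hypar):
    # Beta(alpha, beta) prior plus n1 galaxies with g=1 and n0 with g=0
    n1 = np.sum(groups == 1)
    n0 = np.sum(groups == 0)
    return hypar['alpha'] + n1, hypar['beta'] + n0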
Implementation¶
Now that the individual conditional posterior calculations are done, let's put together the infrastructure to update the model parameters. We'll follow the convention we've mostly used in this course and organize the parameters into a dictionary. However, in the context of a mixture model, it makes sense to complicate things slightly by including "sub-dictionaries" for the mixture components. We think the end result is not too confusing. Consider the following, which encodes the parameters you guessed for the fit to bin 1 in the previous section:
guess2 = {'fb1':guess['fb'], 'gauss1':{'mu':0.0, 'sigma2':guess['sc']**2}, 'gaussb':{'mu':guess['mb'], 'sigma2':guess['sb']**2}}
Note that the parameters for the cluster-member Gaussian (which we assume has a mean of zero) and the background galaxy Gaussian are stored in `guess2['gauss1']` and `guess2['gaussb']`, respectively, each holding its own `mu` and `sigma2`.
What about the latent $g$ parameters that we introduced in this section? Those will need to appear in the parameter dictionary also, but we don't actually need to guess values for them (or come up with starting values for an MCMC) if we decide that $g$ will be the first set of parameters that get updated in the first iteration of the fit. This is because its fully conditional posterior doesn't depend on the current value of $g$, just on the other parameters. (We could choose any of the parameters to be exempt from choosing a starting value in this way, but $g$ is easily the most convenient in this case because there are so many of them.)
It is, however, fun to see what mixture assignments follow from our guessed values of the other parameters. So, let's go ahead and draw $g$ from its conditional posterior
guess2['g'] = st.bernoulli.rvs( condpost_group_membership(data[1], guess2['fb1'], guess2['gauss1'], guess2['gaussb']) )
and have a look at the histograms of $x$ for the galaxies belonging to the 2 mixture components:
plt.rcParams['figure.figsize'] = (6., 4.)
plt.hist(data[1][guess2['g']==1], label='background', color='C1');
plt.hist(data[1][guess2['g']==0], label='clusters', color='C0');
plt.legend();
print('Fraction of g in background component:', (guess2['g']==1).sum()/len(guess2['g']))
If your guess is at all reasonable, and the meaning of the various parameters hasn't gotten confused along the way, you should see a narrow spike of cluster galaxies and a much broader distribution of background galaxies. The fraction of galaxies with $g_{1j}=1$ printed above should also be similar to your guess for $f_{\mathrm{b,1}}$, although they need not be identical.
Finally, it's time to encode the priors we agreed on with specific hyperparameter values. Fill in the dictionary below, referring back to the conditional posterior functions if you've forgotten the meaning of any entries:
# hyperparams = {'fb1':{'alpha':..., 'beta':...},
# 'gauss1':{'mu0':..., 'tau0':..., 'alpha0':..., 'beta0':...},
# 'gaussb':{'mu0':..., 'tau0':..., 'alpha0':..., 'beta0':...}}
# YOUR CODE HERE
raise NotImplementedError()
Ok! With that, we can write a function that updates a parameter dictionary in place, as we did in the MCMC Sampling notebook. This is given below, but make sure you understand what is happening and why.
def gibbs_iteration2(x, par, hypar):
# x is the data for a single bin
# We could randomize the order of these updates if we cared to, but it's convenient to not have to initialize g beforehand.
# Sample group assignments
p = condpost_group_membership(x, par['fb1'], par['gauss1'], par['gaussb'])
par['g'] = st.bernoulli.rvs(p)
# Sample the background fraction parameter
alpha,beta = condpost_fraction(par['g'], hypar['fb1'])
par['fb1'] = st.beta.rvs(alpha, beta)
# Sample the variance of the cluster-member Gaussian component (mean is assumed fixed)
xcl = x[par['g'] == 0]
if len(xcl) > 0: # because you never know
alpha,beta = condpost_Gaussian_variance(xcl, par['gauss1'], hypar['gauss1'])
par['gauss1']['sigma2'] = st.invgamma.rvs(alpha, scale=beta)
# Sample the mean and variance of the background Gaussian component
xb = x[par['g'] == 1]
if len(xb) > 0: # because, seriously, you never know
alpha,beta = condpost_Gaussian_variance(xb, par['gaussb'], hypar['gaussb'])
par['gaussb']['sigma2'] = st.invgamma.rvs(alpha, scale=beta)
mean,sd = condpost_Gaussian_mean(xb, par['gaussb'], hypar['gaussb'])
par['gaussb']['mu'] = st.norm.rvs(mean, sd)
Let's verify that this does, in fact, update the parameters to allowed values (compare with your guess to make sure they changed):
params = deepcopy(guess2)
gibbs_iteration2(data[1], params, hyperparams)
params
Running the MCMC¶
That should be everything we need to run some chains. Below we just borrow the function you wrote to randomly initialize a chain, and then call `gibbs_iteration2`, storing the parameter values in an array.
%%time
nchains = 4
nsamples = 3000
chains2 = np.empty((nchains, nsamples, len(paramnames)))
for i in range(chains2.shape[0]):
initialize_params() # puts initial values in parameter_dict
params = {'fb1':parameter_dict['fb1'](),
'gauss1':{'mu':0.0, 'sigma2':parameter_dict['sigma_cl1']()**2},
'gaussb':{'mu':parameter_dict['mu_b'](), 'sigma2':parameter_dict['sigma_b']()**2}
} # assumes params['g'] gets updated first, so we don't need to give it a starting value here
for j in range(chains2.shape[1]):
gibbs_iteration2(data[1], params, hyperparams)
chains2[i,j,:] = [params['fb1'], np.sqrt(params['gauss1']['sigma2']), params['gaussb']['mu'], np.sqrt(params['gaussb']['sigma2'])]
Go through the usual diagnostics below. The burn-in will likely be quite short, and you should see something like a factor of 10 more effectively independent samples compared with the brute-force MCMC of the same length.
fig, ax = plt.subplots(len(paramnames), 1, figsize=(16.0, 2.0*len(paramnames)));
cr.plot_traces(chains2, ax, labels=param_labels);
# burn2 = ...
# YOUR CODE HERE
raise NotImplementedError()
fig, ax = plt.subplots(len(paramnames), 1, figsize=(16.0, 2.0*len(paramnames)));
cr.plot_traces(chains2[:,burn2:,:], ax, labels=param_labels);
chains2 = chains2[:,burn2:,:]
R = cr.GelmanRubinR(chains2)
print("R =", R)
assert np.all(R < 1.1)
maxlag2 = 500 # change if needed
neff = cr.effective_samples(chains2, maxlag=maxlag2, throw=True)
print("neff =", neff)
assert np.all(neff > 1000) # should be easily exceeded
Let's see how these results compare with the brute-force MCMC. They should provide the same constraints, modulo the different number of effective samples.
plotGTC([np.concatenate(chains), np.concatenate(chains2)], paramNames=param_labels, chainLabels=['non-Gibbs','Gibbs'],
figureSize=6, customLabelFont={'size':12}, customTickFont={'size':12}, customLegendFont={'size':16});
Just to make sure, here's a quick check that the means and standard deviations of each parameter are within 10%:
assert np.allclose(np.concatenate(chains).mean(axis=0), np.concatenate(chains2).mean(axis=0), rtol=0.1, atol=1e-4)
assert np.allclose(np.concatenate(chains).std(axis=0), np.concatenate(chains2).std(axis=0), rtol=0.1, atol=1e-4)
Fitting all bins with linked backgrounds¶
As foreshadowed, we will eventually fit all the data hierarchically, with a shared background Gaussian component and cluster Gaussian widths related by a power law with richness. Before that, it would be nice to do a fit that doesn't assume the power law, but instead lets each bin have its own cluster-member Gaussian. We can compare the final constraints on the power law with these individual $\sigma_\mathrm{cl}$ constraints to see whether the assumption of that model is justified (and/or you could do a more formal model comparison, if you were so inclined).
The fit in this section can be almost entirely accomplished by reusing the work you've already done and just adding a little bookkeeping. To reiterate, we now have an $f_\mathrm{b}$ and a $\sigma_\mathrm{cl}$ parameter for each bin (with no relationship between the $\sigma_\mathrm{cl}$'s assumed yet), plus a single background distribution parametrized by $\mu_\mathrm{b}$ and $\sigma_\mathrm{b}$.
paramnames3 = ['fb0', 'fb1', 'fb2', 'fb3', 'fb4', 'sigma_cl0', 'sigma_cl1', 'sigma_cl2', 'sigma_cl3', 'sigma_cl4',
'mu_b', 'sigma_b']
param_labels3 = [r'$f_\mathrm{b,0}$', r'$f_\mathrm{b,1}$', r'$f_\mathrm{b,2}$', r'$f_\mathrm{b,3}$', r'$f_\mathrm{b,4}$',
r'$\sigma_\mathrm{cl,0}$',r'$\sigma_\mathrm{cl,1}$',r'$\sigma_\mathrm{cl,2}$',r'$\sigma_\mathrm{cl,3}$',r'$\sigma_\mathrm{cl,4}$',
r'$\mu_\mathrm{b}$', r'$\sigma_\mathrm{b}$']
Fill in the hyperparameter dictionary for this part. The priors for each of the $\sigma_\mathrm{cl}$ parameters can be the same as we used for $\sigma_\mathrm{cl,1}$ in the previous sections, but for the others you should use whatever priors we decided on for the full hierarchical model.
# hyperparams3 = {'fb0':{'alpha':..., 'beta':...},
# 'fb1':{'alpha':..., 'beta':...},
# ...
# 'fb4':{'alpha':..., 'beta':...},
# 'gauss0':{'mu0':..., 'tau0':..., 'alpha0':..., 'beta0':...},
# 'gauss1':{'mu0':..., 'tau0':..., 'alpha0':..., 'beta0':...},
# ...
# 'gauss4':{'mu0':..., 'tau0':..., 'alpha0':..., 'beta0':...},
# 'gaussb':{'mu0':..., 'tau0':..., 'alpha0':..., 'beta0':...}}
# YOUR CODE HERE
raise NotImplementedError()
Next, we need to generalize the function that updates all of the parameters once. We've provided the bookkeeping to deal with the multiple cluster-member Gaussians; you just need to add the part that updates the background Gaussian parameters based on the data from all bins that are assigned to the background component. (Hint: `np.concatenate` is your friend.)
def gibbs_iteration3(xs, par, hypar):
# xs is a list of data vectors for each bin
nbins = len(xs)
for i in range(nbins):
si = str(i)
p = condpost_group_membership(xs[i], par['fb'+si], par['gauss'+si], par['gaussb'])
par['g'+si] = st.bernoulli.rvs(p)
alpha,beta = condpost_fraction(par['g'+si], hypar['fb'+si])
par['fb'+si] = st.beta.rvs(alpha, beta)
x = xs[i][par['g'+si] == 0]
if len(x) > 0:
alpha,beta = condpost_Gaussian_variance(x, par['gauss'+si], hypar['gauss'+si])
par['gauss'+si]['sigma2'] = st.invgamma.rvs(alpha, scale=beta)
# add the updates of par['gaussb']['mu'] and par['gaussb']['sigma2'] here
# YOUR CODE HERE
raise NotImplementedError()
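If the hint isn't enough, the missing piece mirrors the background update in `gibbs_iteration2`, except that the $g=1$ galaxies from every bin are pooled first. Here it is sketched as a stand-alone helper (it relies on the `condpost_*` functions defined earlier, and the `update_background_sketch` name is just illustrative); you could equally well inline the same lines at the end of `gibbs_iteration3`.

def update_background_sketch(xs, par, hypar):
    # pool the background-assigned (g==1) galaxies from all bins
    xb = np.concatenate([xs[i][par['g'+str(i)] == 1] for i in range(len(xs))])
    if len(xb) > 0: # because, as ever, you never know
        alpha,beta = condpost_Gaussian_variance(xb, par['gaussb'], hypar['gaussb'])
        par['gaussb']['sigma2'] = st.invgamma.rvs(alpha, scale=beta)
        mean,sd = condpost_Gaussian_mean(xb, par['gaussb'], hypar['gaussb'])
        par['gaussb']['mu'] = st.norm.rvs(mean, sd)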
The next function will further abuse your `initialize_params` function to provide a starting position.
def initialize_params3():
initialize_params() # puts values in parameter_dict
params = {'fb0':parameter_dict['fb1'](),
'gauss0':{'mu':0.0, 'sigma2':parameter_dict['sigma_cl1']()**2},
'gaussb':{'mu':parameter_dict['mu_b'](), 'sigma2':parameter_dict['sigma_b']()**2}
}
for i in range(1, len(data)):
initialize_params()
params['fb'+str(i)] = parameter_dict['fb1']()
params['gauss'+str(i)] = {'mu':0.0, 'sigma2':parameter_dict['sigma_cl1']()**2}
return params
Let's quickly check that we can both initialize a set of parameters, and change it using the updating function and hyperparameters:
params = initialize_params3()
params
gibbs_iteration3(data, params, hyperparams3)
params
If so, we should be able to run chains as usual. This should take about 5 times as long per iteration as the single-bin Gibbs fit, but similarly should converge quickly and have low autocorrelation. We'll reduce the default number of steps accordingly, since we don't really need that many samples.
%%time
nchains = 4
nsteps = 1000
chains3 = np.empty((nchains, nsteps, len(paramnames3)))
for i in range(chains3.shape[0]):
params = initialize_params3()
for j in range(chains3.shape[1]):
gibbs_iteration3(data, params, hyperparams3)
chains3[i,j,:] = [params['fb0'], params['fb1'], params['fb2'], params['fb3'], params['fb4'],
np.sqrt(params['gauss0']['sigma2']), np.sqrt(params['gauss1']['sigma2']), np.sqrt(params['gauss2']['sigma2']), np.sqrt(params['gauss3']['sigma2']), np.sqrt(params['gauss4']['sigma2']),
params['gaussb']['mu'], np.sqrt(params['gaussb']['sigma2'])]
fig, ax = plt.subplots(len(paramnames3), 1, figsize=(16.0, 2.0*len(paramnames3)));
cr.plot_traces(chains3, ax, labels=param_labels3);
# burn3 = ... # this can probably be very short again
# YOUR CODE HERE
raise NotImplementedError()
fig, ax = plt.subplots(len(paramnames3), 1, figsize=(16.0, 2.0*len(paramnames3)));
cr.plot_traces(chains3[:,burn3:,:], ax, labels=param_labels3);
chains3 = chains3[:,burn3:,:]
R = cr.GelmanRubinR(chains3)
print('R =', R)
assert np.all(R < 1.1)
maxlag3 = 500 # doubt this needs to be raised
neff = cr.effective_samples(chains3, maxlag=maxlag3, throw=True)
print('neff =', neff)
assert np.all(neff > 1000)
Here is the giant triangle plot, for posterity:
plotGTC(np.concatenate(chains3), paramNames=param_labels3,
figureSize=20, customLabelFont={'size':12}, customTickFont={'size':12}, customLegendFont={'size':16});
Hopefully, this is a lot of uninteresting blobs. To condense it, let's have a look at the background fraction and cluster velocity dispersion as a function of richness, conventionally denoted $\lambda$. Below, we just summarize each of the 1D constraints using the median and percentiles.
fig, ax = plt.subplots(1, 2, figsize=(10.0, 3.0));
mid,low,high = np.quantile(chains3[:,:,np.arange(len(data))], [0.5,0.1585, 0.8415], axis=(0,1))
ax[0].errorbar(richness, mid, [mid-low, high-mid], fmt='.'); ax[0].set_xlabel(r'$\lambda$'), ax[0].set_ylabel(r'$f_\mathrm{b}$');
mid,low,high = np.quantile(chains3[:,:,np.arange(len(data))+len(data)], [0.5,0.1585, 0.8415], axis=(0,1))
ax[1].errorbar(richness, 1e3*mid, 1e3*np.array([mid-low, high-mid]), fmt='.'); ax[1].set_xlabel(r'$\lambda$'), ax[1].set_ylabel(r'$10^3\sigma_\mathrm{cl}$');
Have a look at the constraints for $f_{\mathrm{b},i}$ here in particular, and compare them to the solutions. Although the specific best values will differ, they should be similar in magnitude. Also similarly, the constraints should be tighter at low richness, but not infinitely tight (i.e., the sampler should actually be changing the values).
I_checked_and_everything_looks_good = False # make True when true
# YOUR CODE HERE
raise NotImplementedError()
assert I_checked_and_everything_looks_good
Fitting all bins with linked backgrounds and a power-law scaling relation¶
The final change we will make in this notebook is to model $\sigma_\mathrm{cl}$ as a power law in richness, rather than having independent parameters for each bin:
$\sigma_\mathrm{cl}(\lambda) = A_\sigma \left(\frac{\lambda}{10}\right)^{\alpha_\sigma}$.
This is both physically expected and broadly justified by the plot above. We will not similarly introduce a model for $f_\mathrm{b}(\lambda)$, since there is no equivalent theoretical expectation for it.
This has two immediate consequences. First, we will have 3 fewer parameters to juggle. Second, the new parameters, $A_\sigma$ and $\alpha_\sigma$, are not conjugate with anything else (that we noticed). However! This doesn't change the fact that 7 parameters can still be conjugately sampled, conditional on $\sigma_\mathrm{cl}(\lambda)$. Therefore, we will use this as an excuse to set up a mixed sampling MCMC, where most parameters are updated using conjugate relations, and 2 are sampled with a more brute-force method.
Based on the figure above, the data provide much more information about $\sigma_\mathrm{cl}(\lambda)$ at low richness than at high richness. This is why we decided to normalize the power law such that $A_\sigma$ parametrizes the curve near the lower end of the data. If it isn't obvious why this is convenient, try changing the "pivot" value of 10 in the plotting cell below to something else (e.g. 35) and iteratively guessing parameter values that make the resulting curve go more or less through the constraints on $\sigma_\mathrm{cl}$. It's much less annoying, for both a human and a sampler, to set the normalization where the data have things locked down and then adjust the slope to something reasonable than to try to juggle both (i.e., normalizing at low richness in this case will reduce the degeneracy between the two parameters).
Of course, we will be fitting the actual data and not the constraints from the last section. But go ahead and come up with some broadly reasonable parameter values anyway.
# guess_A = ...
# guess_alpha = ...
# YOUR CODE HERE
raise NotImplementedError()
xvals = np.arange(7,75)
yvals = guess_A * (xvals / 10.)**guess_alpha
fig, ax = plt.subplots(1, 1, figsize=(4.0, 3.0));
mid,low,high = np.quantile(chains3[:,:,np.arange(len(data))+len(data)], [0.5,0.1585, 0.8415], axis=(0,1))
ax.errorbar(richness, mid, [mid-low, high-mid], fmt='.'); ax.set_xlabel(r'$\lambda$'), ax.set_ylabel(r'$\sigma_\mathrm{cl}$');
ax.set_xscale('log'); ax.set_yscale('log');
ax.plot(xvals, yvals);
Anyhoo, here is the official enumeration of parameter names for this analysis:
paramnames4 = ['fb0', 'fb1', 'fb2', 'fb3', 'fb4', 'A_sigma', 'alpha_sigma', 'mu_b', 'sigma_b']
param_labels4 = [r'$f_\mathrm{b,0}$', r'$f_\mathrm{b,1}$', r'$f_\mathrm{b,2}$', r'$f_\mathrm{b,3}$', r'$f_\mathrm{b,4}$',
r'$A_\sigma$',r'$\alpha_\sigma$', r'$\mu_\mathrm{b}$', r'$\sigma_\mathrm{b}$']
Implementation¶
Let's first look at the conjugate sampling of the background fraction and background distribution parameters. Naturally, for these updates, we hold $A_\sigma$ and $\alpha_\sigma$ (and thus all the $\sigma_{\mathrm{cl},i}$ values that follow from them) fixed. Hence, this will be a slight simplification over the last section.
We will still need a `hyperparams` dictionary encoding the priors for the conjugate-sampled parameters, as before. Let's just copy what we need from the previous section:
hyperparams4 = {}
for k in ['fb0', 'fb1', 'fb2', 'fb3', 'fb4', 'gaussb']:
hyperparams4[k] = hyperparams3[k]
The updating function is identical to the last section, except that the $\sigma_{\mathrm{cl},i}$ updates are left out, and we assume that their current values (based on $A_\sigma$ and $\alpha_\sigma$) are provided in the usual way via `par`:
def gibbs_iteration4(xs, par, hypar):
# xs is a list of data vectors for each bin
nbins = len(xs)
for i in range(nbins):
si = str(i)
p = condpost_group_membership(xs[i], par['fb'+si], par['gauss'+si], par['gaussb'])
par['g'+si] = st.bernoulli.rvs(p)
alpha,beta = condpost_fraction(par['g'+si], hypar['fb'+si])
par['fb'+si] = st.beta.rvs(alpha, beta)
# add the updates of par['gaussb']['mu'] and par['gaussb']['sigma2'] here (identical to the last section)
# YOUR CODE HERE
raise NotImplementedError()
We will use `lmc` to sample the remaining parameters and to handle the juggling of the two parameter subsets into the production of a single chain. We'll provide most of the logistical nonsense, since it isn't especially educational. However, it will be helpful to understand how things are pasted together.

In brief, we anticipate using parameter and hyperparameter dictionaries, as above, for compatibility with all of the conjugate sampling code. The "official" parameters known to the chain runner will be `lmc.Parameter`s, though, so we will need to copy values back and forth at appropriate times. Finally, while priors on the conjugate parameters are handled by the functions above, you will need to provide a (conditional) log-posterior function for $A_\sigma$ and $\alpha_\sigma$ that incorporates their priors. This is not necessarily the most elegant way of accomplishing the goal, but it will work well enough with the separate analyses you've done above, with minimal re-writing of code.
With that in mind, we define an `lmc.ParameterSpace` (and equivalent dictionary) that organizes all of the parameters other than the group assignments. This is not the list of parameters that `lmc` will try to sample with its own methods, but rather the one that it will output to form the chain. (We could also keep track of the group assignment parameters in the chain, but... there are a lot of them.)
parameter_space4 = lmc.ParameterSpace([lmc.Parameter(name=s) for s in paramnames4])
parameter_dict4 = {k:p for k,p in zip(paramnames4, parameter_space4)}
The cell below defines a new type of `lmc.Updater` that will handle the conjugate parameters. It simply calls, in turn, the function you completed above, and then ensures that the conditional posterior for the non-conjugate parameters is re-computed at the next opportunity to reflect those updates.
class GibbsUpdater(lmc.Updater):
def __call__(self, par):
# update the parameter values in the par dictionary
gibbs_iteration4(data, par, hyperparams4)
# set lmc Parameter values from our dictionary, so current values are available that way
for k in ['fb0', 'fb1', 'fb2', 'fb3', 'fb4']:
parameter_dict4[k].set(par[k])
parameter_dict4['mu_b'].set(par['gaussb']['mu'])
parameter_dict4['sigma_b'].set(np.sqrt(par['gaussb']['sigma2']))
# force re-evaluation of the log-posterior with these new values the next time the non-Gibbs updater is called
self.engine.current_logP = None
gibbs_updater4 = GibbsUpdater(None, None, 0, 0, None, None) # all arguments are ultimately ignored, as they do not apply to this simple class
Next, define the conditional posterior function for $A_\sigma$ and $\alpha_\sigma$. Recall from way above that the argument passed to various `lmc` functions is somewhat arbitrary, being ultimately up to the user. In this case, we've written things such that this argument will be the parameter dictionary used in the conjugate updates. This means that you will need to get some other things from global scope.
- The `data` and `richness` arrays can/should be directly referenced from global scope.
- The values of $A_\sigma$ and $\alpha_\sigma$ to test are obtained from `parameter_dict4['A_sigma']()` and `parameter_dict4['alpha_sigma']()` (or equivalently from `parameter_space4`). They should not be extracted from `par`, as `lmc` doesn't change that dictionary when proposing a step.

In contrast, any other parameter values that you need can be accessed either from `par` or `parameter_dict4`/`parameter_space4`, since the `GibbsUpdater` synchronizes them. The exception to this is the group assignment arrays (`g0`, `g1`, ...), which are only in `par`. (But note that this function will be called after the conjugate updates, so they will be there even though we haven't represented them yet.)
def nongibbs_lnpost4(par):
# compute log priors
# sum up the log likelihood for each galaxy and bin
# return the log-posterior
# keep in mind that ONLY terms involving A_sigma and alpha_sigma need be included here
# YOUR CODE HERE
raise NotImplementedError()
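To make the requirements concrete, here is an illustrative sketch of such a function. It assumes, purely as a placeholder, flat priors with $A_\sigma>0$; substitute whatever priors we actually agreed on. Note that only the cluster-member ($g=0$) part of the likelihood involves $A_\sigma$ and $\alpha_\sigma$, so nothing else needs to be summed. The `_sketch` name is, again, just illustrative.

def nongibbs_lnpost4_sketch(par):
    # current values of the non-conjugate parameters come from the lmc Parameters, not `par`
    A = parameter_dict4['A_sigma']()
    alpha = parameter_dict4['alpha_sigma']()
    if A <= 0.0:
        return -np.inf        # zero prior probability (placeholder prior)
    lnp = 0.0                 # log of a flat prior, up to a constant
    for i in range(len(data)):
        sigma_cl = A * (richness[i]/10.)**alpha
        xcl = data[i][par['g'+str(i)] == 0]  # galaxies currently assigned to the cluster component
        lnp += np.sum(st.norm.logpdf(xcl, loc=0.0, scale=sigma_cl))
    return lnp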
We'll want a function that provides a random starting point for chains, as in the previous sections, so we may as well write that before testing out the code above. Below, you'll need to provide initializations for all the parameters in `parameter_dict4`, which we then copy to a conjugate parameter dictionary. You will also need to supply proposal scales for $A_\sigma$ and $\alpha_\sigma$ (the `width` attributes of the conjugate parameters will never be looked at).
def initialize_params4():
# Initialize global Parameter objects
# parameter_dict4['fb0'].set( ... )
# parameter_dict4['fb1'].set( ... )
# parameter_dict4['fb2'].set( ... )
# parameter_dict4['fb3'].set( ... )
# parameter_dict4['fb4'].set( ... )
# parameter_dict4['A_sigma'].set( ... )
# parameter_dict4['alpha_sigma'].set( ... )
# parameter_dict4['mu_b'].set( ... )
# parameter_dict4['sigma_b'].set( ... )
# parameter_dict4['A_sigma'].width = ...
# parameter_dict4['alpha_sigma'].width = ...
# YOUR CODE HERE
raise NotImplementedError()
# Copy everything to our conjugate-style parameter dictionary, and return it
params = {'gaussb':{'mu':parameter_dict4['mu_b'](), 'sigma2':parameter_dict4['sigma_b']()**2}}
for k in ['fb0', 'fb1', 'fb2', 'fb3', 'fb4', 'A_sigma', 'alpha_sigma']:
params[k] = parameter_dict4[k]()
for i in range(len(data)):
params['gauss'+str(i)] = {'mu':0.0, 'sigma2':(params['A_sigma']*(richness[i]/10.)**(params['alpha_sigma']))**2}
return params
Check that this does something reasonable, and that the values stored in the two places agree:
# YOUR CODE HERE
raise NotImplementedError()
params = initialize_params4()
for p in parameter_dict4.keys():
print(p, '=', parameter_dict4[p]())
params
Next, let's check that the conjugate updating function appears to work (i.e. changes values to something finite/not entirely unreasonable, and adds the `g` parameters)...
gibbs_iteration4(data, params, hyperparams4)
params
... and that the conditional (non-conjugate) log-posterior function for the other parameters behaves.
lnp = nongibbs_lnpost4(params)
print(lnp)
assert np.isfinite(lnp)
Just a couple final bits of setup. First we need to define the `ParameterSpace` that `lmc` will sample with its own methods, based on the log-posterior function you wrote...
nongibbs_space4 = lmc.ParameterSpace([parameter_dict4['A_sigma'], parameter_dict4['alpha_sigma']], nongibbs_lnpost4)
... and finally a function that is executed after each iteration (that is, after performing both the conjugate and non-conjugate updates), which we can use to synchronize the non-conjugate parameter values back into our own dictionary.
def onStep(par):
# set our dictionary from the lmc Parameters that the non-Gibbs updater just changed
for k in ['A_sigma', 'alpha_sigma']:
par[k] = parameter_dict4[k]()
# don't forget the cluster distribution widths for the individual bins!
for i in range(len(data)):
par['gauss'+str(i)]['sigma2'] = (par['A_sigma']*(richness[i]/10.)**(par['alpha_sigma']))**2
Running the MCMC¶
Ok! Hopefully, we are now just about ready to push go and see what happens. But, before that, a tiny bit more explanation of this mixed sampling approach that has been implemented.
In the chain-running cell below, you will see a familiar structure, and some unfamiliar code. For each chain, we use your `initialize_params4` function to provide a random starting position. What follows is some `lmc` setup that isn't needed for a bog-standard analysis. Namely, we explicitly select a proposal method for the non-conjugate parameters, in this case one that uses slice sampling to jointly update both parameters, adapting the proposal scale every so often (this is the default that was silently used in the first section). The `Engine` that runs the chain is created and told that each iteration consists of calling the conjugate parameter updater and the non-conjugate parameter updater (in order), followed by a call to the `onStep` function, and that it will be recording the values of all the parameters in `parameter_space4`. Finally, the call to the engine provides the data structure in which to store the parameter traces, and the `params` dictionary that is passed to the various updater functions above. You don't have to understand how all this fits together; the point is more that mixed sampling like this is possible and sometimes advantageous.
As always, you might want to run this first with a single, short chain just to verify that things are working.
%%time
nchains = 4
nsamples = 1500
chains4 = [lmc.dictBackend() for i in range(nchains)]
for chain in chains4:
params = initialize_params4()
nongibbs_updater4 = lmc.MultiDimRotationUpdater(nongibbs_space4, lmc.Slice(), 100, 100, None, None)
engine = lmc.Engine([gibbs_updater4, nongibbs_updater4], parameter_space4, onStep)
engine(nsamples, params, [chain])
chains4 = np.array([np.array([chain[p] for p in paramnames4]).T for chain in chains4])
fig, ax = plt.subplots(len(paramnames4), 1, figsize=(16.0, 2.0*len(paramnames4)));
cr.plot_traces(chains4, ax, labels=param_labels4);
Take us through the usual diagnostics.
# burn4 = ...
# YOUR CODE HERE
raise NotImplementedError()
fig, ax = plt.subplots(len(paramnames4), 1, figsize=(16.0, 2.0*len(paramnames4)));
cr.plot_traces(chains4[:,burn4:,:], ax, labels=param_labels4);
chains4 = chains4[:,burn4:,:]
R = cr.GelmanRubinR(chains4)
print('R =', R)
assert np.all(R < 1.1)
maxlag4 = 500 # may need to adjust
neff = cr.effective_samples(chains4, maxlag=maxlag4, throw=True)
print('neff =', neff)
assert np.all(neff > 400)
You will probably notice, above, that the effective number of samples for the non-conjugate parameters is much smaller than for the conjugate ones.
Below is our slightly smaller triangle for this section.
plotGTC(np.concatenate(chains4), paramNames=param_labels4,
figureSize=12, customLabelFont={'size':12}, customTickFont={'size':12}, customLegendFont={'size':16});
Finally, let's compare the posterior for $\sigma_\mathrm{cl}(\lambda)$ with the non-hierarchical constraints from the last section, through the time-honored method of overplotting curves from many posterior samples, with transparency.
xvals = np.arange(7,75)
fig, ax = plt.subplots(1, 1, figsize=(6.0, 4.0));
mid,low,high = np.quantile(chains3[:,:,np.arange(len(data))+len(data)], [0.5,0.1585, 0.8415], axis=(0,1))
ax.errorbar(richness, mid, [mid-low, high-mid], fmt='.'); ax.set_xlabel(r'$\lambda$'), ax.set_ylabel(r'$\sigma_\mathrm{cl}$');
ax.set_xscale('log'); ax.set_yscale('log');
for A,alpha in np.concatenate(chains4)[np.random.choice(np.prod(chains4.shape[0:2]), size=100), 5:7]:
yvals = A * (xvals / 10.)**alpha
ax.plot(xvals, yvals, color='C1', alpha=0.1, zorder=0);
You can imagine going on to test whether the power law is a sufficient description of this scaling relation, but does it visually look consistent with the independent constraints from earlier?
I_have_pondered_this = False # make True when true
# YOUR CODE HERE
raise NotImplementedError()
assert I_have_pondered_this
Parting thoughts¶
In this notebook, you got some experience with a small Gaussian mixture, implementing it both with traditional posterior evaluations and with conjugate Gibbs sampling. The latter case demonstrated the introduction of latent parameters in a hierarchical setting. Here the Gibbs approach turned out to be more efficient in terms of time per effectively independent sample, but you can imagine that juggling an additional vector of parameters that scales with the size of the data set isn't always the best decision. We'll also note that for models more complex than this (arguably even for this one), we should probably have used JAGS or a similar Gibbs sampling code, rather than implementing the updates ourselves. Too bad we are allergic to installing non-Python software in this class.
You also got some practice implementing a simple modeling hierarchy - the shared background distribution - and a slightly less simple one - the power-law scaling relation. The latter gave us an excuse to demonstrate the use of different sampling methods for different subsets of parameters. This is a nice option to know about for those times that you have intimate knowledge of the model and canned algorithms aren't efficient enough to be practical.
This was a slightly unusual case in that the cluster member velocities genuinely should be Gaussian based on physical considerations, while the background component was surprisingly well described by a single additional Gaussian. Perhaps a more common use of GMMs is to marginalize over a potentially complicated distribution which constitutes a "nuisance" part of the model. That is, where we don't care about the model being physically interpretable, and just throw enough Gaussians at the problem to reproduce the data. There are interesting problems that arise in so doing, such as deciding how many is "enough", and the sampling quirks that follow from the exchangeability of Gaussians in the mixture (i.e. the same posterior probability can be obtained by swapping parameters between components). These aren't reasons to avoid mixture models, but in general we should be careful not to overinterpret the GMM parameters as meaning that there actually are a certain number of distinct components making up a given distribution.