Tutorial: Cluster Membership Fractions of RedMaPPer Galaxies

Behold the power of conjugacy!

In this notebook, we will reproduce part of the analysis of Myles et al. (2021), using data kindly provided by the first author. In the course of doing so, you will fit a small Gaussian mixture model to spectroscopic data for candidate cluster member galaxies, first with a standard MCMC posterior evaluation and then with conjugate Gibbs sampling.

Note that having already done the Gibbs sampling tutorial will save you some time. If not, you'll want to at least review the exposition there.

Background

Galaxies that fall into massive accumulations of dark matter and hot gas (these are, inaccurately, known as "clusters of galaxies") rapidly have their gas stripped and cease forming stars. Hence, if one has an optical photometric survey, one way of finding galaxy clusters is to look for... clusters of galaxies that have similar colors (corresponding to red/non-star-forming spectra at some common redshift).

The thing is, along any line of sight there are loads of galaxies of different types at different redshifts, so color alone is not a reliable way of saying that a particular galaxy is in a cluster. Optical photometric cluster finders can only identify sight-lines where there is probably a cluster, and estimate what its redshift would be. They also provide a "richness", loosely defined as the number of intrinsically red galaxies brighter than some threshold that the cluster is supposed to contain. But, again, we can't really be sure whether any given galaxy is actually a member or not from photometric data alone. RedMaPPer, the main cluster-finder used by the Dark Energy Survey and (soon) Rubin/LSST, addresses this by assigning a membership probability to each galaxy near an identified cluster; the richness is then the sum of these probabilities, such that it should statistically mean something like the number of bright, red galaxies in the cluster.

So far so good. But this approach is only good to the extent that the membership probabilities are accurate - or, at least, similarly accurate as a function of cluster mass. This is worth checking, which we could do using redshifts measured from spectroscopy for a large number of potential cluster members. The redshifts (strictly speaking, the implied velocities) of cluster members should be distributed in a tight Gaussian centered on the redshift of the bright central galaxy (BCG). We would expect a wider distribution of redshifts from galaxies that are not in the cluster, despite their having colors similar to cluster members.

(Analogously to the photometric color problem, it's technically possible for a galaxy at a different cosmological redshift to have such a large peculiar velocity that its redshift is consistent with the cluster's, but redshifts can be measured so precisely, and the cluster distribution is so narrow, that this should be rare. The model we'll build below will account for it, in any case.)

Data

The data we'll be working with ultimately come from the Sloan Digital Sky Survey. Myles et al. have compiled redshifts from Sloan spectroscopy for potential members of RedMaPPer clusters with estimated redshifts of 0.08-0.12 (this is relatively low, but it is the only sample for which archival spectroscopy would be relatively complete). Various fascinating cuts, which you can read the paper to learn about if you want, have been applied, and the data have been grouped into 6 bins according to the reported richness of the cluster each galaxy is supposed to belong to, with bins 0, 1, ..., 5 corresponding to increasing richness ranges (nominally increasing cluster mass).

For each bin, we simply have a list of values: $x_i = \frac{z_i - z_\mathrm{BCG}}{1 + z_\mathrm{BCG}}$. This quantity is directly related to the velocity implied by the galaxy's redshift, assuming it does belong to the cluster. For simplicity, we'll eschew the conversion to velocity units in the tutorial.

Below, we read the data for each bin into a corresponding entry of the list data.
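In case you're recreating this step outside the notebook, a minimal sketch of the reading step might look like the following; the file names are hypothetical placeholders, not the actual paths distributed with the tutorial.

```python
import numpy as np

# Hypothetical file names -- substitute the paths provided with the tutorial data.
data = [np.loadtxt('redmapper_xvalues_bin{}.txt'.format(i)) for i in range(6)]
```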

Next, let's have a look at histograms of $x$ for each bin:

Already we can clearly see what might be a sharp spike consisting of cluster members, and a wider distribution of non-members. The latter aren't so easy to see, however, so we can instead plot the histograms with a log-scaled y axis:
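If you're working outside the notebook, the plotting might look something like this minimal sketch (the panel layout and binning are just illustrative choices, not the notebook's):

```python
import matplotlib.pyplot as plt

# One panel per richness bin; binning and figure size are illustrative.
fig, axes = plt.subplots(2, 3, figsize=(12, 6))
for i, ax in enumerate(axes.flatten()):
    ax.hist(data[i], bins=100)
    ax.set_yscale('log')          # log y axis brings out the broad background
    ax.set_title('bin {}'.format(i))
    ax.set_xlabel('x')
plt.tight_layout()
plt.show()
```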

Recall that the log of a Gaussian is an upside-down parabola - it already looks like a couple of these might describe the data reasonably.

Model

So, what model will we fit to these distributions of $x$ values? The distribution for cluster members is thermodynamically expected to be a Gaussian, but it isn't obvious what we should expect for the non-members - it would depend on the cosmic large scale structure as well as the RedMaPPer selection of these potential members in the first place. On the other hand, the wider distributions above look approximately Gaussian, and we can always build a model for them out of multiple Gaussians if need be. In other words, this might be a good place to use a Gaussian mixture model,

$p(x) = f\cdot\mathrm{Normal}(x|0,\sigma_c) + (1-f)\cdot\mathrm{Normal}(x|\mu_b,\sigma_b)$.

Here, we've written the simplest mixture we might try, with 1 Gaussian component for the cluster ($c$) and just 1 Gaussian component for the background ($b$). We've also baked in the expectation that the cluster component have a mean of zero (i.e., be centered on the BCG's $x$ value).

To begin with, we'll fit the data for each richness bin with this model independently, so the expression above includes all the parameters we need:

As for priors, I propose we use generic "uninformative" ones:

As always, your next task is to draw the corresponding PGM and write out the conditional probability expressions that define the model.

Here are the canonical parameter names and order we'll use in code:

Visualize the model

Before going further, it will be useful to be able to quickly compare a model with the data. We could directly plot $p(x)$ over the histograms above, but the tails, where there are 0 or 1 measured values in a histogram bin, are tricky to judge. Therefore, we will look at comparisons of both the model PDF and CDF, the latter compared with the empirical CDF of the data.

Below, fill in functions to evaluate $p(x)$ and its CDF.
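If you get stuck, one way these functions might look is sketched below; the function names and the assumed parameter order (f, sigma_c, mu_b, sigma_b) are ours, so adapt them to the canonical names and order defined above.

```python
import numpy as np
from scipy.stats import norm

def model_pdf(x, f, sigma_c, mu_b, sigma_b):
    # mixture of the cluster Gaussian (mean fixed at 0) and the background Gaussian
    return f * norm.pdf(x, 0.0, sigma_c) + (1.0 - f) * norm.pdf(x, mu_b, sigma_b)

def model_cdf(x, f, sigma_c, mu_b, sigma_b):
    # the CDF of a mixture is the same weighted mixture of the component CDFs
    return f * norm.cdf(x, 0.0, sigma_c) + (1.0 - f) * norm.cdf(x, mu_b, sigma_b)
```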

Here's a function to make the described comparison plots:

Next, iteratively fill in the list below with guessed parameter values until you have something that looks like a broadly reasonable fit to the bin 1 data.

MCMC fit

Next, you'll do a standard MCMC fit to the bin 1 data, mainly to establish a baseline using a bog-standard method.

Below, complete the log-posterior function for our model. Don't forget to include the prior!
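As a reference point, a minimal sketch of such a function is shown here; the parameter order and the prior bounds are assumptions on our part, so use whatever ranges you adopted above.

```python
import numpy as np
from scipy.stats import norm

def log_posterior(params, x):
    # Parameter order assumed to be (f, sigma_c, mu_b, sigma_b).
    f, sigma_c, mu_b, sigma_b = params
    # Prior: the bounds below are placeholders -- use the ranges you chose above.
    if not (0.0 <= f <= 1.0 and sigma_c > 0.0 and sigma_b > 0.0):
        return -np.inf
    # Likelihood: sum over galaxies of the log of the mixture density.
    like = f * norm.pdf(x, 0.0, sigma_c) + (1.0 - f) * norm.pdf(x, mu_b, sigma_b)
    if np.any(like <= 0.0):
        return -np.inf
    return np.sum(np.log(like))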

Now, let's check the function on your guessed parameter values.

Hint: If you're looking at $\pm\infty$ above, do make sure you're taking a sum of logs rather than a log of a product.

While it doesn't turn out to be an issue in this case, the hint above gets to a weakness of brute-force posterior calculations when using mixture models. Namely, because part of the posterior involves a sum of PDFs, we do actually have to evaluate those PDFs, rather than exclusively working with log-PDFs. This opens a door to numerical over/underflows that we might not need to worry about otherwise.
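For what it's worth, one standard way to guard against such underflows (not needed in this tutorial, but good to know about) is the log-sum-exp trick, e.g. via `scipy.special.logsumexp`:

```python
import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

def log_mixture_likelihood(params, x):
    # Evaluate each component in log space and combine with log-sum-exp,
    # avoiding underflow of the Gaussian PDFs in the far tails.
    f, sigma_c, mu_b, sigma_b = params
    log_terms = np.stack([np.log(f) + norm.logpdf(x, 0.0, sigma_c),
                          np.log(1.0 - f) + norm.logpdf(x, mu_b, sigma_b)])
    return np.sum(logsumexp(log_terms, axis=0))
```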

Next, we'll do sampling with emcee. The number of steps and burn-in length supplied below worked well enough given my starting point, but you should change them if need be.
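In case it helps to see the shape of a typical setup, a minimal sketch follows; the walker count, step numbers, and the name `guess` (standing in for your guessed parameter list) are placeholders, not the values supplied in the notebook.

```python
import emcee
import numpy as np

nwalkers, ndim = 16, 4
# Scatter the walkers slightly around the guess (tiny absolute jitter in case
# any guessed value is exactly zero).
start = np.array(guess) * (1.0 + 0.01 * np.random.randn(nwalkers, ndim)) \
        + 1.0e-5 * np.random.randn(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[data[1]])
sampler.run_mcmc(start, 5000)
samples = sampler.get_chain(discard=500, flat=True)  # adjust burn-in as needed
```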

Here are the parameter traces...

... and the usual quantitative diagnostics.

Checkpoint: All looking good?

If so, let's compare the posterior mean to the data.

It should look at least as good as your guess. Finally, let's make a triangle plot.

Checkpoint: The "truth" values plotted over top show the posterior mean we obtained, so they should go through the center of your distributions.

Conjugate Gibbs fit

Hopefully it won't come as a shock to learn that a model composed entirely of Gaussians (or any standard distribution, really) lends itself to conjugacy. If you've done the Gibbs sampling tutorial, your work from there can be mostly (see warning below) re-used, and the generalization from 1 Gaussian to a Gaussian mixture is not too much more.

On the other hand, exactly how one comes up with a conjugacy relation for a sum of Gaussians may not be obvious. The trick is... we don't need to! Instead, we can get around the question entirely by introducing a set of latent parameters, $g$, where $g_i$ tells us whether galaxy $i$ belongs to the cluster distribution ($g_i=0$) or the background distribution ($g_i=1$). You could think of this as re-writing our model PDF as

$p(x_i) = (1-g_i)\cdot\mathrm{Normal}(x_i|0,\sigma_c) + g_i\cdot\mathrm{Normal}(x_i|\mu_b,\sigma_b)$.

So the $g_i$ parameters are playing a role similar to $f$ before, but, for any given galaxy, $p(x_i|g_i)$ is just a single Gaussian. Of course, $f$ is still part of the model, and will play a role in determining the $g_i$ - after all, $f$ is exactly the a priori probability that any $g_i=0$.

We'll fill in the details in a moment. But first, modify your PGM and expressions to explicitly show the latent $g_i$ parameters. If you're confused about how to connect them to everything else, remember... think generatively! (And then re-read the final clause of the previous paragraph.)

Conjugate updates

Recall that, in conjugate Gibbs sampling, we update one parameter (or bunch of related parameters) at a time, with all the others fixed. The advantage of introducing the group membership parameters is that, at any given moment in our sampling, each galaxy's $x$ is drawn from exactly one Gaussian. Conversely, if we want to update the parameters of a Gaussian, we can use the update rules you worked out in the Gibbs sampling tutorial, as long as only the data for galaxies with the corresponding $g$ value are included in the calculation. Note that we will need to choose hyperparameters of the conjugate prior distributions that give us equivalent priors to what we decided above (see the Gibbs sampling notebook).

Of course, we will also need rules for updating all of the $g_i$ values, as well as $f$.

Updating $g$

To see how to update each of the $g_i$, write down the terms in the posterior where it appears, based on your PGM and equivalent expressions. (Remember, it's the fully conditional posterior that we need to work with.) Hopefully without giving away too much, you should be left with something like

$p(g_i|\ldots) \propto p(g_i|f) \, p(x_i|g_i,\ldots)$.

Since $g_i$ can only take the values 0 and 1, we simply need to evaluate the conditional posterior at both values, normalize them, and then randomly choose a new $g_i$ based on those 2 probabilities.
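In code, that update might look something like the sketch below, written with explicit arguments for clarity; you'll want to adapt it to whatever parameter bookkeeping you adopt in the implementation section.

```python
import numpy as np
from scipy.stats import norm

def update_g(x, f, sigma_c, mu_b, sigma_b):
    # Unnormalized conditional probabilities of g_i = 0 (cluster) and g_i = 1 (background).
    p0 = f * norm.pdf(x, 0.0, sigma_c)
    p1 = (1.0 - f) * norm.pdf(x, mu_b, sigma_b)
    prob1 = p1 / (p0 + p1)                     # normalized probability that g_i = 1
    # Draw each g_i from a Bernoulli distribution with that probability.
    return (np.random.rand(len(x)) < prob1).astype(int)
```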

Updating $f$

Following the same approach as above, the conditional posterior for $f$ looks like

$p(f|\ldots) \propto p(f) \, \prod_i p(g_i|f) = p(f) \, f^{n_0} (1-f)^{n_1}$,

where $n_k$ is the number of $g_i$ equal to $k$. You might recognize these last 2 factors as being proportional to the binomial distribution, $p(n|q;N)={N \choose n} q^n (1-q)^{N-n}$. Consult Wikipedia (or work it out yourself) to find the conjugate form of the prior for $f$, and determine the values of the hyperparameters $\alpha_0$ and $\alpha_1$ that reproduce the uniform prior we assumed above. Let us know the answer below.

Implementation

Let's begin by defining a more complete dictionary of model parameters, including ones that aren't free:

Below, we fill in one corresponding to your guess. The g entry just gets a placeholder for now, which is fine. In fact, if we make $g$ our first update, we never need to bother coming up with initial values for it!
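For concreteness, such a dictionary might look something like the following; the keys and numerical values here are placeholders of ours, so substitute your own guess and whatever structure the notebook actually uses.

```python
params = {'f': 0.5,                               # cluster member fraction (your guess)
          'g': None,                              # placeholder; set by the first g update
          'p': [{'mu': 0.0, 'sigma': 0.002},      # cluster Gaussian (mean fixed at 0)
                {'mu': 0.0, 'sigma': 0.02}]}      # background Gaussian
```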

This is a convenience function to translate such a dictionary back to a list of values, so we can tabulate our samples.

We'll also want a similar dictionary holding the prior hyperparameters you chose above. We provide a template below; the p entry is again a list with dictionaries corresponding to (the hyperparameters of the priors for the parameters of) each Gaussian in the mixture. The names of the hyperparameters in p are chosen to simplify re-use of code from the Gibbs sampling tutorial. (Note that we don't need hyperparameters for the mean of the cluster Gaussian because we'll still be keeping it fixed to zero.)

With that, it's time to write some parameter updating functions.

Warning: Before you gleefully copy/paste your update_sigma function from the Gibbs tutorial, remember that in that notebook we were in the funny position of having 2 bits of information about $\sigma$ for every data point instead of 1. There is a small but crucial change to be made compared with the previous case, in addition to identifying the relevant data points for a given Gaussian.
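As a generic reference (not the notebook's own code), the standard Normal/inverse-gamma conjugate update restricted to one component might be sketched as below; the hyperparameter names 'alpha' and 'beta' and the dictionary keys are assumptions on our part, and you still need to make sure the details match what you derived in the Gibbs tutorial.

```python
import numpy as np
import scipy.stats as st

def update_sigma(params, hypers, x, j):
    # Conjugate update for the width of Gaussian component j, using only the
    # galaxies currently assigned to it. Assumes an inverse-gamma prior on the
    # variance with hyperparameters 'alpha' and 'beta' (placeholder names).
    xj = x[params['g'] == j]
    alpha = hypers['p'][j]['alpha'] + 0.5 * len(xj)
    beta = hypers['p'][j]['beta'] + 0.5 * np.sum((xj - params['p'][j]['mu'])**2)
    # Draw a new variance from the inverse-gamma conditional posterior.
    params['p'][j]['sigma'] = np.sqrt(st.invgamma.rvs(alpha, scale=beta))
    return params
```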

And that, dear friends, is why codes like JAGS that figure out and implement conjugacies for you exist. Fear not, we won't ask you to go through this again. Probably.

In any case, the cell below will test each one of these functions. If you get crashes, break it up and debug as needed.

If your code is successfully producing numbers, let's have a look at which galaxies it assigned to each group:

Checkpoint: Does the fraction of $g$ in component 0 approximately agree with the post-update value of $f$? It should.

Before we try to run a chain, let's do a more stringent check to make sure the parameters don't run away to silly values after repeated updates. First, we'll codify the updates in the same order as above, for brevity.

And now run 100 iterations, printing out the final parameter values.
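A generic driver along these lines may help, assuming each updater takes `(params, hypers, x)` (component-specific updaters can be bound with `functools.partial`) and the dictionary layout sketched earlier; all names here are placeholders, not the notebook's.

```python
import numpy as np

def run_gibbs(params, hypers, x, updaters, nsteps):
    # Apply each updater in turn (one full Gibbs scan) and record the free
    # parameters after every scan.
    samples = np.empty((nsteps, 4))
    for k in range(nsteps):
        for update in updaters:
            params = update(params, hypers, x)
        samples[k] = [params['f'], params['p'][0]['sigma'],
                      params['p'][1]['mu'], params['p'][1]['sigma']]
    return samples
```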

Checkpoint: Compare these to the posterior means and standard deviations from the emcee analysis (reproduced in the next cell). If anything is significantly off, one or more of your updaters needs more debugging. (As you'll see below, 100 steps is more than enough for this method to converge.)

If everything above is working, we're ready to try a longer chain. The class below will simplify the process of running and checking multiple chains, which we'll eventually use for the other richness bins as well.

Here we set up one of these things for each of the richness bins, in a list.

The cells below run a set of 4 chains for the bin 1 data, and go through the usual diagnostics and checks. As before, the length, burn-in and maximum lag are given values that worked well for us, given our starting guess, but you should change them if necessary.

(If you're worried about giving all 4 of the chains the same initial position, good for you! Experimentation has shown that this particular problem converges extremely well regardless, so we're being lazy.)

Checkpoint: The chains should converge very quickly, such that our chosen burn-in period of 50 is overkill. If not, something is buggy.

If things look ok, let's see how these samples compare with those from emcee:

Checkpoint: The two posteriors should agree very well, of course. If not, again, something is fishy.

Results for all bins

If everything appears to be working well for bin 1, let's go ahead and run for the other bins. Our experience was that the same initial guess, chain length, burn-in, etc. worked fine in all cases, but as always you should change anything that needs it. We've also added plots to compare the posterior modes with the data. Please do look at all of these rather than just plowing through... we will be very disappointed if we see tutorials with clear yet unidentified issues turned in.

Checkpoint: Do all of the diagnostics look ok? How about the comparisons to data? Anything stand out?

Because you've earned it, below we show the results for all bins on the same triangle plot. As a last checkpoint, the "truth" values show our posterior mean for bin 4.

Some features should stand out here. First, there are clearly trends in the cluster member fraction, $f$, and cluster velocity dispersion, $\sigma_c$, from low richness (bin 0) to high richness (bin 5). Finding the former was the point of the paper we're following, while the latter had better be the case, given that velocity dispersion is like a dynamical temperature, and should therefore increase with halo mass.

The other feature, which wasn't obvious a priori, is that the background distribution looks the same across bins. This led the authors to do a second fit with a common background distribution across all bins. We won't reproduce it here, since it's essentially an exercise in bookkeeping given what you've done above, and doesn't change the results in unexpected or interesting ways.

Parting thoughts

In this notebook, you got some experience with a small Gaussian mixture, implementing it with a traditional posterior-evaluation and with conjugate Gibbs sampling. The latter case demonstrated the introduction of latent parameters in a hierarchical setting. Here the Gibbs approach turned out to be more efficient in terms of time per effectively independent sample, but you can imagine that juggling an additional vector of parameters that scales with the size of the data set isn't always the best decision. We'll also note that for models more complex than this (arguably even for this one), we should probably have used JAGS or a similar Gibbs sampling code, rather than implementing the updates ourselves.

This was a slightly unusual case in that the cluster member velocities genuinely should be Gaussian based on physical considerations, while the background component was surprisingly well described by a single additional Gaussian. Perhaps a more common use of GMMs is to marginalize over a potentially complicated distribution which constitutes a "nuisance" part of the model. That is, where we don't care about the model being physically interpretable, and just throw enough Gaussians at the problem to reproduce the data. There are interesting problems that arise in so doing, such as deciding how many is "enough", and the sampling quirks that follow from the exchangeability of Gaussians in the mixture (i.e. the same posterior probability can be obtained by swapping parameters between components). These aren't reasons to avoid mixture models, but in general we should be careful not to overinterpret the GMM parameters as meaning that there actually are a certain number of distinct components making up a given distribution.