Tutorial: Evaluation of Galaxy Cluster Centering Models¶
Previously, we have mostly done simple, visual comparisons of fitted models or posterior predictions with data. In this notebook, we'll get more quantitative about saying whether a model could plausibly produce the observed data. Specifically, for a simple data set encoding the difference between 2 methods of defining a galaxy cluster's center, you will
- fit a simple model, visually compare the posterior to the data, and quantitatively compare posterior predictions of a test statistic with the data;
- propose a more complex model and repeat the same comparisons;
- compare the 2 models using the Deviance Information Criterion;
- compare the 2 models using the Bayesian evidence.
from os import getcwd
from os.path import exists as file_exists
from yaml import safe_load
import numpy as np
from scipy.special import logsumexp
import scipy.stats as st
import matplotlib.pyplot as plt
%matplotlib inline
import emcee
import incredible as cr
from pygtc import plotGTC
thisTutorial = 'model_evaluation'
if getcwd() == '/content':
# assume we are in Colab, and the user's data directory is linked to their drive/Physics267_data
from google.colab import drive
drive.mount('/content/drive')
datapath = '/content/drive/MyDrive/Physics267_data/' + thisTutorial + '/'
else:
# assume we are running locally somewhere and have the data under ./data/
datapath = 'data/'
The Dataset¶
Our data is just a list of numbers. Each one represents a measured distance, $y$, between two different estimates of the center of a galaxy cluster: the location of the presumed central galaxy and a centroid of the diffuse, emissive gas. The context here is that automated algorithms sometimes fail to choose the central galaxy correctly (because of image artifacts or other problems), whereas the gas centroid is more reliable but also more expensive to measure. Therefore, we'd like to use this data set to characterize the distribution of mis-centerings, so that the galaxy-based centers can be used for large samples, with the resulting errors propagated forward through future processing, e.g. weak lensing estimates.
Let's load up the data and have a look. Each entry in y is an offset in units of kpc. As usual, your particular data set is fictional, but intentionally similar to a real one that has actually been used for this purpose.
y = np.loadtxt(datapath+'offsets.txt')
Check out a quick histogram of the data.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
bins = np.linspace(0,1000,20)
plt.hist(y, bins=bins);
plt.xlabel("Offset $y$ (kpc)", fontsize=12);
plt.ylabel("Frequency", fontsize=12);
1. Choosing a Test Statistic¶
The model we will test in this tutorial is outlined in the next section - but how well that model fits the data is a question we will answer in part using a test statistic.
In this case, a feature we especially want to capture with our model is the fraction of clusters where $y$ is small (in the context of a galaxy cluster) vs not small. For concreteness, in the work you turn in, make $T$ the number of clusters with $y<100$ kpc. However, you are encouraged to play around with other possibilities to see how the simple model below succeeds or fails to reproduce the corresponding features of the data.
Below, define a function that computes this test statistic.
def T(yy):
"""
Argument: a data vector (either the real data or a simulated data set)
Returns: a scalar test statistic computed from the argument
"""
Compute the test statistic of the real data to verify that it works.
T_true = T(y)
print("The test statistic of the real data is T =", T_true)
The test statistic of the real data is T = 118
Setting up a Computational Framework¶
Once we define a model to work with (below), we'll want to fit that model to the data, and then evaluate it using the methods we saw in the notes. These include:
- a visual check using replica datasets drawn from the posterior predictive distribution
- a quantitative posterior predictive model check using a suitable test statistic $T(y)$
In the next notebook, we'll choose and fit a second, alternative model, and compare the two in terms of
- the Deviance Information Criterion (DIC), to assess the models' (relative) generalized predictive accuracy
- the Bayesian Evidence, to provide insight on the (relative) probability of each model given the data
Notice that each of these bulleted operations can be coded as a function of the model, $H$ (e.g. a visual check of the model, the evidence of the model, and so on). We will therefore be fancy and write a class that completely describes the model, and also contains a set of functions that act on the model (methods). Since we anticipate looking at multiple models, we'll use inheritance. This allows us to automatically reuse methods that don't depend on the specific model in question. While this level of object oriented programming may not be familiar, it's a good thing to learn, and most of the details are filled in for you below.
We start by defining a base class, which contains the functionality common to any model we care to define later. To make it clear what functionality we expect derived classes to provide, we'll include definitions of non-functional methods that the derived classes will need to override. (This is completely unnecessary in Python, but has the benefit of providing a complete list of functions we'll eventually want to fill in.)
The functions that are filled in below (towards the bottom) are the ones that we won't need to modify for any particular model. Do take a moment to make sure you understand how they work; they are mostly doing things you've seen before. Wherever you see **params here, a derived class would have a specific list of arguments corresponding to the model parameters.
# This is something we can throw to discourage direct instantiation of the base class
class VirtualClassError(Exception):
def __init__(self):
Exception.__init__(self,"Do not directly instantiate the base Model class!")
class Model:
"""
Base class for inference and model evaluation in a simple cluster mis-centering analysis.
In all these functions, `params' is a dictionary of model parameters.
"""
def __init__(self, priors, samples=None):
# Storage for MCMC samples from fitting the model
self.samples = samples
if samples is None:
self.Nsamples = 0
else:
self.Nsamples = samples.shape[0]
# a dictionary of priors (keys match param_names, values should have scipy distribution-like functionality)
# for simplicity, assume independent priors for each parameter
self.priors = priors
# The __init__ for derived classes should not repeat the lines above, but should provide what follows:
# # A name for the model!
# self.name =
# # As part of the model definition, store an ordered list of parameter names (for dictionary keys)
# # Named arguments of other functions that correspond to parameters should appear in this order
# self.param_names =
# # parameter names as display text (for plot labels),
# self.param_labels =
# # The next line finishes initialization by calling the parent class' __init__
# Model.__init__(self, *args, **kwargs)
def log_likelihood(self, **params):
"""
Return the log of the likelihood function L(params) = p(y|params,H)
"""
raise VirtualClassError # to be overriden by child classes
def sampling_distribution(self, yy, **params):
"""
Return the sampling distribution p(yy|params,H) at a point in data space yy given parameter(s) args
We expect a vector input yy, and return the corresponding probabilities.
Note: This is useful for making plots of "the model" overlaid on the histogram of the data
"""
raise VirtualClassError # to be overriden by child classes
def generate_replica_dataset(self, N, **params):
"""
Return a replica dataset y_rep of length N from the sampling distribution p(y_rep|args,H) as an array.
"""
raise VirtualClassError # to be overriden by child classes
def log_prior(self, **params):
"""
Return the log prior PDF p(params|H)
"""
return np.sum([self.priors[k].logpdf(params[k]) for k in params.keys()])
def draw_samples_from_prior(self, N):
"""
Return N samples from the prior PDF p(params|H) as a list of dictionaries
"""
return [{k:self.priors[k].rvs() for k in self.param_names} for i in range(N)]
def log_posterior(self, parameterlist=None, **params):
"""
Return the log of the (unnormalized) posterior PDF p(params|y,H)
The parameterlist argument is there for compatibility with emcee.
This will be an infinite loop if you named one of your parameters "parameterlist", but in that case
you deserve it.
This also means that for even a simple call, we would need to specify the parameter name(s),
i.e. not log_posterior(pval), but log_posterior(param=pval). This doesn't seem unreasonable.
"""
if parameterlist is not None:
pdict = {k:parameterlist[i] for i,k in enumerate(self.param_names)}
return self.log_posterior(**pdict)
lnp = self.log_prior(**params)
if lnp != -np.inf:
lnp += self.log_likelihood(**params)
return lnp
def draw_samples_from_posterior(self, starting_params, nsteps, nwalkers=8, threads=1):
"""
Use emcee to draw samples from P(params|y,H). Hopefully it Just Works.
You could try using e.g. threads=4 to speed things up with multiprocessing.
"""
# The density to sample is this model's own posterior PDF
npars = len(starting_params)
self.sampler = emcee.EnsembleSampler(nwalkers, npars, self.log_posterior, threads=threads)
# Generate an ensemble of walkers within +/-1% of the guess:
theta_0 = np.array([starting_params*(1.0 + 0.01*np.random.randn(npars)) for j in range(nwalkers)])
# Note that the initial parameter array theta_0 should have dimensions nwalkers x npars
# Evolve the ensemble:
self.sampler.run_mcmc(theta_0, nsteps)
# Plot the raw samples:
plt.rcParams['figure.figsize'] = (12.0, 4.0*npars)
fig, ax = plt.subplots(npars, 1);
cr.plot_traces(self.sampler.chain[:min(8,nwalkers),:,:], ax, labels=self.param_labels);
def check_chains(self, burn, maxlag):
'''
Ignoring `burn` samples from the front of each chain, compute convergence criteria and
effective number of samples.
'''
nwalk, nsteps, npars = self.sampler.chain.shape
if burn < 1 or burn >= nsteps:
return
tmp_samples = [self.sampler.chain[i,burn:,:] for i in range(nwalk)]
self.R = cr.GelmanRubinR(tmp_samples)
self.neff = cr.effective_samples(tmp_samples, maxlag=maxlag)
print('R =', self.R)
print('neff =', self.neff)
print('NB: Since walkers are not independent, these will be optimistic!')
def remove_burnin(self, burn):
'''
Remove `burn` samples from the front of each chain, and concatenate
Plot, and store the result in self.samples
'''
nwalk, nsteps, npars = self.sampler.chain.shape
if burn < 1 or burn >= nsteps:
return
self.samples = self.sampler.chain[:,burn:,:].reshape(nwalk*(nsteps-burn), npars)
self.Nsamples = self.samples.shape[0]
plt.rcParams['figure.figsize'] = (12.0, 4.0*npars)
fig, ax = plt.subplots(npars, 1);
cr.plot_traces(self.samples, ax, labels=self.param_labels);
def posterior_mean(self):
'''
Helper function for computing the posterior mean of each parameter (from MCMC samples)
'''
m = np.mean(self.samples, axis=0)
return {k:m[i] for i,k in enumerate(self.param_names)}
2. Evaluating a Simple Model¶
First, let's assume the simple model $H_1$, that the sampling distribution is an exponential:
$p(\{y_i\}|a, H_1) = \prod_i \frac{1}{a}e^{-y_i/a}$; $y_i\geq0$
Our single parameter is $a$, the mean of the exponential distribution.
Note that this is the entirety of the sampling distribution. Don't be scared of the simplicity! Our model is that the data are realizations of some distribution, and we want (initially) to find the parameter(s) of that distribution. There's nothing tricky (but also nothing Gaussian) about this.
As usual, draw the PGM and specify all the probabilistic relationships in the model. Note: In class we will discuss and agree on a common set of priors that everyone should use.
Space for PGM and generative model
2a. Implementation in code¶
We provide an implementation of the ExponentialModel class, derived from Model, below. As advertised, only the methods that depend on the sampling distribution need to be specifically implemented; the other Model methods are still available. You will still need to specify the priors for this model when we instantiate it.
class ExponentialModel(Model):
"""
Simple exponential model for mis-centering.
"""
def __init__(self, *args, **kwargs):
# A name for the model!
self.name = 'Exponential Model'
# Named arguments of other functions that correspond to parameters should appear in this order
self.param_names = ['a']
# parameter names as display text (for plot labels),
self.param_labels = [r'$a$']
# The next line finishes initialization by calling the parent class' __init__
Model.__init__(self, *args, **kwargs)
def log_likelihood(self, a):
"""
Evaluate the log of the likelihood function L(a) = p(y|a,H)
"""
return np.sum(st.expon.logpdf(y, scale=a))
def sampling_distribution(self, yy, a):
"""
Evaluate the sampling distribution PDF p(yy|a,H) at a point in data space yy given parameter value a
We expect a vector input yy, and return the corresponding probabilities.
Note: This is useful for making plots of "the model" overlaid on the histogram of the data
"""
return st.expon.pdf(yy, scale=a)
def generate_replica_dataset(self, N, a):
"""
Return a replica data set y_rep of length N from the sampling distribution p(y_rep|a,H).
"""
return st.expon.rvs(scale=a, size=N)
Per the comments in Model.__init__, define a dictionary whose sole entry, expon_priors['a'], specifies the prior distribution for $a$. This will need to be in the form of a scipy.stats frozen distribution, or some other object with many of the same methods.
# expon_priors = {'a':...}
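For example, a wide, flat prior could be encoded as below. This is only an illustration: the st.uniform choice and its 0-1000 kpc range are assumptions, not necessarily the prior we agree on in class.
# illustrative only: uniform prior on a between 0 and 1000 kpc
expon_priors = {'a': st.uniform(loc=0.0, scale=1000.0)}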
Now try instantiating a model object:
Model1 = ExponentialModel(expon_priors)
Try drawing a dozen samples from its prior as a test:
print("12 sample values drawn from the prior of Model1: ", Model1.draw_samples_from_prior(12))
12 sample values drawn from the prior of Model1: [{'a': np.float64(727.3576276349148)}, {'a': np.float64(307.0519863538348)}, {'a': np.float64(336.73296560586175)}, {'a': np.float64(113.92485398960983)}, {'a': np.float64(508.09518486933024)}, {'a': np.float64(765.3522652407509)}, {'a': np.float64(85.433236103721)}, {'a': np.float64(875.4919896458606)}, {'a': np.float64(751.8351251546003)}, {'a': np.float64(457.1531865269841)}, {'a': np.float64(267.0892677824944)}, {'a': np.float64(763.5849977620708)}] Note that the prior used here (and below) may not match the one we decide on in class
Test out the log-posterior function to make sure it's not obviously buggy:
for a in [1.0, 10.0, 100.0, -3.14]:
print('Log-posterior for a=', a, ' = ', Model1.log_posterior(a=a))
Log-posterior for a= 1.0 = -14245.699234078982
Log-posterior for a= 10.0 = -1776.1746671080894
Log-posterior for a= 100.0 = -840.0711979651959
Log-posterior for a= -3.14 = -inf
Similarly, test the mock-data-producing function (arbitrarily using $a=500$).
plt.rcParams['figure.figsize'] = (6.0, 4.0)
plt.hist(Model1.generate_replica_dataset(N=len(y), a=500.), bins=bins, color="lightgray");
plt.xlabel("Center offset $y$ (kpc)", fontsize=12);
plt.ylabel("Frequency (replica)", fontsize=12);
Finally, test the sampling distribution function.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
plt.plot(bins, Model1.sampling_distribution(bins, a=500.));
plt.xlabel("Center offset $y$ (kpc)", fontsize=12);
plt.ylabel("$p(y|a)$", fontsize=12);
2b. Fit the model to the data¶
The draw_samples_from_posterior method carries out a parameter inference with emcee and displays its Markov chains. remove_burnin removes burn-in and concatenates the chains. Since these steps aren't really the point of this problem, we'll just give you the code... for this model. Keep in mind that you're going to choose the alternative model to consider below, so we can't say what values of the keyword arguments used below will be appropriate for that case. In other words, best not to speed through this section without looking at and thinking about what's happening.
The concatenated MCMC samples are eventually stored in the Model1.samples array. Since we will be using these to compute p-values, among other things, make sure there are at least $10^3$ effectively independent samples after burn-in.
%%time
Model1.draw_samples_from_posterior(starting_params=[100.0], nsteps=2500, nwalkers=16)
CPU times: user 14.8 s, sys: 81.5 ms, total: 14.8 s
Wall time: 14.9 s
This will compute the usual convergence criteria and effective number of samples:
Model1.check_chains(burn=200, maxlag=200)
R = [1.0048015]
neff = [1332.34037394]
NB: Since walkers are not independent, these will be optimistic!
Assuming that looks ok, remove the burn-in for good, and plot the concatenation of what's left.
Model1.remove_burnin(burn=100)
assert Model1.neff[0] >= 1000
It will be useful for later to know the mean of the posterior:
print("Posterior mean:", Model1.posterior_mean())
Posterior mean: {'a': np.float64(96.28398736675786)}
This_all_looks_good = False # change to True when true
assert This_all_looks_good
2c. Visually compare the posterior predictions with the data.¶
First, let's just plot the posterior-mean model over the data.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
# First the histogram of observed data, as backdrop:
plt.hist(y, bins=bins, color="skyblue", density=True, label="Observed")
# Now overlay a curve following the sampling distribution conditioned on the posterior mean value of a:
yy = np.linspace(bins.min(), bins.max(), len(bins)*10)
pp = Model1.sampling_distribution(yy, **Model1.posterior_mean())
plt.plot(yy, pp, linestyle="dashed", color="red", label="Posterior mean model")
plt.xlabel("Offset $y$ (kpc)", fontsize=12)
plt.ylabel("Normalized Frequency", fontsize=12)
plt.legend();
This kind of plot should be familiar: it's often a good idea to evaluate model adequacy in data space. You should already be able to see telling differences between the best-fitting model's sampling distribution and the data histogram. However, we don't know to what extent those could be explained simply by the variability in the sampling distribution, let alone the posterior uncertainty in $a$.
So, next, let's compare a random predicted ("replica") data set, drawn from the posterior predictive distribution, with the data. To do this we first draw a parameter value from the posterior PDF, and then generate a dataset from the sampling distribution conditioned on that value. The result is a sample from $p(y_\mathrm{rep}|y,H_1)$.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
# First the histogram of observed data, as backdrop:
plt.hist(y, bins=bins, color="skyblue", density=True, label="Observed");
# Choose a posterior sample at random and generate a replica dataset, and show its histogram
j = np.random.randint(0, Model1.Nsamples)
mock = Model1.generate_replica_dataset(len(y), *Model1.samples[j,:])
plt.hist(mock, bins=bins, alpha=1.0, histtype="step", color="red", density=True, label="Sample posterior prediction")
plt.xlabel("Offset $y$ (kpc)", fontsize=12)
plt.ylabel("Normalized Frequency", fontsize=12)
plt.legend();
This plot is nice because it is comparing apples with apples, asking: do mock datasets drawn from our model sampling distribution with any plausible parameter value "look like" the real data?
However, to best evaluate this, we want to visualize the posterior predictive distribution of replica datasets instead of looking at just one. We can do this by plotting many replica datasets drawn from the posterior predictive PDF. Let's put this plot in a function, so we can re-use it later.
def visual_check(axes, Model, Nreps=None):
# First the histogram of observed data, as backdrop:
axes.hist(y, bins=bins, color="skyblue", density=True, label="Observed");
# Compute the posterior mean parameters
pm = Model.posterior_mean()
# Make a large number of replica datasets, and overlay histograms of them all
if Nreps is None: Nreps = len(Model.samples)
alpha = 5.0 / Nreps
for jj in np.round(np.linspace(0, len(Model.samples), num=Nreps, endpoint=False)):
j = int(jj)
if j==0:
# Plot a dataset drawn from the posterior mean, to give a good legend
mock = Model.generate_replica_dataset(len(y), **pm)
axes.hist(mock, bins=bins, histtype="step", alpha=1.0, color="red", density=True,
label="Sample posterior predictions");
else:
# Take the next posterior sample a and generate a replica dataset
mock = Model.generate_replica_dataset(len(y), *Model.samples[j,:])
axes.hist(mock, bins=bins, histtype="step", alpha=alpha, color="red", density=True);
# Include the posterior mean model for comparison
yy = np.linspace(bins.min(), bins.max(), len(bins)*10)
pp = Model.sampling_distribution(yy, **pm)
axes.plot(yy, pp, linestyle="dashed", color="red", label="Posterior mean model");
axes.set_title(Model.name, fontsize=14)
axes.set_xlabel("Offset $y$ (kpc)", fontsize=12)
axes.set_ylabel("Normalized Frequency", fontsize=12)
axes.legend();
The use of transparency above means that the darkness of the red histogram lines below corresponds to the PPD at that offset.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
fig, ax = plt.subplots(1)
visual_check(ax, Model1, Nreps=100);
Based on these visual checks, would you say the model does a good job of predicting the observed data?
I_have_thought_about_the_above_question = False # change to True when true
assert I_have_thought_about_the_above_question
2d. Quantitative posterior predictive model check¶
Now let's quantify the (in)adequacy of the fit with a quantitative posterior predictive model check, based on the test statistic function T you've already defined.
To sample the posterior predictive distribution of test statistics $p[T(y_\mathrm{rep})|y,H]$, we need to generate replica datasets from the model:
def distribution_of_T(Model):
"""
Compute T(yrep) for each yrep drawn from the posterior predictive distribution,
using parameter samples stored in Model.
"""
return np.array([T(Model.generate_replica_dataset(len(y), *p)) for p in Model.samples])
We can now do the following:
- plot a histogram of $T(\mathrm{mock~data})$
- compare that distribution with $T(\mathrm{real~data})$
- compute and report the p-value for $T(\mathrm{real~data})$
And we want all of that packaged in functions of the model, so that we can re-use it later (on different models!).
First write a function to estimate the p-value, $P(T > T(y)|y,H)$, based on posterior predictions from the chains you ran above.
def pvalue(Model):
"""
Return the posterior predictive p-value, P(T > T(y)|y,H):
"""
Here's a function that plots the distribution of T, and reports the p-value:
def posterior_predictive_check(Model, nbins=25):
"""
Compute the posterior predictive distribution of the test statistic T(y_rep), and compare with T(y_obs)
"""
# Compute distribution of T(yrep):
TT = distribution_of_T(Model)
# Plot:
plt.rcParams['figure.figsize'] = (7.0, 5.0)
plt.hist(TT, bins=nbins, histtype="step", color="red", label="$P(T(y_{\\rm rep})|y)$")
# Overlay T(y_obs):
plt.axvline(x=T_true, color="gray", linestyle="dashed", label="$T(y_{\\rm observed})$")
plt.xlabel("Test statistic T(y)", fontsize=12)
plt.ylabel("Posterior predictive probability density", fontsize=12)
plt.legend();
# Compute p-value:
p = pvalue(Model)
print("p-value =", p)
return p
Let's see how it does:
p1 = posterior_predictive_check(Model1)
p-value = 0.0009635416666666667
Your p-value will probably be quite small, of order $10^{-3}$. The precise value will vary because of all the randomization involved in estimating it (and in the data set itself), and the fact that we are out in the tail of the posterior predictive distribution.
This_checks_out_and_makes_sense = False # change to True if true
assert This_checks_out_and_makes_sense
3. Evaluating a more complex model¶
Next, we'll repeat all of that (it should be much easier this time!) with a second model. To make things interesting, we'll make it a model with at least two parameters. Since this is a PDF over distances, it should be defined over the non-negative real line. The test statistic will, of course, have to be the same as before.
Note: We will discuss and agree on an alternative model, including priors, in class. You are free to experiment with other options on your own, of course, though the notebook you turn in should use the agreed model.
Include a PGM and the generative model expressions below:
Space for PGM and generative model
Copy/paste the implementation of ExponentialModel and edit it accordingly. For grading convenience, we'll call the class AlternativeModel, but the name attribute set in its constructor can and should be more descriptive.
# class AlternativeModel(Model):
# ....
We also need a dictionary of prior PDFs, analogous to the first model.
# alt_priors = {...}
Next we instantiate it
Model2 = AlternativeModel(alt_priors)
... and test the prior sampling function:
for pars in Model2.draw_samples_from_prior(12):
print(pars)
{'param1': np.float64(0.01389095381986083), 'param2': np.float64(0.8035291913252819)} {'param1': np.float64(0.8461334461554492), 'param2': np.float64(0.8717777387295593)} {'param1': np.float64(0.4632988244284161), 'param2': np.float64(0.3168974981161762)} {'param1': np.float64(0.5785665691920485), 'param2': np.float64(0.8574218356793406)} {'param1': np.float64(0.9779222040088376), 'param2': np.float64(0.8095346084677374)} {'param1': np.float64(0.9502190656853983), 'param2': np.float64(0.653530563065794)} {'param1': np.float64(0.8936745455923217), 'param2': np.float64(0.2650802756331634)} {'param1': np.float64(0.6266555117798686), 'param2': np.float64(0.2783939401349742)} {'param1': np.float64(0.4081740672808708), 'param2': np.float64(0.010736923999304593)} {'param1': np.float64(0.33229705450592617), 'param2': np.float64(0.08997453955135737)} {'param1': np.float64(0.2844501626019076), 'param2': np.float64(0.9205648566857763)} {'param1': np.float64(0.22223343446526467), 'param2': np.float64(0.1690792186109169)} Note that the model and priors used here (and below) may not match the ones we decide on in class
Test the posterior function on a couple of these prior samples:
for pars in Model2.draw_samples_from_prior(5):
print('Log-posterior for', pars, '=', Model2.log_posterior(**pars))
Log-posterior for {'param1': np.float64(0.9484111265991709), 'param2': np.float64(0.35008052358881037)} = -3153.2090533737755 Log-posterior for {'param1': np.float64(0.2995109968073547), 'param2': np.float64(0.6782952504289327)} = -887.3137946876261 Log-posterior for {'param1': np.float64(0.4271204943604747), 'param2': np.float64(0.322002085941422)} = -1089.3790118487182 Log-posterior for {'param1': np.float64(0.042181781483559), 'param2': np.float64(0.5274328593466929)} = -1299.5961872082398 Log-posterior for {'param1': np.float64(0.37180097276747237), 'param2': np.float64(0.3331120154290481)} = -966.4614009016556 Note that the model and priors used here (and below) may not match the ones we decide on in class
Test the mock data generator. Here and in the following test, you can replace the call to draw_samples_from_prior with specific parameter values if your prior is so wide that you're unlikely to see anything on the plot.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
plt.hist(Model2.generate_replica_dataset(len(y), **Model2.draw_samples_from_prior(1)[0]), bins=bins, color="lightgray");
plt.xlabel("Offset $y$ (kpc)", fontsize=12);
plt.ylabel("Frequency (replica)", fontsize=12);
Test the sampling distribution.
plt.rcParams['figure.figsize'] = (6.0, 4.0)
yy = np.linspace(bins.min(), bins.max(), len(bins)*10)
plt.plot(yy, Model2.sampling_distribution(yy, **Model2.draw_samples_from_prior(1)[0]));
plt.xlabel("Measured distance $y$", fontsize=12);
plt.ylabel("$p(y|a)$", fontsize=12);
Fit the model. Remember that a thoughtless copy/paste of the corresponding Model1 lines above will not work!
%%time
# Model2.draw_samples_from_posterior(starting_params=[...], nsteps=..., nwalkers=...)
CPU times: user 24.9 s, sys: 108 ms, total: 25 s
Wall time: 25 s
Check the convergence
# Model2.check_chains(burn=..., maxlag=...)
R = [1.00493771 1.00525318]
neff = [1338.95282033 1247.8696096 ]
NB: Since walkers are not independent, these will be optimistic!
Remove burn-in.
# Model2.remove_burnin(...)
assert np.all(Model2.neff >= 1000)
Since we have multiple parameters this time around, let's look at their covariances. (The plot size below is good for 2 parameters, but increase it as needed.)
plotGTC(Model2.samples, paramNames=Model2.param_labels, figureSize=6,
customLabelFont={'size':12}, customTickFont={'size':12}, customLegendFont={'size':16});
Print the posterior mean.
print("Posterior mean parameters = ", Model2.posterior_mean())
Posterior mean parameters = {'param1': np.float64(0.09781076455568112), 'param2': np.float64(0.12476844271779633)}
Visually compare posterior predictions with the data, alongside those of the exponential model.
plt.rcParams['figure.figsize'] = (14.0, 5.0)
fig, ax = plt.subplots(1,2)
visual_check(ax[0], Model1, Nreps=100);
visual_check(ax[1], Model2, Nreps=100);
Finally, compute the p-value for the alternative model.
p2 = posterior_predictive_check(Model2)
p-value = 0.005535714285714285
Does your model appear to reproduce the data better than the exponential one? Does it have a "better" p-value?
I_have_pondered_the_above = False # change to True when true
assert I_have_pondered_the_above
More to the point, is it a better model, in the sense of being justified by the data? To answer that, we need to keep in mind that a more flexible model (i.e. with more free parameters) will usually be able to fit the data more closely, even if it's only reproducing the particular realization of noise present in our specific data set. In other words, directly comparing the maximum likelihood achieved, or even the p-values, is not enough to say that the more complex model is statistically better at reproducing the features of the observed data. For that, we will need to turn from model evaluation to model comparison.
4. Compare the models using the DIC¶
Recall that the Deviance Information Criterion is given by:
$\mathrm{DIC} = D(\langle\theta\rangle) + 2p_D = \langle D(\theta) \rangle + p_D; \quad p_D = \langle D(\theta) \rangle - D(\langle\theta\rangle)$
where $\theta$ are the parameters of a model, the deviance $D(\theta)=-2\log P(\mathrm{data}|\theta)$, and averages $\langle\rangle$ are over the posterior distribution of $\theta$.
Write a function to compute this from the posterior samples we have generated for a given model.
def DIC(Model):
"""
Compute the Deviance Information Criterion for the given model.
(In a less pedagogical world, this would logically be a method of the base Model class.)
Return a tuple: (DIC,pD)
"""
# Compute the deviance D for each sample
D = -2.0*np.array([ Model.log_likelihood(*params) for params in Model.samples ])
#pD =
#DIC =
#return DIC, pD
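One way to complete this, following the formula above (a sketch, using the same positional-unpacking convention as the provided line for D):
def DIC(Model):
    """
    Compute the Deviance Information Criterion for the given model.
    Return a tuple: (DIC, pD)
    """
    # deviance at each posterior sample
    D = -2.0*np.array([Model.log_likelihood(*params) for params in Model.samples])
    # deviance at the posterior mean of the parameters
    D_at_mean = -2.0*Model.log_likelihood(**Model.posterior_mean())
    pD = np.mean(D) - D_at_mean
    return np.mean(D) + pD, pD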
Compute the DIC for each model.
DIC1, pD1 = DIC(Model1)
print(Model1.name+':')
print("Effective number of fitted parameters =", pD1)
print("DIC =", DIC1)
Exponential Model:
Effective number of fitted parameters = 1.0122735932332034
DIC = 1667.981707101023
DIC2, pD2 = DIC(Model2)
print(Model2.name+':')
print("Effective number of fitted parameters =", pD2)
print("DIC =", DIC2)
Slightly Better Model:
Effective number of fitted parameters = 1.8845280132823063
DIC = 1642.8226541457996
Do your values of $p_D$ make sense given the dimensionality of the 2 models and their priors?
Now, to interpret this, we can compare the (hopefully) reduction in the DIC from Model 1 to Model 2 against the Jeffreys scale (see the notes). By this metric, is your second model better at explaining the data than the exponential model?
DIC1 - DIC2, np.exp(0.5*(DIC1-DIC2))
(np.float64(25.159052955223387), np.float64(290548.69837042346))
I_have_an_answer = False # change to True when true
assert I_have_an_answer
5. Compare the models using the evidence¶
In general, the recommended way to compute the evidence is using a nested sampler like dynesty. But this notebook is already getting a bit long, and we shouldn't need to be that sophisticated in this case. So, instead, we will estimate the evidence by direct Monte Carlo integration.
To do this, note that
$p(\mathrm{data}|H)=\int d\theta \, p(\mathrm{data}|\theta,H) \, p(\theta|H)$
can be approximated by averaging the likelihood over samples from the prior:
$p(\mathrm{data}|H) \approx \frac{1}{m}\sum_{k=1}^m p(\mathrm{data}|\theta_k,H)$, with $\theta_k\sim p(\theta|H)$.
This estimate is much more straightforward than trying to use samples from the posterior to calculate the evidence (which would require us to be able to normalize the posterior, which would require an estimate of the evidence, ...). But in general, and especially for large-dimensional parameter spaces, it is very inefficient because the likelihood typically is large in only a small fraction of the prior volume. Still, let's give it a try.
Write a function to draw a given number of samples from the prior and use them to calculate the evidence. To avoid numerical over/underflows, use the special scipy function logsumexp (which we imported directly, way at the top of the notebook) to do the sum of the likelihoods. As the name implies, this function is equivalent to log(sum(exp(...))), but is more numerically stable.
def log_evidence(Model, N=1000):
"""
Estimate the log-evidence for the model using N samples from the prior
"""
Do a quick test to check for NaNs:
log_evidence(Model1, N=2), log_evidence(Model2, N=2)
As with any Monte Carlo integration, we need to be confident that we have enough samples for our estimates to be usefully accurate. In this context, each estimate needs to be accurate enough that we can tell whether they are different from one another. Get a sense for the convergence of each evidence estimate by looking at how it changes with the value of N. If necessary, increase N until you are sure the difference between logE1 and logE2 is meaningful.
print('Model 1:')
for Nevidence in [1, 10, 100, 1000, 10000]:
%time logE1 = log_evidence(Model1, N=Nevidence)
print("From", Nevidence, "samples, the log-evidence is", logE1)
Model 1:
CPU times: user 775 μs, sys: 140 μs, total: 915 μs
Wall time: 1.03 ms
From 1 samples, the log-evidence is -1031.6045315628517
CPU times: user 4.12 ms, sys: 482 μs, total: 4.6 ms
Wall time: 4.57 ms
From 10 samples, the log-evidence is -835.5219816446375
CPU times: user 31.2 ms, sys: 1.61 ms, total: 32.8 ms
Wall time: 33.5 ms
From 100 samples, the log-evidence is -836.5067436340679
CPU times: user 326 ms, sys: 6.49 ms, total: 333 ms
Wall time: 388 ms
From 1000 samples, the log-evidence is -836.8428324442315
CPU times: user 3.27 s, sys: 80.5 ms, total: 3.35 s
Wall time: 4.37 s
From 10000 samples, the log-evidence is -836.8746637433588
print('Model 2:')
for Nevidence in [1, 10, 100, 1000, 10000]:
%time logE2 = log_evidence(Model2, N=Nevidence)
print("From", Nevidence, "samples, the log-evidence is", logE2)
Model 2:
CPU times: user 833 μs, sys: 148 μs, total: 981 μs
Wall time: 998 μs
From 1 samples, the log-evidence is -1068.1995631974924
CPU times: user 6.16 ms, sys: 1.1 ms, total: 7.26 ms
Wall time: 7.56 ms
From 10 samples, the log-evidence is -852.7993233549583
CPU times: user 45.9 ms, sys: 2.32 ms, total: 48.2 ms
Wall time: 54.3 ms
From 100 samples, the log-evidence is -825.6775636366577
CPU times: user 423 ms, sys: 13.3 ms, total: 436 ms
Wall time: 481 ms
From 1000 samples, the log-evidence is -828.9747236104766
CPU times: user 3.17 s, sys: 46.4 ms, total: 3.22 s
Wall time: 3.29 s
From 10000 samples, the log-evidence is -826.8910250245049
It_looks_convincingly_converged = False # change to True when true
assert It_looks_convincingly_converged
So, we have log evidences computed for each model. Now what? We can compare their difference to the Jeffreys scale again:
logE2 - logE1, np.exp(logE2 - logE1)
(np.float64(9.983638718853967), np.float64(21669.016731400014))
Note that we might end up with a different conclusion as to the strength of any preference of Model 2, compared with the DIC! The reason is that the evidence explicitly accounts for the information in the prior (which, recall, counts as part of the model definition), while the DIC does this much less directly.
We could also be good Bayesians and admit that there should be a prior distribution in model space. For example, maybe I have a very compelling theoretical reason why the offset distribution should be exponential (I don't, but just for example). Then, I might need some extra convincing that an alternative model is required.
We would then compute the ratio of the posterior probabilities of the models as follows:
prior_H1 = 0.9 # or your choice
prior_H2 = 1.0 - prior_H1 # assuming only these two options
log_post_H1 = np.log(prior_H1) + logE1
log_post_H2 = np.log(prior_H2) + logE2
print('Difference of log posteriors (H2-H1):', log_post_H2-log_post_H1)
print('Ratio of posteriors (H2/H1):', np.exp(log_post_H2-log_post_H1)) # NB this one might over/underflow
Difference of log posteriors (H2-H1): 7.786414141517753
Ratio of posteriors (H2/H1): 2407.668525711124
Depending on the fitness of the alternative model, you may find that only an extremely lopsided prior in model space would influence your conclusion.