# Tutorial identifier; presumably read by tbc.py when it reports unfinished cells -- confirm there.
TutorialName = 'missing_data'
# Execute the local helper script in this namespace. The context manager closes the file
# handle deterministically instead of leaving it to garbage collection.
# NOTE(review): exec runs tbc.py verbatim -- only do this with a trusted, local copy.
with open('tbc.py') as _tbc_file:
    exec(_tbc_file.read()) # define TBC and TBC_above
from io import StringIO
import numpy as np
from pygtc import plotGTC
import emcee
import incredible as cr
import matplotlib.pyplot as plt
%matplotlib inline


# Launch record embedded as text: one row per launch, holding the launch temperature and
# the number of O-ring incidents. The first line is a header and is skipped on load.
oring_data_string = """# temperature incidents
53 3
56 1
57 1
63 1
66 0
67 0
67 0
67 0
68 0
69 0
70 1
70 1
70 0
70 0
72 0
73 0
75 2
75 0
76 0
76 0
78 0
79 0
80 0
81 0
"""
oring_data = np.loadtxt(StringIO(oring_data_string), skiprows=1)
# Transposing turns the two columns into two rows that unpack directly.
oring_temps, oring_incidents = oring_data.T


# Scatter the raw record: incidents per launch against launch temperature.
plt.rc('figure', figsize=(6, 4))
plt.plot(oring_temps, oring_incidents, 'bo')
plt.xlabel('temperature (F)')
plt.ylabel('Number of incidents');  # trailing ';' keeps the notebook from echoing the return value


# Split the launch temperatures by outcome: any incident counts as a failure,
# zero incidents as a success. Boolean masks select the matching launches.
failure_temps = oring_temps[oring_incidents > 0]
success_temps = oring_temps[oring_incidents == 0]
Nfailure, Nsuccess = failure_temps.size, success_temps.size
print('temperatures corresponding to failures:', failure_temps)
print('temperatures corresponding to successes:', success_temps)


def P_success(T, T0, beta, Pcold, Phot):
    """
    Evaluate Psuccess as given above, as a function of T, for parameters T0, beta, Pcold, Phot.

    "Above" refers to the tutorial text that accompanies this exercise; the formula itself is
    not reproduced in this file.

    The implementation must broadcast over its arguments. Elsewhere in this file it is called
    both with an array of temperatures and scalar parameters (the preview plot) and with a
    scalar temperature and arrays of posterior samples for every parameter
    (Model.posterior_prediction_Pfailure). Callers treat the return value as a probability,
    e.g. using 1.0 - P_success(...) as the failure probability.
    """
    TBC()  # exercise placeholder supplied by tbc.py; replace with your implementation
    
# Helper from tbc.py; presumably checks/flags that the definition above is still
# to be completed -- see tbc.py for the exact behavior.
TBC_above()


# Preview P_success over a temperature grid for four hand-picked parameter sets,
# as a visual sanity check of the implementation.
plt.rc('figure', figsize=(6, 4))
T_axis = np.arange(32.0, 100.0)
plt.plot(T_axis, P_success(T_axis, T0=70.0, beta=0.3, Pcold=0.0, Phot=1.0))
plt.plot(T_axis, P_success(T_axis, T0=65.0, beta=0.1, Pcold=0.4, Phot=0.9))
plt.plot(T_axis, P_success(T_axis, T0=45.0, beta=1.0, Pcold=0.1, Phot=0.5))
plt.plot(T_axis, P_success(T_axis, T0=80.0, beta=0.5, Pcold=0.9, Phot=0.2))
plt.xlabel('temperature (F)')
plt.ylabel('probability of a clean launch');


TBC() # answer in Markdown


def ln_prior(T0, beta, Pcold, Phot):
    """
    Return the log-prior density for parameters T0, beta, Pcold, Phot

    Model.log_posterior calls this with all four parameters passed by keyword, so the
    parameter names must stay as they are. Returning -np.inf marks a point as excluded by
    the prior: in that case Model.log_posterior skips the likelihood evaluation entirely.
    """
    TBC()  # exercise placeholder supplied by tbc.py; replace with your implementation
    
# Helper from tbc.py; presumably checks/flags that the definition above is still
# to be completed -- see tbc.py for the exact behavior.
TBC_above()


class Model:
    '''
    Pair a log-prior with a log-likelihood for the four O-ring parameters (T0, beta, Pcold,
    Phot) and provide the fitting workflow: sample the posterior with emcee, check and trim
    the chains, then summarize and plot the posterior predictive failure probability.

    Attributes filled in by the workflow (each is None until its step has run):
      sampler       emcee sampler, set by sample_posterior
      samples       (nsamples, 4) array of post-burn-in samples, set by remove_burnin
      post_failure  dict of predictive quantiles, set by posterior_prediction_Pfailure
    '''
    def __init__(self, log_prior, log_likelihood):
        '''
        Store the two callables. Both are invoked with the parameters as keyword arguments
        named after the entries of self.param_names.
        '''
        self.log_prior = log_prior
        self.log_likelihood = log_likelihood
        self.param_names = ['T0', 'beta', 'Pcold', 'Phot']
        self.param_labels = [r'$T_0$', r'$\beta$', r'$P_\mathrm{cold}$', r'$P_\mathrm{hot}$']
        self.sampler = None
        self.samples = None
        # Initialized here so that plotting before predicting fails with a clear message.
        self.post_failure = None

    def log_posterior(self, pvec=None, **params):
        '''
        Our usual log-posterior function, able to take a vector argument to satisfy emcee

        If `pvec` is given, its entries are mapped onto self.param_names in order and any
        keyword parameters are ignored; otherwise the keywords are used directly.
        Returns log_prior + log_likelihood, or -inf without evaluating the likelihood when
        the prior is -inf.
        '''
        if pvec is not None:
            params = {name: pvec[i] for i, name in enumerate(self.param_names)}
        lnp = self.log_prior(**params)
        # Skip the likelihood where the prior already excludes the point; the likelihood
        # may not be defined there.
        if lnp != -np.inf:
            lnp += self.log_likelihood(**params)
        return lnp

    def sample_posterior(self, nwalkers=8, nsteps=10000, guess=(65.0, 0.1, 0.25, 0.75), threads=1):
        '''
        Run an emcee ensemble sampler for `nsteps` steps with `nwalkers` walkers, store it
        in self.sampler, and plot traces for (at most) the first 8 walkers.

        Walkers start at `guess` with an independent 1% multiplicative scatter per parameter.
        Note that a guess component of exactly 0 therefore receives no scatter at all.
        `threads` is forwarded to emcee.EnsembleSampler unchanged.
        Raises ValueError if `guess` does not hold one value per parameter.
        '''
        npars = len(self.param_names)
        guess = np.asarray(guess, dtype=float)
        if guess.shape != (npars,):
            raise ValueError('guess must contain exactly %d values' % npars)
        self.sampler = emcee.EnsembleSampler(nwalkers, npars, self.log_posterior, threads=threads)
        # One draw of shape (nwalkers, npars) consumes the random stream in the same order
        # as drawing npars values per walker in turn.
        start = guess * (1.0 + 0.01*np.random.randn(nwalkers, npars))
        self.sampler.run_mcmc(start, nsteps)
        plt.rcParams['figure.figsize'] = (16.0, 3.0*npars)
        fig, ax = plt.subplots(npars, 1);
        cr.plot_traces(self.sampler.chain[:min(8,nwalkers),:,:], ax, labels=self.param_labels);

    def _chain(self):
        '''
        Return the sampler's chain array, indexed as (walker, step, parameter).
        Raises RuntimeError if sample_posterior has not been run yet.
        '''
        if self.sampler is None:
            raise RuntimeError('No chains available; call sample_posterior() first.')
        return self.sampler.chain

    def _usable_burn(self, burn, nsteps):
        '''
        Return True if discarding `burn` steps leaves at least one step per chain.
        Otherwise emit a warning (rather than failing silently) and return False.
        '''
        if 0 <= burn < nsteps:
            return True
        import warnings  # local import so the class does not depend on file-level imports
        warnings.warn('burn=%s is outside the valid range [0, %s); nothing was done'
                      % (burn, nsteps), stacklevel=3)
        return False

    def check_chains(self, burn=500, maxlag=500):
        '''
        Ignoring `burn` samples from the front of each chain, compute convergence criteria and
        effective number of samples.

        The results are printed, not returned. `burn` may be 0 (use the full chains); a
        `burn` that is negative or not smaller than the chain length triggers a warning and
        nothing is printed.
        '''
        chain = self._chain()
        nwalk, nsteps, npars = chain.shape
        if not self._usable_burn(burn, nsteps):
            return
        tmp_samples = [chain[i,burn:,:] for i in range(nwalk)]
        print('R =', cr.GelmanRubinR(tmp_samples))
        print('neff =', cr.effective_samples(tmp_samples, maxlag=maxlag))
        print('NB: Since walkers are not independent, these will be optimistic!')

    def remove_burnin(self, burn=500):
        '''
        Remove `burn` samples from the front of each chain, and concatenate.
        Store the result in self.samples.

        `burn` may be 0 (keep everything); a `burn` that is negative or not smaller than the
        chain length triggers a warning and leaves self.samples unchanged.
        '''
        chain = self._chain()
        nwalk, nsteps, npars = chain.shape
        if not self._usable_burn(burn, nsteps):
            return
        self.samples = chain[:,burn:,:].reshape(nwalk*(nsteps-burn), npars)

    def posterior_prediction_Pfailure(self, temperatures=None, probs=(0.5, 0.16, 0.84)):
        '''
        For the given temperatures, compute and store quantiles of the posterior predictive distribution for O-ring failure.
        By default, store the median and a 68% credible interval (defined via quantiles).

        Nothing is returned; the result goes to self.post_failure, a dict with keys
          'T'     the temperatures,
          'p'     the requested probabilities as strings, in the order given,
          str(p)  for each p in `probs`, the p-quantile of 1 - P_success at each temperature.
        `temperatures` defaults to np.arange(30., 85.), created fresh on each call.
        Uses the module-level function P_success. Raises RuntimeError if remove_burnin has
        not stored any samples yet.
        '''
        if self.samples is None:
            raise RuntimeError('No posterior samples stored; call remove_burnin() first.')
        if temperatures is None:
            temperatures = np.arange(30., 85.)
        temperatures = np.asarray(temperatures)
        T0, beta, Pcold, Phot = self.samples.T  # one row of samples per parameter
        # Rows: temperatures; columns: posterior samples.
        Pfail = np.array([1.0-P_success(T, T0, beta, Pcold, Phot) for T in temperatures])
        res = {'T':temperatures, 'p':[str(p) for p in probs]}
        for p in probs:
            res[str(p)] = np.quantile(Pfail, p, axis=1)
        self.post_failure = res

    def plot_Pfailure(self, color, label):
        '''
        Plot summaries of the posterior predictive distribution for O-ring failure.
        Show the center as a solid line and credible interval(s) bounded by dashed lines.

        The first stored quantile is the labelled solid line; all others are dashed.
        Raises RuntimeError if posterior_prediction_Pfailure has not been run yet.
        '''
        if self.post_failure is None:
            raise RuntimeError('No predictions stored; call posterior_prediction_Pfailure() first.')
        T = self.post_failure['T']
        keys = self.post_failure['p']
        plt.plot(T, self.post_failure[keys[0]], color+'-', label=label)
        for key in keys[1:]:
            plt.plot(T, self.post_failure[key], color+'--')


def ln_like_complete(T0, beta, Pcold, Phot):
    """
    Return the log-likelihood corresponding to a complete data set
    (Go ahead and access `failure_temps' and `success_temps' from global scope)

    Model.log_posterior calls this with all four parameters passed by keyword, and only
    for points where the log-prior is not -inf.
    """
    TBC()  # exercise placeholder supplied by tbc.py; replace with your implementation
    
# Helper from tbc.py; presumably checks/flags that the definition above is still
# to be completed -- see tbc.py for the exact behavior.
TBC_above()


# Fit using the complete data set: every launch temperature is known.
complete_model = Model(ln_prior, ln_like_complete)


%%time
# Run emcee (8 walkers, 10000 steps each) from a 1%-scattered ball around `guess`;
# sample_posterior also draws the trace plots. %%time must stay the first line of its cell.
complete_model.sample_posterior(nwalkers=8, nsteps=10000, guess=[65.0, 0.1, 0.25, 0.75])


# Print convergence (R) and effective-sample-size diagnostics, ignoring the first 1000 steps.
complete_model.check_chains(burn=1000, maxlag=2000)


# Discard the same 1000 burn-in steps, pool all walkers into complete_model.samples,
# and show the posterior with pygtc's triangle plot.
complete_model.remove_burnin(burn=1000)
plotGTC(complete_model.samples, paramNames=complete_model.param_labels,
       figureSize=8, customLabelFont={'size':12}, customTickFont={'size':12}, customLegendFont={'size':16});

# Store the predictive quantiles on the default temperature grid, then draw them.
complete_model.posterior_prediction_Pfailure()

plt.rc('figure', figsize=(6., 4.))
complete_model.plot_Pfailure(color='C0', label='complete')
plt.xlabel(r'$T$')
plt.ylabel(r'$P_\mathrm{failure}(T)$')
plt.legend();


TBC() # answer in Markdown


# Report the predicted failure probability at T=36F as: center, +upper offset, lower offset.
# With the default quantiles, thing[0] is the median, thing[1] the 16% and thing[2] the 84% quantile.
j = np.where(complete_model.post_failure['T']==36.)[0]
# j is a length-1 index array, so each lookup yields a 1-element array. Calling float() on
# such an array is deprecated since NumPy 1.25; .item() extracts the scalar explicitly and
# raises if the temperature did not match exactly one grid point.
thing = [complete_model.post_failure[p][j].item() for p in complete_model.post_failure['p']]
print('Pfailure(T=36F|data) =', round(thing[0],2), '+'+str(round(thing[2]-thing[0],2)), round(thing[1]-thing[0],2))


# Summaries of the successful launches: how many there were, and the coldest and
# warmest launch temperature among them.
Nsuccess = success_temps.size
success_Tmin, success_Tmax = success_temps.min(), success_temps.max()


TBC() # answer in Markdown


def ln_like_censored(T0, beta, Pcold, Phot):
    """
    Return the log-likelihood for the case of censored success temperatures.
    This prototype assumes the success temperatures will be marginalized over within this function; otherwise
    they would need to be additional parameters to be sampled.
    
    (Go ahead and access `failure_temps', `Nsuccess', `success_Tmin' and `success_Tmax' from global scope)

    Because those names are looked up in global scope when the function is called, the
    values in effect while a sampler is running are the ones that count. This file rebinds
    `success_Tmin' and `success_Tmax' later on for the "very censored" fit, and the same
    function then serves both the censored and the very censored model.

    Model.log_posterior calls this with all four parameters passed by keyword, and only
    for points where the log-prior is not -inf.
    """
    TBC()  # exercise placeholder supplied by tbc.py; replace with your implementation

# Helper from tbc.py; presumably checks/flags that the definition above is still
# to be completed -- see tbc.py for the exact behavior.
TBC_above()


# Fit with censored success temperatures: same prior, censored likelihood.
censored_model = Model(ln_prior, ln_like_censored)


%%time
# Same sampler settings as the complete-data fit, so the two are directly comparable.
# %%time must stay the first line of its cell.
censored_model.sample_posterior(nwalkers=8, nsteps=10000, guess=[65.0, 0.1, 0.25, 0.75])


# Print convergence (R) and effective-sample-size diagnostics, ignoring the first 1000 steps.
censored_model.check_chains(burn=1000, maxlag=2000)


# Discard burn-in, then overlay the complete and censored posteriors in one triangle plot.
# The parameter labels are identical for every Model, so complete_model's are reused.
censored_model.remove_burnin(burn=1000)
plotGTC([complete_model.samples, censored_model.samples], paramNames=complete_model.param_labels,
        chainLabels=['complete', 'censored'],
        figureSize=8, customLabelFont={'size':12}, customTickFont={'size':12}, customLegendFont={'size':16});


# Store the censored model's predictive quantiles, then compare them with the
# complete-data prediction computed earlier.
censored_model.posterior_prediction_Pfailure()

plt.rc('figure', figsize=(6., 4.))
complete_model.plot_Pfailure(color='C0', label='complete')
censored_model.plot_Pfailure(color='C1', label='censored')
plt.xlabel(r'$T$')
plt.ylabel(r'$P_\mathrm{failure}(T)$')
plt.legend();


TBC() # answer in Markdown


# Report the predicted failure probability at T=75F as: center, +upper offset, lower offset.
# With the default quantiles, thing[0] is the median, thing[1] the 16% and thing[2] the 84% quantile.
j = np.where(censored_model.post_failure['T']==75.)[0]
# j is a length-1 index array, so each lookup yields a 1-element array. Calling float() on
# such an array is deprecated since NumPy 1.25; .item() extracts the scalar explicitly and
# raises if the temperature did not match exactly one grid point.
thing = [censored_model.post_failure[p][j].item() for p in censored_model.post_failure['p']]
print('Pfailure(T=75F|data) =', round(thing[0],2), '+'+str(round(thing[2]-thing[0],2)), round(thing[1]-thing[0],2))


# Widen the assumed range of the censored success temperatures. These are the globals that
# ln_like_censored is documented to read, so from here on any evaluation of that likelihood
# (including a re-run of censored_model) would use the wider range. Run cells in order.
success_Tmin = 45.0
success_Tmax = 80.0


# Same prior and likelihood function as censored_model; only the globals above differ.
verycensored_model = Model(ln_prior, ln_like_censored)


%%time
# Same sampler settings as the other two fits. %%time must stay the first line of its cell.
verycensored_model.sample_posterior(nwalkers=8, nsteps=10000, guess=[65.0, 0.1, 0.25, 0.75])


# Print convergence (R) and effective-sample-size diagnostics, ignoring the first 1000 steps.
verycensored_model.check_chains(burn=1000, maxlag=2000)


# Discard burn-in, then overlay all three posteriors in one triangle plot.
verycensored_model.remove_burnin(burn=1000)
plotGTC([complete_model.samples, censored_model.samples, verycensored_model.samples], paramNames=complete_model.param_labels,
        chainLabels=['complete', 'censored', 'very censored'],
        figureSize=8, customLabelFont={'size':12}, customTickFont={'size':12}, customLegendFont={'size':16});


# Store the very censored model's predictive quantiles, then compare all three fits.
verycensored_model.posterior_prediction_Pfailure()

plt.rc('figure', figsize=(7., 5.))
complete_model.plot_Pfailure(color='C0', label='complete')
censored_model.plot_Pfailure(color='C1', label='censored')
verycensored_model.plot_Pfailure(color='C2', label='very censored')
# Vertical marker at T = 36.
plt.axvline(x=36.0, color='k', linestyle='dotted')
plt.xlabel(r'$T$')
plt.ylabel(r'$P_\mathrm{failure}(T)$')
plt.legend();


TBC() # answer in Markdown


# Report the predicted failure probability at T=60F as: center, +upper offset, lower offset.
# With the default quantiles, thing[0] is the median, thing[1] the 16% and thing[2] the 84% quantile.
j = np.where(verycensored_model.post_failure['T']==60.)[0]
# j is a length-1 index array, so each lookup yields a 1-element array. Calling float() on
# such an array is deprecated since NumPy 1.25; .item() extracts the scalar explicitly and
# raises if the temperature did not match exactly one grid point.
thing = [verycensored_model.post_failure[p][j].item() for p in verycensored_model.post_failure['p']]
print('Pfailure(T=60F|data) =', round(thing[0],2), '+'+str(round(thing[2]-thing[0],2)), round(thing[1]-thing[0],2))


TBC() # your wisdom in Markdown

Tutorial: O-ring Failure Rates Prior to the Challenger Shuttle Loss

Coping with missing information

Background

Temperature Effects

1. Defining a model

1a. Implement this function and have a look

1b. PGM and priors

1c. Model fitting code

2. Solution for complete data

3. Censored (but somewhat informed) success temperatures

3a. Censored model definition

3b. Censored model fit

4. Censored (less informed) success temperatures

5. Draw a conclusion

6. OPTIONAL: Truncated success temperatures