    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    
    '''
    # Author: Riku Laine
    
    # Date: 12AUG2019
    
    # Project name: Potential outcomes in model evaluation
    # Description: This script creates the figures and results used 
    #              in synthetic data experiments.
    #
    # Parameters:
    # -----------
    
    # (1) figure_path : file location for saving the created figures.
    
    # (2) N_sim : Size of simulated data set.
    # (3) M_sim : Number of judges in simulated data, 
    #             N_sim must be divisible by M_sim!
    
    # (4) which : Which data + outcome analysis should be performed. Ranges from
    #             1 to 14. See the end of the script for which figure corresponds
    #             to which setting. Separate executions are preferred to keep
    #             runtime and memory consumption reasonable.
    # (5) group_amount : How many groups if non-linear model is used.
    
    # (6) stan_code_file_name : Name of file containing the stan model code.
    
    # (7) sigma_tau : Value of the prior variance.
    
    #
    '''
    # Refer to the `notes.tex` file for explanations about the modular framework.
    
    # Imports
    
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import scipy.stats as scs
    import scipy.special as ssp
    import numpy.random as npr
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    import pystan
    import gc
    
    plt.switch_backend('agg')
    
    import sys
    
    # figure storage name
    figure_path = sys.argv[1]
    
    # Size of simulated data set
    N_sim = int(sys.argv[2])
    
    # Number of judges in simulated data, N_sim must be divisible by M_sim!
    M_sim = int(sys.argv[3])
    
    # Which data + outcome generation should be performed.
    which = int(sys.argv[4])
    
    # How many groups if the non-linear model is used
    group_amount = int(sys.argv[5])
    
    # Name of stan model code file
    stan_code_file_name = sys.argv[6]
    
    # Variance prior
    sigma_tau = float(sys.argv[7])
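
    # A hypothetical example invocation could look as follows (the values are
    # illustrative placeholders only, not the settings behind any reported
    # results):
    #
    #   python stan_modelling_theoretic.py ./figures/ 4000 40 1 10 model.stan 1.0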
    
    # Settings
    
    plt.rcParams.update({'font.size': 16})
    plt.rcParams.update({'figure.figsize': (10, 6)})
    
    print("These results have been obtained with the following settings:")
    
    print("Number of observations in the simulated data:", N_sim)
    
    print("Number of judges in the simulated data:", M_sim)
    
    print("Number of groups:", group_amount)
    
    print("Prior for the variances:", sigma_tau)
    
    # Basic functions
    
    
    def inverseLogit(x):
        '''Return the logistic sigmoid, 1 / (1 + exp(-x)).'''
        return 1.0 / (1.0 + np.exp(-1.0 * x))
    
    
    def logit(x):
        return np.log(x) - np.log(1.0 - x)
    
    
    
    def inverseCumulative(x, mu, sigma):
        '''Compute the inverse of the cumulative distribution function (quantile
        function) of the logit-normal distribution at x with parameters mu and
        sigma (mean and st.dev. of the underlying normal distribution).'''

        return inverseLogit(ssp.erfinv(2 * x - 1) * np.sqrt(2 * sigma**2) + mu)
    
    def standardError(p, n):
        '''Standard error of a proportion p estimated from n observations.'''
        denominator = p * (1 - p)
        
        return np.sqrt(denominator / n)
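
    # Optional sanity check for inverseCumulative (a sketch only, not executed as
    # part of the experiments; paste into an interpreter if needed). The quantile
    # of a logit-normal(0, sigma) variable should match the empirical quantile of
    # inverseLogit(N(0, sigma)) samples:
    #
    #   samples = inverseLogit(npr.normal(scale=2, size=100000))
    #   np.quantile(samples, 0.3)              # empirical 30th percentile
    #   inverseCumulative(0.3, mu=0, sigma=2)  # should be close to the above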
    
    
    
    # ## Data generation modules
    
    def bernoulliDGWithoutUnobservables(N_total=50000):
        '''Generates data | Variables: X, Y | Outcome from Bernoulli'''
    
        df = pd.DataFrame()
    
        # Sample feature X from standard Gaussian distribution, N(0, 1).
        df = df.assign(X=npr.normal(size=N_total))
    
    
        # Calculate P(Y=0|X=x) = 1 / (1 + exp(-X)) = inverseLogit(X)
        df = df.assign(probabilities_Y=inverseLogit(df.X))
    
    
    
        # Draw Y ~ Bernoulli(1 - inverseLogit(X))
        # Note: P(Y=1|X=x) = 1 - P(Y=0|X=x) = 1 - inverseLogit(X)
    
        results = npr.binomial(n=1, p=1 - df.probabilities_Y, size=N_total)
    
        df = df.assign(result_Y=results)
    
        return df
    
    
    def thresholdDGWithUnobservables(N_total=50000,
                                     beta_X=1.0,
                                     beta_Z=1.0,
                                     beta_W=0.2):
        '''Generates data | Variables: X, Z, W, Y | Outcome by threshold'''
    
        df = pd.DataFrame()
    
        # Sample the variables from standard Gaussian distributions.
        df = df.assign(X=npr.normal(size=N_total))
        df = df.assign(Z=npr.normal(size=N_total))
        df = df.assign(W=npr.normal(size=N_total))
    
        # Calculate P(Y=0|X, Z, W)
    
        probabilities_Y = inverseLogit(beta_X * df.X + beta_Z * df.Z + beta_W * df.W)
    
    
        df = df.assign(probabilities_Y=probabilities_Y)
    
        # Result is 0 if P(Y = 0| X = x; Z = z; W = w) >= 0.5 , 1 otherwise
        df = df.assign(result_Y=np.where(df.probabilities_Y >= 0.5, 0, 1))
    
        return df
    
    
    def bernoulliDGWithUnobservables(N_total=50000,
                                    beta_X=1.0,
                                    beta_Z=1.0,
                                    beta_W=0.2):
        '''Generates data | Variables: X, Z, W, Y | Outcome from Bernoulli'''
        
        df = pd.DataFrame()
    
        # Sample features X, Z and W from the standard Gaussian distribution, N(0, 1).
        df = df.assign(X=npr.normal(size=N_total))
        df = df.assign(Z=npr.normal(size=N_total))
        df = df.assign(W=npr.normal(size=N_total))
    
    
        # Calculate P(Y=0|X, Z, W) = inverseLogit(beta_X * X + beta_Z * Z + beta_W * W)
        probabilities_Y = inverseLogit(beta_X * df.X + beta_Z * df.Z + beta_W * df.W)
    
    
        df = df.assign(probabilities_Y=probabilities_Y)
    
        # Draw Y from Bernoulli distribution
        results = npr.binomial(n=1, p=1 - df.probabilities_Y, size=N_total)
    
        df = df.assign(result_Y=results)
    
        return df
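
    # For illustration, a data generating module could be called as below (a
    # sketch with arbitrary example values; the experiment-specific calls are
    # made at the end of this script via the `which` switch):
    #
    #   df_example = bernoulliDGWithUnobservables(N_total=1000, beta_W=0.5)
    #   list(df_example.columns)  # ['X', 'Z', 'W', 'probabilities_Y', 'result_Y']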
    
    
    # ## Decider modules
    
    def humanDeciderLakkaraju(df,
                              featureX_col,
                              featureZ_col=None,
                              nJudges_M=100,
                              beta_X=1,
                              beta_Z=1,
                              add_epsilon=True):
        '''Decider module | Non-independent batch decisions.'''
    
        # Decider as specified by Lakkaraju et al.
    
    
        # Assert that every judge will have the same number of subjects.
        assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
    
        # Compute the number of subjects allocated for each judge.
        nSubjects_N = int(df.shape[0] / nJudges_M)
    
        # Assign judge IDs as running numbering from 0 to nJudges_M - 1
        df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))
    
        # Sample acceptance rates uniformly from a closed interval
        # from 0.1 to 0.9 and round to tenth decimal place.
    
        acceptance_rates = npr.uniform(.1, .9, nJudges_M)
    
        acceptance_rates = np.round(acceptance_rates, 10)
    
    
        # Replicate the rates so they can be attached to the corresponding judge ID.
        df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))
    
    
        if add_epsilon:
            epsilon = np.sqrt(0.1) * npr.normal(size=df.shape[0])
        else:
            epsilon = 0
        
    
        # Compute probability for jail decision
    
        if featureZ_col is None:
    
            probabilities_T = inverseLogit(beta_X * df[featureX_col] + epsilon)
    
        else:
    
            probabilities_T = inverseLogit(beta_X * df[featureX_col] +
                                           beta_Z * df[featureZ_col] + epsilon)
    
    
        df = df.assign(probabilities_T=probabilities_T)
    
        # Sort by judges then probabilities in decreasing order
        # Most dangerous for each judge are at the top.
        df.sort_values(by=["judgeID_J", "probabilities_T"],
                       ascending=False,
                       inplace=True)
    
        # Iterate over the data. A subject will be given a negative decision
        # if they are among the top (1 - r) * 100% most dangerous individuals
        # assigned to their judge, i.e. if their within-judge index is below
        # (1 - acceptance rate) times the number of subjects assigned to each
        # judge.
        df.reset_index(drop=True, inplace=True)
    
        df['decision_T'] = np.where((df.index.values % nSubjects_N) <
                                    ((1 - df['acceptanceRate_R']) * nSubjects_N),
                                    0, 1)
    
        df_labeled = df.copy()
    
        # Hide unobserved
        df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan
    
        return df_labeled, df
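
    # Sketch of how a decider module is chained after a data generating module
    # (arbitrary example values; the actual wiring of the experiments is done in
    # perfComp and in the `which` blocks at the end of the script):
    #
    #   df = bernoulliDGWithUnobservables(N_total=5000)
    #   df_labeled, df_full = humanDeciderLakkaraju(df, featureX_col='X',
    #                                               featureZ_col='Z', nJudges_M=50)
    #   # df_labeled hides result_Y (NaN) where decision_T == 0; df_full keeps all.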
    
    
    def bernoulliDecider(df,
                        featureX_col,
                        featureZ_col=None,
                        nJudges_M=100,
                        beta_X=1,
                        beta_Z=1,
                        add_epsilon=True):
        '''Use X and Z to make a decision with probability
        P(T=0|X, Z) = inverseLogit(beta_X * X + beta_Z * Z).'''
        # No leniencies involved.
        
    
        # Assert that every judge will have the same number of subjects.
        assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
    
        # Compute the number of subjects allocated for each judge.
        nSubjects_N = int(df.shape[0] / nJudges_M)
    
        # Assign judge IDs as running numbering from 0 to nJudges_M - 1
        df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))
    
        if add_epsilon:
            epsilon = np.sqrt(0.1) * npr.normal(size=df.shape[0])
        else:
            epsilon = 0
        
        if featureZ_col is None:
    
            probabilities_T = inverseLogit(beta_X * df[featureX_col] + epsilon)
    
        else:
    
            probabilities_T = inverseLogit(beta_X * df[featureX_col] +
                                           beta_Z * df[featureZ_col] + epsilon)
    
        df = df.assign(probabilities_T=probabilities_T)
    
        # Draw T from Bernoulli distribution
        decisions = npr.binomial(n=1, p=1 - df.probabilities_T, size=df.shape[0])
    
        df = df.assign(decision_T=decisions)
    
        # Calculate the acceptance rates.
        acceptance_rates = df.groupby('judgeID_J').mean().decision_T.values
    
        # Replicate the rates so they can be attached to the corresponding judge ID.
        df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))
    
        df_labeled = df.copy()
    
        df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan
    
        return df_labeled, df
    
    
    def quantileDecider(df,
                        featureX_col,
                        featureZ_col=None,
                        nJudges_M=100,
                        beta_X=1,
                        beta_Z=1,
    
                        add_epsilon=True,
                        leniency_upper_limit=0.9):
    
        '''Assign decisions by the value of inverse cumulative distribution function
        of the logit-normal distribution at leniency r.'''
        
        # Assert that every judge will have the same number of subjects.
        assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
    
        # Compute the number of subjects allocated for each judge.
        nSubjects_N = int(df.shape[0] / nJudges_M)
    
        # Assign judge IDs as running numbering from 0 to nJudges_M - 1
        df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))
    
        # Sample acceptance rates uniformly from a closed interval
        # from 0.1 to leniency_upper_limit and round to the tenth decimal place.
    
        acceptance_rates = npr.uniform(.1, leniency_upper_limit, nJudges_M)
    
        acceptance_rates = np.round(acceptance_rates, 10)
    
        # Replicate the rates so they can be attached to the corresponding judge ID.
        df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))
    
        if add_epsilon:
            epsilon = np.sqrt(0.1) * npr.normal(size=df.shape[0])
        else:
            epsilon = 0
        
        if featureZ_col is None:
    
            probabilities_T = inverseLogit(beta_X * df[featureX_col] + epsilon)
    
    
            # Compute the bounds straight from the inverse cumulative.
            # Assuming X is N(0, 1) so Var(bX*X)=bX**2*Var(X)=bX**2.
    
            df = df.assign(bounds=inverseCumulative(
                x=df.acceptanceRate_R, mu=0, sigma=np.sqrt(beta_X**2)))
        else:
    
            probabilities_T = inverseLogit(beta_X * df[featureX_col] +
                                           beta_Z * df[featureZ_col] + epsilon)
    
            # Compute the bounds straight from the inverse cumulative.
            # Assuming X and Z are i.i.d standard Gaussians with variance 1.
            # Thus Var(bx*X+bZ*Z)= bX**2*Var(X)+bZ**2*Var(Z).
    
            df = df.assign(bounds=inverseCumulative(
                x=df.acceptanceRate_R, mu=0, sigma=np.sqrt(beta_X**2 + beta_Z**2)))
    
        df = df.assign(probabilities_T=probabilities_T)
    
        # Assign negative decision if the predicted probability (probabilities_T) is
        # over the judge's threshold (bounds).
        df = df.assign(decision_T=np.where(df.probabilities_T >= df.bounds, 0, 1))
    
        
        # Calculate the observed acceptance rates.
        acceptance_rates = df.groupby('judgeID_J').mean().decision_T.values
    
        # Replicate the rates so they can be attached to the corresponding judge ID.
        df.acceptanceRate_R = np.repeat(acceptance_rates, nSubjects_N)
    
    
        df_labeled = df.copy()
    
        df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan
    
        return df_labeled, df
    
    
    def randomDecider(df, nJudges_M=100, use_acceptance_rates=False):
        '''Doesn't use any information about X and Z to make decisions.
        
    If use_acceptance_rates is False (default), all decisions are positive
    with probability 0.5. If True, acceptance rates will be sampled from
    U(0.1, 0.9) and rounded to the tenth decimal place.'''
    
        # Assert that every judge will have the same number of subjects.
        assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
    
        # Compute the number of subjects allocated for each judge.
        nSubjects_N = int(df.shape[0] / nJudges_M)
    
        # Assign judge IDs as running numbering from 0 to nJudges_M - 1
        df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))
    
        if use_acceptance_rates:
            # Sample acceptance rates uniformly from a closed interval
            # from 0.1 to 0.9 and round to tenth decimal place.
    
            # 26JUL2019: Fix one leniency to 0.9 so that contraction can compute all
            #            values.
            acceptance_rates = np.append(npr.uniform(.1, .9, nJudges_M - 1), 0.9)
            acceptance_rates = np.round(acceptance_rates, 10)
    
        else:
            # No real leniency here -> set to 0.5.
            acceptance_rates = np.ones(nJudges_M) * 0.5
    
        # Replicate the rates so they can be attached to the corresponding judge ID.
        df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))
    
        df = df.assign(
            decision_T=npr.binomial(n=1, p=df.acceptanceRate_R, size=df.shape[0]))
    
        df_labeled = df.copy()
    
        df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan
    
        return df_labeled, df
    
    
    def biasDecider(df,
                    featureX_col,
                    featureZ_col=None,
                    nJudges_M=100,
                    beta_X=1,
                    beta_Z=1,
                    add_epsilon=True):
        '''
        Biased decider: If X > 1, then X <- X * 0.75, so people with high X
        get more positive decisions than they should. And if -2 < X < -1, then
        X <- X + 0.5, so people with X in (-2, -1) get less positive decisions
        than they should.
        '''
    
        # If X > 1, then X <- X * 0.75. People with high X get more positive
        # decisions than they should.
        df = df.assign(biased_X=np.where(df[featureX_col] > 1, df[featureX_col] *
                                         0.75, df[featureX_col]))
    
        # If -2 < X < -1, then X <- X + 0.5. People with X in (-2, -1) get less
        # positive decisions than they should.
        df.biased_X = np.where((df.biased_X > -2) & (df.biased_X < -1),
                               df.biased_X + 0.5, df.biased_X)
    
        # Assert that every judge will have the same number of subjects.
        assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
    
        # Use quantile decider, but judge by the biased X.
    
        df_labeled, df = quantileDecider(df,
                                         featureX_col='biased_X',
                                         featureZ_col=featureZ_col,
                                         nJudges_M=nJudges_M,
                                         beta_X=beta_X,
                                         beta_Z=beta_Z,
                                         add_epsilon=add_epsilon)
    
        return df_labeled, df
    
    
    # ## Evaluator modules
    
    # ### Convenience functions
    
    def fitPredictiveModel(x_train, y_train, x_test, class_value, model_type=None):
        '''
        Fit a predictive model (default logistic regression) with given training 
        instances and return probabilities for test instances to obtain a given 
        class label.
        
        Arguments:
        ----------
        
        x_train -- x values of training instances
        y_train -- y values of training instances
        x_test -- x values of test instances
        class_value -- class label for which the probabilities are computed.
        model_type -- type of model to be fitted.
        
        Returns:
        --------
        (1) Trained predictive model
        (2) Probabilities for given test inputs for given class.
        '''
    
        if model_type is None or model_type in ["logistic_regression", "lr"]:
            # Instantiate the model (using the default parameters)
            logreg = LogisticRegression(solver='lbfgs')
    
            # Check shape and fit the model.
            if x_train.ndim == 1:
                logreg = logreg.fit(x_train.values.reshape(-1, 1), y_train)
            else:
                logreg = logreg.fit(x_train, y_train)
    
            label_probs_logreg = getProbabilityForClass(x_test, logreg,
                                                        class_value)
    
            return logreg, label_probs_logreg
    
        elif model_type in ["random_forest", "rf"]:
            # Instantiate the model
            forest = RandomForestClassifier(n_estimators=100, max_depth=3)
    
            # Check shape and fit the model.
            if x_train.ndim == 1:
                forest = forest.fit(x_train.values.reshape(-1, 1), y_train)
            else:
                forest = forest.fit(x_train, y_train)
    
            label_probs_forest = getProbabilityForClass(x_test, forest,
                                                        class_value)
    
            return forest, label_probs_forest
    
        elif model_type == "fully_random":
    
            label_probs = np.ones_like(x_test) / 2
    
            model_object = lambda x: 0.5
    
            return model_object, label_probs
        else:
            raise ValueError("Invalid model_type!", model_type)
    
    
    def getProbabilityForClass(x, model, class_value):
        '''
        Function (wrapper) for obtaining the probability of a class given x and a 
        predictive model.
    
        Arguments:
        -----------
        x -- individual features, an array of shape (observations, features)
        model -- a trained sklearn model. Predicts probabilities for given x. 
            Should accept input of shape (observations, features)
        class_value -- the resulting class to predict (usually 0 or 1).
    
        Returns:
        --------
        (1) The probabilities of given class label for each x.
        '''
        if x.ndim == 1:
            # if x is vector, transform to column matrix.
            f_values = model.predict_proba(np.array(x).reshape(-1, 1))
        else:
            f_values = model.predict_proba(x)
    
        # Get correct column of predicted class, remove extra dimensions and return.
        return f_values[:, model.classes_ == class_value].flatten()
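
    # Sketch of how the convenience functions above are used (hypothetical
    # variables; the real calls are made inside perfComp, using only the
    # observations with decision_T == 1 for training):
    #
    #   model, probs = fitPredictiveModel(x_train=train['X'],
    #                                     y_train=train['result_Y'],
    #                                     x_test=test['X'],
    #                                     class_value=0,
    #                                     model_type='lr')
    #   # probs[i] is the fitted model's estimate of P(Y = 0 | X = x_i).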
    
    # ### Contraction algorithm
    # 
    # Below is an implementation of Lakkaraju's team's algorithm presented in 
    # [their paper](https://helka.finna.fi/PrimoRecord/pci.acm3098066). Relevant
    # parameters to be passed to the function are presented in the description.
    
    def contraction(df, judgeIDJ_col, decisionT_col, resultY_col, modelProbS_col,
                    accRateR_col, r):
        '''
        This is an implementation of the algorithm presented by Lakkaraju
        et al. in their paper "The Selective Labels Problem: Evaluating 
        Algorithmic Predictions in the Presence of Unobservables" (2017).
    
        Arguments:
        ----------
        df -- The (Pandas) data frame containing the data, judge decisions,
            judge IDs, results and probability scores.
        judgeIDJ_col -- String, the name of the column containing the judges' IDs
            in df.
        decisionT_col -- String, the name of the column containing the judges' decisions
        resultY_col -- String, the name of the column containing the realization
        modelProbS_col -- String, the name of the column containing the probability
            scores from the black-box model B.
        accRateR_col -- String, the name of the column containing the judges' 
            acceptance rates
        r -- Float between 0 and 1, the given acceptance rate.
    
        Returns:
        --------
        (1) The estimated failure rate at acceptance rate r.
        '''
        # Get ID of the most lenient judge.
        most_lenient_ID_q = df[judgeIDJ_col].loc[df[accRateR_col].idxmax()]
    
        # Subset. "D_q is the set of all observations judged by q."
        D_q = df[df[judgeIDJ_col] == most_lenient_ID_q].copy()
    
        
        ##### Agreement rate computation begins #######
        
        max_leniency = max(df[accRateR_col])
        
        if max_leniency < r:
            return np.nan, np.nan
        
        # Sort D_q
        # "Observations deemed as high risk by B are at the top of this list"
        D_sort_q = D_q.sort_values(by=modelProbS_col, ascending=False)
        
        # The agreement rate is computed as the share of the negative decisions
        # given by q that fall within the (1 - max leniency) * 100% of subjects
        # deemed most dangerous by the model B.
        all_negatives = np.sum(D_sort_q[decisionT_col] == 0)
        
        n_most_dangerous = int(round(D_q.shape[0] * (1-max_leniency)))
        
        agreed_decisions = np.sum(D_sort_q[decisionT_col][0:n_most_dangerous] == 0)
    
        print("The agreement rate of contraction is:", agreed_decisions/all_negatives)
        
        ##### Agreement rate computation ends #######
    
    
        # All observations of R_q have observed outcome labels.
        # "R_q is the set of observations in D_q with observed outcome labels."
        R_q = D_q[D_q[decisionT_col] == 1].copy()
    
        # Sort observations in R_q in descending order of confidence scores S and
        # assign to R_sort_q.
        # "Observations deemed as high risk by B are at the top of this list"
        R_sort_q = R_q.sort_values(by=modelProbS_col, ascending=False)
    
        number_to_remove = int(
            round((1.0 - r) * D_q.shape[0] - (D_q.shape[0] - R_q.shape[0])))
    
        # "R_B is the list of observations assigned to t = 1 by B"
        R_B = R_sort_q[number_to_remove:R_sort_q.shape[0]]
    
    
        return np.sum(R_B[resultY_col] == 0) / D_q.shape[0], D_q.shape[0]
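
    # Sketch of a single contraction call (column names follow the conventions
    # used elsewhere in this script; r is a hypothetical leniency level):
    #
    #   fr, n = contraction(test_labeled, judgeIDJ_col='judgeID_J',
    #                       decisionT_col='decision_T', resultY_col='result_Y',
    #                       modelProbS_col='B_prob_0_model',
    #                       accRateR_col='acceptanceRate_R', r=0.5)
    #   # fr is the estimated failure rate at acceptance rate 0.5.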
    
    
    
    # ### Evaluators
    
    def trueEvaluationEvaluator(df, featureX_col, decisionT_col, resultY_col, r):
        '''True evaluation: rank all subjects by the model's predicted P(Y=0) and
        compute the failure rate among the r * 100% lowest-risk subjects using
        the ground-truth outcomes of the unlabeled data.'''

        df.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
    
    
    
        to_release = int(round(df.shape[0] * r))
        
        failed = df[resultY_col][0:to_release] == 0
    
    
    
        return np.sum(failed) / df.shape[0], df.shape[0]
    
    
    
    def labeledOutcomesEvaluator(df,
                                 featureX_col,
                                 decisionT_col,
                                 resultY_col,
                                 r,
                                 adjusted=False):
        '''Labeled outcomes: as the true evaluation, but using only the subjects
        whose outcomes were observed (decision_T == 1).'''

        df_observed = df.loc[df[decisionT_col] == 1, :]
    
    
    
        df_observed = df_observed.sort_values(by='B_prob_0_model',
                                              inplace=False,
                                              ascending=True)
    
    
        to_release = int(round(df_observed.shape[0] * r))
        
        failed = df_observed[resultY_col][0:to_release] == 0
    
    
        if adjusted:
    
            return np.mean(failed), df.shape[0]
    
    
    
        return np.sum(failed) / df.shape[0], df.shape[0]
    
    
    
    def humanEvaluationEvaluator(df, judgeIDJ_col, decisionT_col, resultY_col,
                                 accRateR_col, r):
        '''Human evaluation: estimate the failure rate at leniency r from the
        decisions of the judges whose acceptance rate rounds to r.'''

        # Get judges with correct leniency as a list.
        is_correct_leniency = df[accRateR_col].round(1) == r
    
        # No judges with correct leniency
        if np.sum(is_correct_leniency) == 0:
    
            return np.nan, np.nan
    
    
        correct_leniency_list = df.loc[is_correct_leniency, judgeIDJ_col]
    
        # Released are the people they judged and released, T = 1
        released = df[df[judgeIDJ_col].isin(correct_leniency_list)
                      & (df[decisionT_col] == 1)]
    
    
        failed = released[resultY_col] == 0
        
    
        # Compute their failure rate, i.e. the ratio of failures (Y = 0) to the
        # total number of subjects judged.
    
        return np.sum(failed) / correct_leniency_list.shape[0], correct_leniency_list.shape[0]
    
    
    
    def monteCarloEvaluator(df,
                            featureX_col,
                            decisionT_col,
                            resultY_col,
                            accRateR_col,
                            r,
                            mu_X=0,
                            mu_Z=0,
                            beta_X=1,
                            beta_Z=1,
                            sigma_X=1,
                            sigma_Z=1):
        '''Analytical counterfactual evaluator: estimate the unobserved Z from its
        truncated Gaussian distribution given the judge's decision, impute outcomes
        where the decision was negative, and compute the failure rate at
        acceptance rate r.'''

        # Compute the predicted/assumed decision bounds for all the judges.
    
        q_r = inverseCumulative(x=df[accRateR_col],
                                mu=mu_X + mu_Z,
                                sigma=np.sqrt((beta_X * sigma_X)**2 +
                                              (beta_Z * sigma_Z)**2))
    
    
        df = df.assign(bounds=logit(q_r) - df[featureX_col])
    
    
        # Compute the expectation of Z when it is known to come from truncated
        # Gaussian.
    
        alphabeta = (df.bounds - mu_Z) / (sigma_Z)
    
    
        Z_ = scs.norm.sf(alphabeta, loc=mu_Z, scale=sigma_Z)  # 1 - cdf(ab)
    
        # E(Z | Z > a). Expectation of Z if negative decision.
        exp_lower_trunc = mu_Z + (sigma_Z * scs.norm.pdf(alphabeta)) / Z_
    
        # E(Z | Z < b). Expectation of Z if positive decision.
        exp_upper_trunc = mu_Z - (
            sigma_Z * scs.norm.pdf(alphabeta)) / scs.norm.cdf(alphabeta)
    
    
        exp_Z = (1 - df[decisionT_col]
                 ) * exp_lower_trunc + df[decisionT_col] * exp_upper_trunc
    
    
        # Attach the predicted probability for Y=0 to data.
    
        df = df.assign(predicted_Y=inverseLogit(df[featureX_col] + exp_Z))
    
    
    
        # Predictions drawn from binomial.
        predictions = npr.binomial(n=1, p=1 - df.predicted_Y, size=df.shape[0])
    
    
    
        df[resultY_col] = np.where(df[decisionT_col] == 0, predictions,
                                     df[resultY_col])
    
    
    
        df.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
    
    
    
        to_release = int(round(df.shape[0] * r))
        
        failed = df[resultY_col][0:to_release] == 0
    
    
    
        return np.sum(failed) / df.shape[0], df.shape[0]
    
    
    
    def drawDiagnostics(title, save_name, f_rates, titles):
    
        
        cols = 2
        rows = int(np.ceil(len(f_rates) / cols))
        
        plt.figure(figsize=(16, 4.5*rows+1))
    
        ax = plt.subplot(rows, cols, 1)
        x_ax = np.arange(1, 9, 1) / 10
    
        plt.boxplot(f_rates[0], labels=x_ax)
    
        plt.title(titles[0])
        plt.xlabel('Acceptance rate')
        plt.ylabel('Failure rate')
        plt.grid()
    
        for i in range(len(f_rates)):
            plt.subplot(rows, cols, i + 1, sharey=ax)
    
            plt.boxplot(f_rates[i], labels=x_ax)
    
            plt.title(titles[i])
            plt.xlabel('Acceptance rate')
            plt.ylabel('Failure rate')
            plt.grid()
    
        plt.tight_layout()
    
        plt.subplots_adjust(top=0.85)
    
        plt.suptitle(title, y=0.96, weight='bold')
    
    
        plt.savefig(save_name + '_diagnostic_plot')
    
    
        plt.show()
    
    
    def perfComp(dgModule, deciderModule, title, save_name, model_type=None, 
                 fit_with_Z=False, nIter=10):
        '''Performance comparison, a.k.a the main figure. 
        
        Parameters:
        -----------
            dgModule: Module creating the data
        deciderModule: Module assigning the decisions
            title: Title for the diagnostic figure
            save_name: name for saving
            model_type: what kind of model should be fitted. Logistic regression
                        ("lr"), random forest ("rf") or fully random 
                        ("fully_random")
            fit_with_Z: should the predictive model be fit with Z (the latent 
                        variable)
            nIter: Number of train-test splits to be performed.
            
        '''
        
        failure_rates = np.zeros((8, 4))
        failure_stds = np.zeros((8, 4))
    
    
        f_rate_true = np.zeros((nIter, 8))
        f_rate_label = np.zeros((nIter, 8))
        f_rate_cont = np.zeros((nIter, 8))
    
        f_rate_cf = np.zeros((nIter, 8))
    
    
        # Create data
        df = dgModule()
    
    
        # Assign decisions
    
        df_labeled, df_unlabeled = deciderModule(df)
        
    
        for i in range(nIter):
    
    
    
            # Split data
            train, test_labeled = train_test_split(df_labeled, test_size=0.5)
            
            # Pick the same observations from the unlabeled data.
            test_unlabeled = df_unlabeled.iloc[test_labeled.index.values]
    
            # Train model
            if fit_with_Z:
                B_model, predictions = fitPredictiveModel(
                        train.loc[train['decision_T'] == 1, ['X', 'Z']],
                        train.loc[train['decision_T'] == 1, 'result_Y'], test_labeled[['X', 'Z']], 
                        0, model_type=model_type)
    
        
    
            else:
                B_model, predictions = fitPredictiveModel(
                        train.loc[train['decision_T'] == 1, 'X'],
                        train.loc[train['decision_T'] == 1, 'result_Y'], test_labeled['X'], 
                        0, model_type=model_type)
    
            # Attach predictions to data
            test_labeled = test_labeled.assign(B_prob_0_model=predictions)
            test_unlabeled = test_unlabeled.assign(B_prob_0_model=predictions)
    
        
    
            test_labeled.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
    
        
    
            # Assign group IDs
            kk_array = pd.qcut(test_labeled['B_prob_0_model'], group_amount, labels=False)
    
        
    
            # Find observed values
            observed = test_labeled['decision_T'] == 1
    
        
    
            # Assign data to the model
    
            
    
            ###############################
            # Change this assignment if you want to use predictions as input to the
            # model.
            ###############################
            dat = dict(D=1,
                       N_obs=np.sum(observed),
                       N_cens=np.sum(~observed),
                       K=group_amount,
                       sigma_tau=sigma_tau,
                       M=len(set(df_unlabeled['judgeID_J'])),
                       jj_obs=test_labeled.loc[observed, 'judgeID_J']+1,
                       jj_cens=test_labeled.loc[~observed, 'judgeID_J']+1,
                       kk_obs=kk_array[observed]+1,
                       kk_cens=kk_array[~observed]+1,
                       dec_obs=test_labeled.loc[observed, 'decision_T'],
                       dec_cens=test_labeled.loc[~observed, 'decision_T'],
                       X_obs=test_labeled.loc[observed, 'X'].values.reshape(-1,1),
                       X_cens=test_labeled.loc[~observed, 'X'].values.reshape(-1,1),
                       y_obs=test_labeled.loc[observed, 'result_Y'].astype(int))
    
            # Do the sampling - Change this if you wish to use other methods.
            fit = sm.sampling(data=dat, chains=5, iter=5000, control = dict(adapt_delta=0.9))
                    
            pars = fit.extract()
    
            
            plt.figure(figsize=(15,30))
        
            fit.plot();
        
            plt.savefig(save_name + '_stan_convergence_diagnostic_plot_' + str(i))
            
            plt.show()
            plt.close('all')
            gc.collect()
        
            print(fit, file=open(save_name + '_stan_fit_diagnostics_' + str(i) + '.txt', 'w'))
    
    
    
            # Bayes
            
            # Format matrix, each row is a sample of the posterior
            # columns are observations
                    
            y_imp = np.ones((pars['y_est'].shape[0], test_labeled.shape[0]))
            
            # Impute the unobserved outcomes with the posterior estimates.
            # Reverse the binary coding so that failures (Y = 0) become ones and
            # can be counted by summing.
            y_imp[:, ~observed] = 1 - pars['y_est']
            
            # Remove the variables to conserve memory
            del(pars)
            del(fit)
            
            # Impute the observed
            y_imp[:, observed] = 1 - test_labeled.loc[observed, 'result_Y']
                                
            for r in range(1, 9):
                
                to_release = np.round(test_labeled.shape[0] * (r / 10)).astype(int)
                            
                failed = np.sum(y_imp[:, 0:to_release], axis=1)
                
                f_rate_cf[i, r - 1] = np.mean(failed / test_labeled.shape[0])
        
    
                print(".", end="")
    
                # True evaluation
    
        
                f_rate_true[i, r - 1], N = trueEvaluationEvaluator(test_unlabeled,
                           'X', 'decision_T', 'result_Y', r / 10)
                    
    
                # Labeled outcomes only
    
        
                f_rate_label[i, r - 1], N = labeledOutcomesEvaluator(test_labeled, 
                            'X', 'decision_T', 'result_Y', r / 10)
                
    
                # Contraction
    
        
                f_rate_cont[i, r - 1], N = contraction(test_labeled, 'judgeID_J', 
                           'decision_T', 'result_Y', 'B_prob_0_model', 
                           'acceptanceRate_R', r / 10)
                
    
        failure_rates[:, 0] = np.mean(f_rate_true, axis=0)
        failure_rates[:, 1] = np.mean(f_rate_label, axis=0)
    
        failure_rates[:, 2] = np.mean(f_rate_cont, axis=0)
        failure_rates[:, 3] = np.mean(f_rate_cf, axis=0)
    
        failure_stds[:, 0] = np.std(f_rate_true, axis=0)
        failure_stds[:, 1] = np.std(f_rate_label, axis=0)
        failure_stds[:, 2] = np.std(f_rate_cont, axis=0)
        failure_stds[:, 3] = np.std(f_rate_cf, axis=0)
    
    
        x_ax = np.arange(0.1, 0.9, 0.1)
    
    
        labels = ['True Evaluation', 'Labeled outcomes', 'Contraction', 
                  'Counterfactuals']
        
        colours = ['g', 'magenta', 'b', 'r']
        
        line_styles = ['--', ':', '-.', '-']
    
    
        for i in range(failure_rates.shape[1]):
            plt.errorbar(x_ax,
                         failure_rates[:, i],
                         label=labels[i],
                         c=colours[i],
    
                         linestyle=line_styles[i],
                         yerr=failure_stds[:, i])
    
    
    
        #plt.title('Failure rate vs. Acceptance rate')
    
        plt.xlabel('Acceptance rate')
        plt.ylabel('Failure rate')
        plt.legend()
        plt.grid()
        
    
        plt.savefig(save_name + '_all')
    
        
        plt.show()
    
        print("\nFailure rates:")
        print(np.array2string(failure_rates, formatter={'float_kind':lambda x: "%.5f" % x}))
        
    
        print("\nStandard deviations:")
        print(np.array2string(failure_stds, formatter={'float_kind':lambda x: "%.5f" % x}))
    
    
        print("\nMean absolute errors:")
        for i in range(1, failure_rates.shape[1]):
            print(
                labels[i].ljust(len(max(labels, key=len))),
                np.round(
                    np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])), 5))
    
        drawDiagnostics(title=title,
    
                    save_name=save_name,
                    f_rates=[f_rate_true, f_rate_label, f_rate_cont, f_rate_cf],
                    titles=labels)
    
    
    
    # Compile stan model
    
    sm = pystan.StanModel(file=stan_code_file_name)
    
    if which == 1:
    
        print("\nWith unobservables (Bernoullian outcome + independent decisions)")
        
        print("Decision-maker in the data and model: random and random")
    
    
    
        dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim)
    
    
    
        decider = lambda x: randomDecider(x, nJudges_M=M_sim, use_acceptance_rates=True)
    
    
        perfComp(
            dg, lambda x: decider(x),
            "Fluctuation of failure rate estimates across iterations\n" +
            "Bernoulli + independent decisions, with unobservables",
            figure_path + "sl_bernoulli_independent_without_Z",
            model_type="fully_random",
            fit_with_Z=False
        )
    
    plt.close('all')
    
    
    if which == 2:
    
        print("\nWith unobservables (Bernoullian outcome + independent decisions)")
        
        print("Decision-maker in the data and model: random and y ~ x")
    
    
        dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim)
    
    
        decider = lambda x: randomDecider(x, nJudges_M=M_sim, use_acceptance_rates=True)
    
    
        perfComp(