#!/usr/bin/env python3 # -*- coding: utf-8 -*- ''' # Author: Riku Laine # Date: 12AUG2019 # Project name: Potential outcomes in model evaluation # Description: This script creates the figures and results used # in synthetic data experiments. # # Parameters: # ----------- # (1) figure_path : file location for saving the created figures. # (2) N_sim : Size of simulated data set. # (3) M_sim : Number of judges in simulated data, # N_sim must be divisible by M_sim! # (4) which : Which data + outcome analysis should be performed. Ranges from # 1 to 14. See at the end of the script which figure is which. # Separate execution preferred for keeping runtime and memory decent. # (5) group_amount : How many groups if non-linear model is used. # (6) stan_code_file_name : Name of file containing the stan model code. # (7) sigma_tau : Values of prior variance. # ''' # Refer to the `notes.tex` file for explanations about the modular framework. # Imports import numpy as np import pandas as pd import matplotlib.pyplot as plt import scipy.stats as scs import scipy.special as ssp import numpy.random as npr from sklearn.linear_model import LogisticRegression from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import train_test_split import pystan import gc plt.switch_backend('agg') import sys # figure storage name figure_path = sys.argv[1] # Size of simulated data set N_sim = int(sys.argv[2]) # Number of judges in simulated data, N_sim must be divisible by M_sim! M_sim = int(sys.argv[3]) # Which data + outcome generation should be performed. which = int(sys.argv[4]) # How many groups if jung model is used group_amount = int(sys.argv[5]) # Name of stan model code file stan_code_file_name = sys.argv[6] # Variance prior sigma_tau = float(sys.argv[7]) # Settings plt.rcParams.update({'font.size': 16}) plt.rcParams.update({'figure.figsize': (10, 6)}) print("These results have been obtained with the following settings:") print("Number of observations in the simulated data:", N_sim) print("Number of judges in the simulated data:", M_sim) print("Number of groups:", group_amount) print("Prior for the variances:", sigma_tau) # Basic functions def inverseLogit(x): return 1.0 / (1.0 + np.exp(-1.0 * x)) def logit(x): return np.log(x) - np.log(1.0 - x) def inverseCumulative(x, mu, sigma): '''Compute the inverse of the cumulative distribution of logit-normal distribution at x with parameters mu and sigma (mean and st.dev.).''' return inverseLogit(ssp.erfinv(2 * x - 1) * np.sqrt(2 * sigma**2) - mu) def standardError(p, n): denominator = p * (1 - p) return np.sqrt(denominator / n) # ## Data generation modules def bernoulliDGWithoutUnobservables(N_total=50000): '''Generates data | Variables: X, Y | Outcome from Bernoulli''' df = pd.DataFrame() # Sample feature X from standard Gaussian distribution, N(0, 1). df = df.assign(X=npr.normal(size=N_total)) # Calculate P(Y=0|X=x) = 1 / (1 + exp(-X)) = inverseLogit(X) df = df.assign(probabilities_Y=inverseLogit(df.X)) # Draw Y ~ Bernoulli(1 - inverseLogit(X)) # Note: P(Y=1|X=x) = 1 - P(Y=0|X=x) = 1 - inverseLogit(X) results = npr.binomial(n=1, p=1 - df.probabilities_Y, size=N_total) df = df.assign(result_Y=results) return df def thresholdDGWithUnobservables(N_total=50000, beta_X=1.0, beta_Z=1.0, beta_W=0.2): '''Generates data | Variables: X, Z, W, Y | Outcome by threshold''' df = pd.DataFrame() # Sample the variables from standard Gaussian distributions. df = df.assign(X=npr.normal(size=N_total)) df = df.assign(Z=npr.normal(size=N_total)) df = df.assign(W=npr.normal(size=N_total)) # Calculate P(Y=0|X, Z, W) probabilities_Y = inverseLogit(beta_X * df.X + beta_Z * df.Z + beta_W * df.W) df = df.assign(probabilities_Y=probabilities_Y) # Result is 0 if P(Y = 0| X = x; Z = z; W = w) >= 0.5 , 1 otherwise df = df.assign(result_Y=np.where(df.probabilities_Y >= 0.5, 0, 1)) return df def bernoulliDGWithUnobservables(N_total=50000, beta_X=1.0, beta_Z=1.0, beta_W=0.2): '''Generates data | Variables: X, Z, W, Y | Outcome from Bernoulli''' df = pd.DataFrame() # Sample feature X, Z and W from standard Gaussian distribution, N(0, 1). df = df.assign(X=npr.normal(size=N_total)) df = df.assign(Z=npr.normal(size=N_total)) df = df.assign(W=npr.normal(size=N_total)) # Calculate P(Y=0|X=x) = 1 / (1 + exp(-X)) = inverseLogit(X) probabilities_Y = inverseLogit(beta_X * df.X + beta_Z * df.Z + beta_W * df.W) df = df.assign(probabilities_Y=probabilities_Y) # Draw Y from Bernoulli distribution results = npr.binomial(n=1, p=1 - df.probabilities_Y, size=N_total) df = df.assign(result_Y=results) return df # ## Decider modules def humanDeciderLakkaraju(df, featureX_col, featureZ_col=None, nJudges_M=100, beta_X=1, beta_Z=1, add_epsilon=True): '''Decider module | Non-independent batch decisions.''' # Decider as specified by Lakkaraju et al. # Assert that every judge will have the same number of subjects. assert df.shape[0] % nJudges_M == 0, "Can't assign subjets evenly!" # Compute the number of subjects allocated for each judge. nSubjects_N = int(df.shape[0] / nJudges_M) # Assign judge IDs as running numbering from 0 to nJudges_M - 1 df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N)) # Sample acceptance rates uniformly from a closed interval # from 0.1 to 0.9 and round to tenth decimal place. acceptance_rates = npr.uniform(.1, .9, nJudges_M) acceptance_rates = np.round(acceptance_rates, 10) # Replicate the rates so they can be attached to the corresponding judge ID. df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N)) # Add noise if add_epsilon: epsilon = np.sqrt(0.1) * npr.normal(size=df.shape[0]) else: epsilon = 0 # Compute probability for jail decision if featureZ_col is None: probabilities_T = inverseLogit(beta_X * df[featureX_col] + epsilon) else: probabilities_T = inverseLogit(beta_X * df[featureX_col] + beta_Z * df[featureZ_col] + epsilon) df = df.assign(probabilities_T=probabilities_T) # Sort by judges then probabilities in decreasing order # Most dangerous for each judge are at the top. df.sort_values(by=["judgeID_J", "probabilities_T"], ascending=False, inplace=True) # Iterate over the data. Subject will be given a negative decision # if they are in the top (1-r)*100% of the individuals the judge will judge. # I.e. if their within-judge-index is under 1 - acceptance threshold times # the number of subjects assigned to each judge they will receive a # negative decision. df.reset_index(drop=True, inplace=True) df['decision_T'] = np.where((df.index.values % nSubjects_N) < ((1 - df['acceptanceRate_R']) * nSubjects_N), 0, 1) df_labeled = df.copy() # Hide unobserved df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan return df_labeled, df def bernoulliDecider(df, featureX_col, featureZ_col=None, nJudges_M=100, beta_X=1, beta_Z=1, add_epsilon=True): '''Use X and Z to make a decision with probability P(T=0|X, Z)=inverseLogit(beta_X*X+beta_Z*Z).''' # No leniencies involved # Assert that every judge will have the same number of subjects. assert df.shape[0] % nJudges_M == 0, "Can't assign subjets evenly!" # Compute the number of subjects allocated for each judge. nSubjects_N = int(df.shape[0] / nJudges_M) # Assign judge IDs as running numbering from 0 to nJudges_M - 1 df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N)) if add_epsilon: epsilon = np.sqrt(0.1) * npr.normal(size=df.shape[0]) else: epsilon = 0 if featureZ_col is None: probabilities_T = inverseLogit(beta_X * df[featureX_col] + epsilon) else: probabilities_T = inverseLogit(beta_X * df[featureX_col] + beta_Z * df[featureZ_col] + epsilon) df = df.assign(probabilities_T=probabilities_T) # Draw T from Bernoulli distribution decisions = npr.binomial(n=1, p=1 - df.probabilities_T, size=df.shape[0]) df = df.assign(decision_T=decisions) # Calculate the acceptance rates. acceptance_rates = df.groupby('judgeID_J').mean().decision_T.values # Replicate the rates so they can be attached to the corresponding judge ID. df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N)) df_labeled = df.copy() df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan return df_labeled, df def quantileDecider(df, featureX_col, featureZ_col=None, nJudges_M=100, beta_X=1, beta_Z=1, add_epsilon=True, leniency_upper_limit=0.9): '''Assign decisions by the value of inverse cumulative distribution function of the logit-normal distribution at leniency r.''' # Assert that every judge will have the same number of subjects. assert df.shape[0] % nJudges_M == 0, "Can't assign subjets evenly!" # Compute the number of subjects allocated for each judge. nSubjects_N = int(df.shape[0] / nJudges_M) # Assign judge IDs as running numbering from 0 to nJudges_M - 1 df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N)) # Sample acceptance rates uniformly from a closed interval # from 0.1 to 0.9 and round to tenth decimal place. acceptance_rates = npr.uniform(.1, leniency_upper_limit, nJudges_M) acceptance_rates = np.round(acceptance_rates, 10) # Replicate the rates so they can be attached to the corresponding judge ID. df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N)) if add_epsilon: epsilon = np.sqrt(0.1) * npr.normal(size=df.shape[0]) else: epsilon = 0 if featureZ_col is None: probabilities_T = inverseLogit(beta_X * df[featureX_col] + epsilon) # Compute the bounds straight from the inverse cumulative. # Assuming X is N(0, 1) so Var(bX*X)=bX**2*Var(X)=bX**2. df = df.assign(bounds=inverseCumulative( x=df.acceptanceRate_R, mu=0, sigma=np.sqrt(beta_X**2))) else: probabilities_T = inverseLogit(beta_X * df[featureX_col] + beta_Z * df[featureZ_col] + epsilon) # Compute the bounds straight from the inverse cumulative. # Assuming X and Z are i.i.d standard Gaussians with variance 1. # Thus Var(bx*X+bZ*Z)= bX**2*Var(X)+bZ**2*Var(Z). df = df.assign(bounds=inverseCumulative( x=df.acceptanceRate_R, mu=0, sigma=np.sqrt(beta_X**2 + beta_Z**2))) df = df.assign(probabilities_T=probabilities_T) # Assign negative decision if the predicted probability (probabilities_T) is # over the judge's threshold (bounds). df = df.assign(decision_T=np.where(df.probabilities_T >= df.bounds, 0, 1)) # Calculate the observed acceptance rates. acceptance_rates = df.groupby('judgeID_J').mean().decision_T.values # Replicate the rates so they can be attached to the corresponding judge ID. df.acceptanceRate_R = np.repeat(acceptance_rates, nSubjects_N) df_labeled = df.copy() df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan return df_labeled, df def randomDecider(df, nJudges_M=100, use_acceptance_rates=False): '''Doesn't use any information about X and Z to make decisions. If use_acceptance_rates is False (default) then all decisions are positive with probability 0.5. If True, probabilities will be sampled from U(0.1, 0.9) and rounded to tenth decimal place.''' # Assert that every judge will have the same number of subjects. assert df.shape[0] % nJudges_M == 0, "Can't assign subjets evenly!" # Compute the number of subjects allocated for each judge. nSubjects_N = int(df.shape[0] / nJudges_M) # Assign judge IDs as running numbering from 0 to nJudges_M - 1 df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N)) if use_acceptance_rates: # Sample acceptance rates uniformly from a closed interval # from 0.1 to 0.9 and round to tenth decimal place. # 26JUL2019: Fix one leniency to 0.9 so that contraction can compute all # values. acceptance_rates = np.append(npr.uniform(.1, .9, nJudges_M - 1), 0.9) acceptance_rates = np.round(acceptance_rates, 10) else: # No real leniency here -> set to 0.5. acceptance_rates = np.ones(nJudges_M) * 0.5 # Replicate the rates so they can be attached to the corresponding judge ID. df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N)) df = df.assign( decision_T=npr.binomial(n=1, p=df.acceptanceRate_R, size=df.shape[0])) df_labeled = df.copy() df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan return df_labeled, df def biasDecider(df, featureX_col, featureZ_col=None, nJudges_M=100, beta_X=1, beta_Z=1, add_epsilon=True): ''' Biased decider: If X > 1, then X <- X * 0.75. People with high X, get more positive decisions as they should. And if -2 < X -1, then X <- X + 0.5. People with X in [-2, 1], get less positive decisions as they should. ''' # If X > 1, then X <- X * 0.75. People with high X, get more positive # decisions as they should df = df.assign(biased_X=np.where(df[featureX_col] > 1, df[featureX_col] * 0.75, df[featureX_col])) # If -2 < X -1, then X <- X + 0.5. People with X in [-2, 1], get less # positive decisions as they should df.biased_X = np.where((df.biased_X > -2) & (df.biased_X < -1) == 1, df.biased_X + 0.5, df.biased_X) # Assert that every judge will have the same number of subjects. assert df.shape[0] % nJudges_M == 0, "Can't assign subjets evenly!" # Use quantile decider, but judge by the biased X. df_labeled, df = quantileDecider(df, featureX_col='biased_X', featureZ_col=featureZ_col, nJudges_M=nJudges_M, beta_X=beta_X, beta_Z=beta_Z, add_epsilon=add_epsilon) return df_labeled, df # ## Evaluator modules # ### Convenience functions def fitPredictiveModel(x_train, y_train, x_test, class_value, model_type=None): ''' Fit a predictive model (default logistic regression) with given training instances and return probabilities for test instances to obtain a given class label. Arguments: ---------- x_train -- x values of training instances y_train -- y values of training instances x_test -- x values of test instances class_value -- class label for which the probabilities are counted for. model_type -- type of model to be fitted. Returns: -------- (1) Trained predictive model (2) Probabilities for given test inputs for given class. ''' if model_type is None or model_type in ["logistic_regression", "lr"]: # Instantiate the model (using the default parameters) logreg = LogisticRegression(solver='lbfgs') # Check shape and fit the model. if x_train.ndim == 1: logreg = logreg.fit(x_train.values.reshape(-1, 1), y_train) else: logreg = logreg.fit(x_train, y_train) label_probs_logreg = getProbabilityForClass(x_test, logreg, class_value) return logreg, label_probs_logreg elif model_type in ["random_forest", "rf"]: # Instantiate the model forest = RandomForestClassifier(n_estimators=100, max_depth=3) # Check shape and fit the model. if x_train.ndim == 1: forest = forest.fit(x_train.values.reshape(-1, 1), y_train) else: forest = forest.fit(x_train, y_train) label_probs_forest = getProbabilityForClass(x_test, forest, class_value) return forest, label_probs_forest elif model_type == "fully_random": label_probs = np.ones_like(x_test) / 2 model_object = lambda x: 0.5 return model_object, label_probs else: raise ValueError("Invalid model_type!", model_type) def getProbabilityForClass(x, model, class_value): ''' Function (wrapper) for obtaining the probability of a class given x and a predictive model. Arguments: ----------- x -- individual features, an array of shape (observations, features) model -- a trained sklearn model. Predicts probabilities for given x. Should accept input of shape (observations, features) class_value -- the resulting class to predict (usually 0 or 1). Returns: -------- (1) The probabilities of given class label for each x. ''' if x.ndim == 1: # if x is vector, transform to column matrix. f_values = model.predict_proba(np.array(x).reshape(-1, 1)) else: f_values = model.predict_proba(x) # Get correct column of predicted class, remove extra dimensions and return. return f_values[:, model.classes_ == class_value].flatten() # ### Contraction algorithm # # Below is an implementation of Lakkaraju's team's algorithm presented in # [their paper](https://helka.finna.fi/PrimoRecord/pci.acm3098066). Relevant # parameters to be passed to the function are presented in the description. def contraction(df, judgeIDJ_col, decisionT_col, resultY_col, modelProbS_col, accRateR_col, r): ''' This is an implementation of the algorithm presented by Lakkaraju et al. in their paper "The Selective Labels Problem: Evaluating Algorithmic Predictions in the Presence of Unobservables" (2017). Arguments: ---------- df -- The (Pandas) data frame containing the data, judge decisions, judge IDs, results and probability scores. judgeIDJ_col -- String, the name of the column containing the judges' IDs in df. decisionT_col -- String, the name of the column containing the judges' decisions resultY_col -- String, the name of the column containing the realization modelProbS_col -- String, the name of the column containing the probability scores from the black-box model B. accRateR_col -- String, the name of the column containing the judges' acceptance rates r -- Float between 0 and 1, the given acceptance rate. Returns: -------- (1) The estimated failure rate at acceptance rate r. ''' # Get ID of the most lenient judge. most_lenient_ID_q = df[judgeIDJ_col].loc[df[accRateR_col].idxmax()] # Subset. "D_q is the set of all observations judged by q." D_q = df[df[judgeIDJ_col] == most_lenient_ID_q].copy() ##### Agreement rate computation begins ####### max_leniency = max(df[accRateR_col]) if max_leniency < r: return np.nan, np.nan # Sort D_q # "Observations deemed as high risk by B are at the top of this list" D_sort_q = D_q.sort_values(by=modelProbS_col, ascending=False) # The agreement rate is now computed as the percentage of all the positive # decisions given by q in the 1-r % of the most dangerous subjects. all_negatives = np.sum(D_sort_q[decisionT_col] == 0) n_most_dangerous = int(round(D_q.shape[0] * (1-max_leniency))) agreed_decisions = np.sum(D_sort_q[decisionT_col][0:n_most_dangerous] == 0) print("The agreement rate of contraction is:", agreed_decisions/all_negatives) ##### Agreement rate computation ends ####### # All observations of R_q have observed outcome labels. # "R_q is the set of observations in D_q with observed outcome labels." R_q = D_q[D_q[decisionT_col] == 1].copy() # Sort observations in R_q in descending order of confidence scores S and # assign to R_sort_q. # "Observations deemed as high risk by B are at the top of this list" R_sort_q = R_q.sort_values(by=modelProbS_col, ascending=False) number_to_remove = int( round((1.0 - r) * D_q.shape[0] - (D_q.shape[0] - R_q.shape[0]))) # "R_B is the list of observations assigned to t = 1 by B" R_B = R_sort_q[number_to_remove:R_sort_q.shape[0]] return np.sum(R_B[resultY_col] == 0) / D_q.shape[0], D_q.shape[0] # ### Evaluators def trueEvaluationEvaluator(df, featureX_col, decisionT_col, resultY_col, r): df.sort_values(by='B_prob_0_model', inplace=True, ascending=True) to_release = int(round(df.shape[0] * r)) failed = df[resultY_col][0:to_release] == 0 return np.sum(failed) / df.shape[0], df.shape[0] def labeledOutcomesEvaluator(df, featureX_col, decisionT_col, resultY_col, r, adjusted=False): df_observed = df.loc[df[decisionT_col] == 1, :] df_observed = df_observed.sort_values(by='B_prob_0_model', inplace=False, ascending=True) to_release = int(round(df_observed.shape[0] * r)) failed = df_observed[resultY_col][0:to_release] == 0 if adjusted: return np.mean(failed), df.shape[0] return np.sum(failed) / df.shape[0], df.shape[0] def humanEvaluationEvaluator(df, judgeIDJ_col, decisionT_col, resultY_col, accRateR_col, r): # Get judges with correct leniency as list is_correct_leniency = df[accRateR_col].round(1) == r # No judges with correct leniency if np.sum(is_correct_leniency) == 0: return np.nan, np.nan correct_leniency_list = df.loc[is_correct_leniency, judgeIDJ_col] # Released are the people they judged and released, T = 1 released = df[df[judgeIDJ_col].isin(correct_leniency_list) & (df[decisionT_col] == 1)] failed = released[resultY_col] == 0 # Get their failure rate, aka ratio of reoffenders to number of people judged in total return np.sum(failed) / correct_leniency_list.shape[0], correct_leniency_list.shape[0] def monteCarloEvaluator(df, featureX_col, decisionT_col, resultY_col, accRateR_col, r, mu_X=0, mu_Z=0, beta_X=1, beta_Z=1, sigma_X=1, sigma_Z=1): # Compute the predicted/assumed decision bounds for all the judges. q_r = inverseCumulative(x=df[accRateR_col], mu=mu_X + mu_Z, sigma=np.sqrt((beta_X * sigma_X)**2 + (beta_Z * sigma_Z)**2)) df = df.assign(bounds=logit(q_r) - df[featureX_col]) # Compute the expectation of Z when it is known to come from truncated # Gaussian. alphabeta = (df.bounds - mu_Z) / (sigma_Z) Z_ = scs.norm.sf(alphabeta, loc=mu_Z, scale=sigma_Z) # 1 - cdf(ab) # E(Z | Z > a). Expectation of Z if negative decision. exp_lower_trunc = mu_Z + (sigma_Z * scs.norm.pdf(alphabeta)) / Z_ # E(Z | Z < b). Expectation of Z if positive decision. exp_upper_trunc = mu_Z - ( sigma_Z * scs.norm.pdf(alphabeta)) / scs.norm.cdf(alphabeta) exp_Z = (1 - df[decisionT_col] ) * exp_lower_trunc + df[decisionT_col] * exp_upper_trunc # Attach the predicted probability for Y=0 to data. df = df.assign(predicted_Y=inverseLogit(df[featureX_col] + exp_Z)) # Predictions drawn from binomial. predictions = npr.binomial(n=1, p=1 - df.predicted_Y, size=df.shape[0]) df[resultY_col] = np.where(df[decisionT_col] == 0, predictions, df[resultY_col]) df.sort_values(by='B_prob_0_model', inplace=True, ascending=True) to_release = int(round(df.shape[0] * r)) failed = df[resultY_col][0:to_release] == 0 return np.sum(failed) / df.shape[0], df.shape[0] def drawDiagnostics(title, save_name, f_rates, titles): cols = 2 rows = np.ceil(len(f_rates) / cols) plt.figure(figsize=(16, 4.5*rows+1)) ax = plt.subplot(rows, cols, 1) x_ax = np.arange(1, 9, 1) / 10 plt.boxplot(f_rates[0], labels=x_ax) plt.title(titles[0]) plt.xlabel('Acceptance rate') plt.ylabel('Failure rate') plt.grid() for i in range(len(f_rates)): plt.subplot(rows, cols, i + 1, sharey=ax) plt.boxplot(f_rates[i], labels=x_ax) plt.title(titles[i]) plt.xlabel('Acceptance rate') plt.ylabel('Failure rate') plt.grid() plt.tight_layout() plt.subplots_adjust(top=0.85) plt.suptitle(title, y=0.96, weight='bold') plt.savefig(save_name + '_diagnostic_plot') plt.show() def perfComp(dgModule, deciderModule, title, save_name, model_type=None, fit_with_Z=False, nIter=10): '''Performance comparison, a.k.a the main figure. Parameters: ----------- dgModule: Module creating the data decisderModule: Module assigning the decisions title: Title for the diagnostic figure save_name: name for saving model_type: what kind of model should be fitted. Logistic regression ("lr"), random forest ("rf") or fully random ("fully_random") fit_with_Z: should the predictive model be fit with Z (the latent variable) nIter: Number of train-test splits to be performed. ''' failure_rates = np.zeros((8, 4)) failure_stds = np.zeros((8, 4)) f_rate_true = np.zeros((nIter, 8)) f_rate_label = np.zeros((nIter, 8)) f_rate_cont = np.zeros((nIter, 8)) f_rate_cf = np.zeros((nIter, 8)) # Create data df = dgModule() # Assign decicions df_labeled, df_unlabeled = deciderModule(df) for i in range(nIter): # Split data train, test_labeled = train_test_split(df_labeled, test_size=0.5) # Assign same observations to unlabeled dat test_unlabeled = df_unlabeled.iloc[test_labeled.index.values] # Train model if fit_with_Z: B_model, predictions = fitPredictiveModel( train.loc[train['decision_T'] == 1, ['X', 'Z']], train.loc[train['decision_T'] == 1, 'result_Y'], test_labeled[['X', 'Z']], 0, model_type=model_type) else: B_model, predictions = fitPredictiveModel( train.loc[train['decision_T'] == 1, 'X'], train.loc[train['decision_T'] == 1, 'result_Y'], test_labeled['X'], 0, model_type=model_type) # Attach predictions to data test_labeled = test_labeled.assign(B_prob_0_model=predictions) test_unlabeled = test_unlabeled.assign(B_prob_0_model=predictions) test_labeled.sort_values(by='B_prob_0_model', inplace=True, ascending=True) # Assign group IDs kk_array = pd.qcut(test_labeled['B_prob_0_model'], group_amount, labels=False) # Find observed values observed = test_labeled['decision_T'] == 1 # Assign data to the model ############################### # Change this assignment if you want to use predictions as input to the # model. ############################### dat = dict(D=1, N_obs=np.sum(observed), N_cens=np.sum(~observed), K=group_amount, sigma_tau=sigma_tau, M=len(set(df_unlabeled['judgeID_J'])), jj_obs=test_labeled.loc[observed, 'judgeID_J']+1, jj_cens=test_labeled.loc[~observed, 'judgeID_J']+1, kk_obs=kk_array[observed]+1, kk_cens=kk_array[~observed]+1, dec_obs=test_labeled.loc[observed, 'decision_T'], dec_cens=test_labeled.loc[~observed, 'decision_T'], X_obs=test_labeled.loc[observed, 'X'].values.reshape(-1,1), X_cens=test_labeled.loc[~observed, 'X'].values.reshape(-1,1), y_obs=test_labeled.loc[observed, 'result_Y'].astype(int)) # Do the sampling - Change this if you wish to use other methods. fit = sm.sampling(data=dat, chains=5, iter=5000, control = dict(adapt_delta=0.9)) pars = fit.extract() plt.figure(figsize=(15,30)) fit.plot(); plt.savefig(save_name + '_stan_convergence_diagnostic_plot_' + str(i)) plt.show() plt.close('all') gc.collect() print(fit, file=open(save_name + '_stan_fit_diagnostics_' + str(i) + '.txt', 'w')) # Bayes # Format matrix, each row is a sample of the posterior # columns are observations y_imp = np.ones((pars['y_est'].shape[0], test_labeled.shape[0])) # Impute the unobserved with the estimated # Reverse the binary coding to compute failures as sum. 0 == 0 = 1 y_imp[:, ~observed] = 1 - pars['y_est'] # Remove the variables to conserve memory del(pars) del(fit) gc.collect() # Impute the observed y_imp[:, observed] = 1 - test_labeled.loc[observed, 'result_Y'] for r in range(1, 9): to_release = np.round(test_labeled.shape[0] * (r / 10)).astype(int) failed = np.sum(y_imp[:, 0:to_release], axis=1) f_rate_cf[i, r - 1] = np.mean(failed / test_labeled.shape[0]) print(".", end="") # True evaluation f_rate_true[i, r - 1], N = trueEvaluationEvaluator(test_unlabeled, 'X', 'decision_T', 'result_Y', r / 10) # Labeled outcomes only f_rate_label[i, r - 1], N = labeledOutcomesEvaluator(test_labeled, 'X', 'decision_T', 'result_Y', r / 10) # Contraction f_rate_cont[i, r - 1], N = contraction(test_labeled, 'judgeID_J', 'decision_T', 'result_Y', 'B_prob_0_model', 'acceptanceRate_R', r / 10) failure_rates[:, 0] = np.mean(f_rate_true, axis=0) failure_rates[:, 1] = np.mean(f_rate_label, axis=0) failure_rates[:, 2] = np.mean(f_rate_cont, axis=0) failure_rates[:, 3] = np.mean(f_rate_cf, axis=0) failure_stds[:, 0] = np.std(f_rate_true, axis=0) failure_stds[:, 1] = np.std(f_rate_label, axis=0) failure_stds[:, 2] = np.std(f_rate_cont, axis=0) failure_stds[:, 3] = np.std(f_rate_cf, axis=0) x_ax = np.arange(0.1, 0.9, 0.1) labels = ['True Evaluation', 'Labeled outcomes', 'Contraction', 'Counterfactuals'] colours = ['g', 'magenta', 'b', 'r'] line_styles = ['--', ':', '-.', '-'] for i in range(failure_rates.shape[1]): plt.errorbar(x_ax, failure_rates[:, i], label=labels[i], c=colours[i], linestyle=line_styles[i], yerr=failure_stds[:, i]) #plt.title('Failure rate vs. Acceptance rate') plt.xlabel('Acceptance rate') plt.ylabel('Failure rate') plt.legend() plt.grid() plt.savefig(save_name + '_all') plt.show() print("\nFailure rates:") print(np.array2string(failure_rates, formatter={'float_kind':lambda x: "%.5f" % x})) print("\nStandard deviations:") print(np.array2string(failure_stds, formatter={'float_kind':lambda x: "%.5f" % x})) print("\nMean absolute errors:") for i in range(1, failure_rates.shape[1]): print( labels[i].ljust(len(max(labels, key=len))), np.round( np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])), 5)) drawDiagnostics(title=title, save_name=save_name, f_rates=[f_rate_true, f_rate_label, f_rate_cont, f_rate_cf], titles=labels) # Compile stan model sm = pystan.StanModel(file=stan_code_file_name) if which == 1: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: random and random") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: randomDecider(x, nJudges_M=M_sim, use_acceptance_rates=True) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="fully_random", fit_with_Z=False ) plt.close('all') gc.collect() if which == 2: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: random and y ~ x") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: randomDecider(x, nJudges_M=M_sim, use_acceptance_rates=True) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=False ) if which == 3: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: random and y ~ x + z") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: randomDecider(x, nJudges_M=M_sim, use_acceptance_rates=True) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=True ) if which == 4: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: y ~ x and random") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: quantileDecider( x, featureX_col="X", featureZ_col=None, nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="fully_random", fit_with_Z=False ) if which == 5: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: y ~ x and y ~ x") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: quantileDecider( x, featureX_col="X", featureZ_col=None, nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=False ) if which == 6: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: y ~ x and y ~ x + z") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: quantileDecider( x, featureX_col="X", featureZ_col=None, nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=True ) if which == 7: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: y ~ x + z and random") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: quantileDecider( x, featureX_col="X", featureZ_col='Z', nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="fully_random", fit_with_Z=False ) if which == 8: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: y ~ x + z and y ~ x") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: quantileDecider( x, featureX_col="X", featureZ_col='Z', nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=False ) if which == 9: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: y ~ x + z and y ~ x + z") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: quantileDecider( x, featureX_col="X", featureZ_col='Z', nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=True ) if which == 10: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: biased and random") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: biasDecider( x, featureX_col="X", featureZ_col='Z', nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="fully_random", fit_with_Z=False ) if which == 11: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: biased and y ~ x") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: biasDecider( x, featureX_col="X", featureZ_col='Z', nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=False ) if which == 12: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Decision-maker in the data and model: biased and y ~ x + z") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: biasDecider( x, featureX_col="X", featureZ_col='Z', nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=True ) if which == 13: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Fewer subjects per decision-maker, from 500 to 100. Now N_total =", N_sim / 5) dg = lambda: bernoulliDGWithUnobservables(N_total=int(N_sim / 5)) decider = lambda x: quantileDecider( x, featureX_col="X", featureZ_col=None, nJudges_M=M_sim, beta_X=1, beta_Z=1) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=True ) if which == 14: print("\nWith unobservables (Bernoullian outcome + independent decisions)") print("Now R ~ Uniform(0.1, 0.4)") dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim) decider = lambda x: quantileDecider(x, featureX_col="X", featureZ_col=None, nJudges_M=M_sim, beta_X=1, beta_Z=1, leniency_upper_limit=0.4) perfComp( dg, lambda x: decider(x), "Fluctuation of failure rate estimates across iterations\n" + "Bernoulli + independent decisions, without unobservables", figure_path + "sl_bernoulli_independent_without_Z", model_type="lr", fit_with_Z=True )