#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Project name: Potential outcomes in model evaluation
# Description: This script creates the figures and results used
# in synthetic data experiments.
#
# Parameters:
# -----------
# (1) figure_path : file location for saving the created figures.
# (2) N_sim : Size of simulated data set.
# (3) M_sim : Number of judges in simulated data,
# N_sim must be divisible by M_sim!
# (4) which : Which data + outcome analysis should be performed. Ranges from
# 1 to 14. See the end of the script for which figure corresponds to which value.
# Separate execution is preferred to keep runtime and memory use reasonable.
# (5) group_amount : How many groups if non-linear model is used.
# (6) stan_code_file_name : Name of file containing the stan model code.
# (7) sigma_tau : Values of prior variance.
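# Example invocation (illustrative only: the script name, path and values
# below are placeholders, not fixed by this documentation):
#
#   python3 analysis_script.py results/figures/ 12000 12 1 6 model.stan 1.0
#
# i.e. figure_path N_sim M_sim which group_amount stan_code_file_name sigma_tau,
# with N_sim divisible by M_sim as required above.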
#
# Refer to the `notes.tex` file for explanations about the modular framework.
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as scs
import scipy.special as ssp
import numpy.random as npr
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pystan
import gc
plt.switch_backend('agg')
import sys
# figure storage name
figure_path = sys.argv[1]
# Size of simulated data set
N_sim = int(sys.argv[2])
# Number of judges in simulated data, N_sim must be divisible by M_sim!
M_sim = int(sys.argv[3])
# Which data + outcome generation should be performed.
which = int(sys.argv[4])
# How many groups if jung model is used
group_amount = int(sys.argv[5])
# Name of stan model code file
stan_code_file_name = sys.argv[6]
# Variance prior
sigma_tau = float(sys.argv[7])
# Settings
plt.rcParams.update({'font.size': 16})
plt.rcParams.update({'figure.figsize': (10, 6)})
print("These results have been obtained with the following settings:")
print("Number of observations in the simulated data:", N_sim)
print("Number of judges in the simulated data:", M_sim)
print("Number of groups:", group_amount)
print("Prior for the variances:", sigma_tau)
# Basic functions
def inverseLogit(x):
return 1.0 / (1.0 + np.exp(-1.0 * x))
def logit(x):
return np.log(x) - np.log(1.0 - x)
def inverseCumulative(x, mu, sigma):
'''Compute the inverse of the cumulative distribution of logit-normal
distribution at x with parameters mu and sigma (mean and st.dev.).'''
return inverseLogit(ssp.erfinv(2 * x - 1) * np.sqrt(2 * sigma**2) - mu)
def standardError(p, n):
denominator = p * (1 - p)
return np.sqrt(denominator / n)
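# Cheap sanity checks for the helpers above (illustrative additions, not part
# of the original pipeline; safe to remove):
assert np.isclose(inverseLogit(0.0), 0.5)
assert np.isclose(logit(0.5), 0.0)
# With mu = 0 the logit-normal median is inverseLogit(0) = 0.5.
assert np.isclose(inverseCumulative(0.5, mu=0, sigma=1), 0.5)
# standardError(0.5, 100) = sqrt(0.25 / 100) = 0.05.
assert np.isclose(standardError(0.5, 100), 0.05)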
# ## Data generation modules
def bernoulliDGWithoutUnobservables(N_total=50000):
'''Generates data | Variables: X, Y | Outcome from Bernoulli'''
df = pd.DataFrame()
# Sample feature X from standard Gaussian distribution, N(0, 1).
df = df.assign(X=npr.normal(size=N_total))
# Calculate P(Y=0|X=x) = 1 / (1 + exp(-X)) = inverseLogit(X)
df = df.assign(probabilities_Y=inverseLogit(df.X))
# Draw Y ~ Bernoulli(1 - inverseLogit(X))
# Note: P(Y=1|X=x) = 1 - P(Y=0|X=x) = 1 - inverseLogit(X)
results = npr.binomial(n=1, p=1 - df.probabilities_Y, size=N_total)
df = df.assign(result_Y=results)
return df
def thresholdDGWithUnobservables(N_total=50000,
beta_X=1.0,
beta_Z=1.0,
beta_W=0.2):
'''Generates data | Variables: X, Z, W, Y | Outcome by threshold'''
df = pd.DataFrame()
# Sample the variables from standard Gaussian distributions.
df = df.assign(X=npr.normal(size=N_total))
df = df.assign(Z=npr.normal(size=N_total))
df = df.assign(W=npr.normal(size=N_total))
# Calculate P(Y=0|X, Z, W)
probabilities_Y = inverseLogit(beta_X * df.X + beta_Z * df.Z + beta_W * df.W)
df = df.assign(probabilities_Y=probabilities_Y)
# Result is 0 if P(Y = 0| X = x; Z = z; W = w) >= 0.5 , 1 otherwise
df = df.assign(result_Y=np.where(df.probabilities_Y >= 0.5, 0, 1))
return df
def bernoulliDGWithUnobservables(N_total=50000,
beta_X=1.0,
beta_Z=1.0,
beta_W=0.2):
'''Generates data | Variables: X, Z, W, Y | Outcome from Bernoulli'''
df = pd.DataFrame()
# Sample feature X, Z and W from standard Gaussian distribution, N(0, 1).
df = df.assign(X=npr.normal(size=N_total))
df = df.assign(Z=npr.normal(size=N_total))
df = df.assign(W=npr.normal(size=N_total))
# Calculate P(Y=0|X, Z, W) = inverseLogit(beta_X*X + beta_Z*Z + beta_W*W)
probabilities_Y = inverseLogit(beta_X * df.X + beta_Z * df.Z + beta_W * df.W)
df = df.assign(probabilities_Y=probabilities_Y)
# Draw Y from Bernoulli distribution
results = npr.binomial(n=1, p=1 - df.probabilities_Y, size=N_total)
df = df.assign(result_Y=results)
return df
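# Illustrative use of the generators above (kept as a comment so the pipeline
# is unchanged; the column names are those created by the functions):
# df_example = bernoulliDGWithUnobservables(N_total=1000)
# df_example[['X', 'Z', 'W', 'probabilities_Y', 'result_Y']].head()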
# ## Decider modules
def humanDeciderLakkaraju(df,
featureX_col,
featureZ_col=None,
nJudges_M=100,
beta_X=1,
beta_Z=1,
add_epsilon=True):
'''Decider module | Non-independent batch decisions.'''
# Decider as specified by Lakkaraju et al.
# Assert that every judge will have the same number of subjects.
assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
# Compute the number of subjects allocated for each judge.
nSubjects_N = int(df.shape[0] / nJudges_M)
# Assign judge IDs as running numbering from 0 to nJudges_M - 1
df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))
# Sample acceptance rates uniformly from a closed interval
# from 0.1 to 0.9 and round to tenth decimal place.
acceptance_rates = npr.uniform(.1, .9, nJudges_M)
acceptance_rates = np.round(acceptance_rates, 10)
# Replicate the rates so they can be attached to the corresponding judge ID.
df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))
if add_epsilon:
epsilon = np.sqrt(0.1) * npr.normal(size=df.shape[0])
else:
epsilon = 0
# Compute the probability of a negative (jail) decision.
if featureZ_col is None:
probabilities_T = inverseLogit(beta_X * df[featureX_col] + epsilon)
else:
probabilities_T = inverseLogit(beta_X * df[featureX_col] +
beta_Z * df[featureZ_col] + epsilon)
df = df.assign(probabilities_T=probabilities_T)
# Sort by judges then probabilities in decreasing order
# Most dangerous for each judge are at the top.
df.sort_values(by=["judgeID_J", "probabilities_T"],
ascending=False,
inplace=True)
# Iterate over the data. A subject receives a negative decision if they are
# among the top (1 - r) * 100% most dangerous individuals the judge sees,
# i.e. if their within-judge index is less than (1 - acceptance rate) times
# the number of subjects assigned to each judge.
df.reset_index(drop=True, inplace=True)
df['decision_T'] = np.where((df.index.values % nSubjects_N) <
((1 - df['acceptanceRate_R']) * nSubjects_N),
0, 1)
df_labeled = df.copy()
# Hide unobserved
df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan
return df_labeled, df
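# Illustrative chaining of a generator and this decider (kept as a comment;
# the number of subjects must be divisible by the number of judges):
# df_example = bernoulliDGWithUnobservables(N_total=1000)
# labeled, full = humanDeciderLakkaraju(df_example, featureX_col='X',
#                                       featureZ_col='Z', nJudges_M=10)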
def bernoulliDecider(df,
featureX_col,
featureZ_col=None,
nJudges_M=100,
beta_X=1,
beta_Z=1,
add_epsilon=True):
'''Use X and Z to make a decision with probability
P(T=0|X, Z)=inverseLogit(beta_X*X+beta_Z*Z).'''
# No leniencies involved
# Assert that every judge will have the same number of subjects.
assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
# Compute the number of subjects allocated for each judge.
nSubjects_N = int(df.shape[0] / nJudges_M)
# Assign judge IDs as running numbering from 0 to nJudges_M - 1
df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))
if add_epsilon:
epsilon = np.sqrt(0.1) * npr.normal(size=df.shape[0])
else:
epsilon = 0
if featureZ_col is None:
probabilities_T = inverseLogit(beta_X * df[featureX_col] + epsilon)
else:
probabilities_T = inverseLogit(beta_X * df[featureX_col] +
beta_Z * df[featureZ_col] + epsilon)
df = df.assign(probabilities_T=probabilities_T)
# Draw T from Bernoulli distribution
decisions = npr.binomial(n=1, p=1 - df.probabilities_T, size=df.shape[0])
df = df.assign(decision_T=decisions)
# Calculate the acceptance rates.
acceptance_rates = df.groupby('judgeID_J').mean().decision_T.values
# Replicate the rates so they can be attached to the corresponding judge ID.
df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))
df_labeled = df.copy()
df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan
return df_labeled, df
def quantileDecider(df,
featureX_col,
featureZ_col=None,
nJudges_M=100,
beta_X=1,
beta_Z=1,
add_epsilon=True,
leniency_upper_limit=0.9):
'''Assign decisions by the value of inverse cumulative distribution function
of the logit-normal distribution at leniency r.'''
# Assert that every judge will have the same number of subjects.
assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
# Compute the number of subjects allocated for each judge.
nSubjects_N = int(df.shape[0] / nJudges_M)
# Assign judge IDs as running numbering from 0 to nJudges_M - 1
df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))
# Sample acceptance rates uniformly from a closed interval from 0.1 to
# leniency_upper_limit (default 0.9) and round to the tenth decimal place.
acceptance_rates = npr.uniform(.1, leniency_upper_limit, nJudges_M)
acceptance_rates = np.round(acceptance_rates, 10)
# Replicate the rates so they can be attached to the corresponding judge ID.
df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))
if add_epsilon:
epsilon = np.sqrt(0.1) * npr.normal(size=df.shape[0])
else:
epsilon = 0
if featureZ_col is None:
probabilities_T = inverseLogit(beta_X * df[featureX_col] + epsilon)
# Compute the bounds straight from the inverse cumulative.
# Assuming X is N(0, 1) so Var(bX*X)=bX**2*Var(X)=bX**2.
df = df.assign(bounds=inverseCumulative(
x=df.acceptanceRate_R, mu=0, sigma=np.sqrt(beta_X**2)))
else:
probabilities_T = inverseLogit(beta_X * df[featureX_col] +
beta_Z * df[featureZ_col] + epsilon)
# Compute the bounds straight from the inverse cumulative.
# Assuming X and Z are i.i.d standard Gaussians with variance 1.
# Thus Var(bx*X+bZ*Z)= bX**2*Var(X)+bZ**2*Var(Z).
df = df.assign(bounds=inverseCumulative(
x=df.acceptanceRate_R, mu=0, sigma=np.sqrt(beta_X**2 + beta_Z**2)))
df = df.assign(probabilities_T=probabilities_T)
# Assign negative decision if the predicted probability (probabilities_T) is
# over the judge's threshold (bounds).
df = df.assign(decision_T=np.where(df.probabilities_T >= df.bounds, 0, 1))
# Calculate the observed acceptance rates.
acceptance_rates = df.groupby('judgeID_J').mean().decision_T.values
# Replicate the rates so they can be attached to the corresponding judge ID.
df.acceptanceRate_R = np.repeat(acceptance_rates, nSubjects_N)
df_labeled = df.copy()
df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan
return df_labeled, df
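# Consistency note for the bounds above (illustrative, kept as a comment):
# with beta_X = beta_Z = 1 and X, Z ~ N(0, 1) i.i.d., the linear score is
# N(0, 2), so the share of subjects whose probabilities_T falls below the
# bound should be approximately r:
# r_check = 0.3
# bound = inverseCumulative(x=r_check, mu=0, sigma=np.sqrt(2))
# scs.norm.cdf(logit(bound), loc=0, scale=np.sqrt(2))  # approximately r_check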
def randomDecider(df, nJudges_M=100, use_acceptance_rates=False):
'''Doesn't use any information about X and Z to make decisions.
If use_acceptance_rates is False (default) then all decisions are positive
with probability 0.5. If True, probabilities will be sampled from
U(0.1, 0.9) and rounded to tenth decimal place.'''
# Assert that every judge will have the same number of subjects.
assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
# Compute the number of subjects allocated for each judge.
nSubjects_N = int(df.shape[0] / nJudges_M)
# Assign judge IDs as running numbering from 0 to nJudges_M - 1
df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))
if use_acceptance_rates:
# Sample acceptance rates uniformly from a closed interval
# from 0.1 to 0.9 and round to tenth decimal place.
# 26JUL2019: Fix one leniency to 0.9 so that contraction can compute all
# values.
acceptance_rates = np.append(npr.uniform(.1, .9, nJudges_M - 1), 0.9)
acceptance_rates = np.round(acceptance_rates, 10)
else:
# No real leniency here -> set to 0.5.
acceptance_rates = np.ones(nJudges_M) * 0.5
# Replicate the rates so they can be attached to the corresponding judge ID.
df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))
df = df.assign(
decision_T=npr.binomial(n=1, p=df.acceptanceRate_R, size=df.shape[0]))
df_labeled = df.copy()
df_labeled.loc[df.decision_T == 0, 'result_Y'] = np.nan
return df_labeled, df
def biasDecider(df,
featureX_col,
featureZ_col=None,
nJudges_M=100,
beta_X=1,
beta_Z=1,
add_epsilon=True):
'''
Biased decider: if X > 1, then X <- X * 0.75, so people with high X
get more positive decisions than they should. If -2 < X < -1, then
X <- X + 0.5, so people with X in (-2, -1) get fewer positive decisions
than they should.
'''
# If X > 1, then X <- X * 0.75. People with high X get more positive
# decisions than they should.
df = df.assign(biased_X=np.where(df[featureX_col] > 1, df[featureX_col] *
0.75, df[featureX_col]))
# If -2 < X < -1, then X <- X + 0.5. People with X in (-2, -1) get fewer
# positive decisions than they should.
df.biased_X = np.where((df.biased_X > -2) & (df.biased_X < -1),
df.biased_X + 0.5, df.biased_X)
# Assert that every judge will have the same number of subjects.
assert df.shape[0] % nJudges_M == 0, "Can't assign subjects evenly!"
# Use quantile decider, but judge by the biased X.
df_labeled, df = quantileDecider(df,
featureX_col='biased_X',
featureZ_col=featureZ_col,
nJudges_M=nJudges_M,
beta_X=beta_X,
beta_Z=beta_Z,
add_epsilon=add_epsilon)
return df_labeled, df
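# Worked illustration of the bias above (values follow the two rules):
#   X = 2.0  -> biased_X = 1.5,  perceived as less risky -> more releases
#               than warranted
#   X = -1.5 -> biased_X = -1.0, perceived as riskier -> fewer releases
#               than warranted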
# ## Evaluator modules
# ### Convenience functions
def fitPredictiveModel(x_train, y_train, x_test, class_value, model_type=None):
'''
Fit a predictive model (default logistic regression) with given training
instances and return probabilities for test instances to obtain a given
class label.
Arguments:
----------
x_train -- x values of training instances
y_train -- y values of training instances
x_test -- x values of test instances
class_value -- class label for which the probabilities are computed.
model_type -- type of model to be fitted.
Returns:
--------
(1) Trained predictive model
(2) Probabilities for given test inputs for given class.
'''
if model_type is None or model_type in ["logistic_regression", "lr"]:
# Instantiate the model (using the default parameters)
logreg = LogisticRegression(solver='lbfgs')
# Check shape and fit the model.
if x_train.ndim == 1:
logreg = logreg.fit(x_train.values.reshape(-1, 1), y_train)
else:
logreg = logreg.fit(x_train, y_train)
label_probs_logreg = getProbabilityForClass(x_test, logreg,
class_value)
return logreg, label_probs_logreg
elif model_type in ["random_forest", "rf"]:
# Instantiate the model
forest = RandomForestClassifier(n_estimators=100, max_depth=3)
# Check shape and fit the model.
if x_train.ndim == 1:
forest = forest.fit(x_train.values.reshape(-1, 1), y_train)
else:
forest = forest.fit(x_train, y_train)
label_probs_forest = getProbabilityForClass(x_test, forest,
class_value)
return forest, label_probs_forest
elif model_type == "fully_random":
label_probs = np.ones_like(x_test) / 2
model_object = lambda x: 0.5
return model_object, label_probs
else:
raise ValueError("Invalid model_type!", model_type)
def getProbabilityForClass(x, model, class_value):
'''
Function (wrapper) for obtaining the probability of a class given x and a
predictive model.
Arguments:
-----------
x -- individual features, an array of shape (observations, features)
model -- a trained sklearn model. Predicts probabilities for given x.
Should accept input of shape (observations, features)
class_value -- the resulting class to predict (usually 0 or 1).
Returns:
--------
(1) The probabilities of given class label for each x.
'''
if x.ndim == 1:
# if x is vector, transform to column matrix.
f_values = model.predict_proba(np.array(x).reshape(-1, 1))
else:
f_values = model.predict_proba(x)
# Get correct column of predicted class, remove extra dimensions and return.
return f_values[:, model.classes_ == class_value].flatten()
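# Illustrative call (kept as a comment; x_train_example etc. are hypothetical
# placeholders, mirroring how perfComp uses this helper further below):
# B_model, probs = fitPredictiveModel(x_train_example, y_train_example,
#                                     x_test_example, class_value=0,
#                                     model_type='lr')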
# ### Contraction algorithm
#
# Below is an implementation of Lakkaraju's team's algorithm presented in
# [their paper](https://helka.finna.fi/PrimoRecord/pci.acm3098066). Relevant
# parameters to be passed to the function are presented in the description.
def contraction(df, judgeIDJ_col, decisionT_col, resultY_col, modelProbS_col,
accRateR_col, r):
'''
This is an implementation of the algorithm presented by Lakkaraju
et al. in their paper "The Selective Labels Problem: Evaluating
Algorithmic Predictions in the Presence of Unobservables" (2017).
Arguments:
----------
df -- The (Pandas) data frame containing the data, judge decisions,
judge IDs, results and probability scores.
judgeIDJ_col -- String, the name of the column containing the judges' IDs
in df.
decisionT_col -- String, the name of the column containing the judges' decisions
resultY_col -- String, the name of the column containing the realization
modelProbS_col -- String, the name of the column containing the probability
scores from the black-box model B.
accRateR_col -- String, the name of the column containing the judges'
acceptance rates
r -- Float between 0 and 1, the given acceptance rate.
Returns:
--------
(1) The estimated failure rate at acceptance rate r.
'''
# Get ID of the most lenient judge.
most_lenient_ID_q = df[judgeIDJ_col].loc[df[accRateR_col].idxmax()]
# Subset. "D_q is the set of all observations judged by q."
D_q = df[df[judgeIDJ_col] == most_lenient_ID_q].copy()
##### Agreement rate computation begins #######
max_leniency = max(df[accRateR_col])
if max_leniency < r:
return np.nan, np.nan
# Sort D_q
# "Observations deemed as high risk by B are at the top of this list"
D_sort_q = D_q.sort_values(by=modelProbS_col, ascending=False)
# The agreement rate is the share of all the negative decisions given by q
# that fall within the (1 - max_leniency) fraction of subjects B deems most dangerous.
all_negatives = np.sum(D_sort_q[decisionT_col] == 0)
n_most_dangerous = int(round(D_q.shape[0] * (1-max_leniency)))
agreed_decisions = np.sum(D_sort_q[decisionT_col][0:n_most_dangerous] == 0)
print("The agreement rate of contraction is:", agreed_decisions/all_negatives)
##### Agreement rate computation ends #######
# All observations of R_q have observed outcome labels.
# "R_q is the set of observations in D_q with observed outcome labels."
R_q = D_q[D_q[decisionT_col] == 1].copy()
# Sort observations in R_q in descending order of confidence scores S and
# assign to R_sort_q.
# "Observations deemed as high risk by B are at the top of this list"
R_sort_q = R_q.sort_values(by=modelProbS_col, ascending=False)
number_to_remove = int(
round((1.0 - r) * D_q.shape[0] - (D_q.shape[0] - R_q.shape[0])))
# "R_B is the list of observations assigned to t = 1 by B"
R_B = R_sort_q[number_to_remove:R_sort_q.shape[0]]
return np.sum(R_B[resultY_col] == 0) / D_q.shape[0], D_q.shape[0]
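# Example call (kept as a comment; the column names match those produced by
# the deciders above and the call inside perfComp below):
# fr, n_q = contraction(test_labeled, 'judgeID_J', 'decision_T', 'result_Y',
#                       'B_prob_0_model', 'acceptanceRate_R', r=0.5)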
# ### Evaluators
def trueEvaluationEvaluator(df, featureX_col, decisionT_col, resultY_col, r):
df.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
to_release = int(round(df.shape[0] * r))
failed = df[resultY_col][0:to_release] == 0
return np.sum(failed) / df.shape[0], df.shape[0]
def labeledOutcomesEvaluator(df,
featureX_col,
decisionT_col,
resultY_col,
r,
adjusted=False):
df_observed = df.loc[df[decisionT_col] == 1, :]
df_observed = df_observed.sort_values(by='B_prob_0_model',
ascending=True)
to_release = int(round(df_observed.shape[0] * r))
failed = df_observed[resultY_col][0:to_release] == 0
if adjusted:
return np.mean(failed), df.shape[0]
return np.sum(failed) / df.shape[0], df.shape[0]
def humanEvaluationEvaluator(df, judgeIDJ_col, decisionT_col, resultY_col,
accRateR_col, r):
# Get judges with correct leniency as list
is_correct_leniency = df[accRateR_col].round(1) == r
# If there are no judges with the correct leniency, return NaN.
if np.sum(is_correct_leniency) == 0:
return np.nan, np.nan
correct_leniency_list = df.loc[is_correct_leniency, judgeIDJ_col]
# Released are the people they judged and released, T = 1
released = df[df[judgeIDJ_col].isin(correct_leniency_list)
& (df[decisionT_col] == 1)]
failed = released[resultY_col] == 0
# Get their failure rate, aka ratio of reoffenders to number of people judged in total
return np.sum(failed) / correct_leniency_list.shape[0], correct_leniency_list.shape[0]
def monteCarloEvaluator(df,
featureX_col,
decisionT_col,
resultY_col,
accRateR_col,
r,
mu_X=0,
mu_Z=0,
beta_X=1,
beta_Z=1,
sigma_X=1,
sigma_Z=1):
# Compute the predicted/assumed decision bounds for all the judges.
q_r = inverseCumulative(x=df[accRateR_col],
mu=mu_X + mu_Z,
sigma=np.sqrt((beta_X * sigma_X)**2 +
(beta_Z * sigma_Z)**2))
df = df.assign(bounds=logit(q_r) - df[featureX_col])
# Compute the expectation of Z when it is known to come from truncated
# Gaussian.
alphabeta = (df.bounds - mu_Z) / (sigma_Z)
Z_ = scs.norm.sf(alphabeta, loc=mu_Z, scale=sigma_Z) # 1 - cdf(ab)
# E(Z | Z > a). Expectation of Z if negative decision.
exp_lower_trunc = mu_Z + (sigma_Z * scs.norm.pdf(alphabeta)) / Z_
# E(Z | Z < b). Expectation of Z if positive decision.
exp_upper_trunc = mu_Z - (
sigma_Z * scs.norm.pdf(alphabeta)) / scs.norm.cdf(alphabeta)
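# Reference: standard truncated-normal expectations (with the default
# mu_Z = 0 and sigma_Z = 1 they match the two expressions above), where
# alpha = (bounds - mu_Z) / sigma_Z:
#   E(Z | Z > bounds) = mu_Z + sigma_Z * phi(alpha) / (1 - Phi(alpha))
#   E(Z | Z < bounds) = mu_Z - sigma_Z * phi(alpha) / Phi(alpha)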
exp_Z = (1 - df[decisionT_col]
) * exp_lower_trunc + df[decisionT_col] * exp_upper_trunc
# Attach the predicted probability for Y=0 to data.
df = df.assign(predicted_Y=inverseLogit(df[featureX_col] + exp_Z))
# Predictions drawn from binomial.
predictions = npr.binomial(n=1, p=1 - df.predicted_Y, size=df.shape[0])
df[resultY_col] = np.where(df[decisionT_col] == 0, predictions,
df[resultY_col])
df.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
to_release = int(round(df.shape[0] * r))
failed = df[resultY_col][0:to_release] == 0
return np.sum(failed) / df.shape[0], df.shape[0]
def drawDiagnostics(title, save_name, f_rates, titles):
cols = 2
rows = int(np.ceil(len(f_rates) / cols))
plt.figure(figsize=(16, 4.5*rows+1))
ax = plt.subplot(rows, cols, 1)
x_ax = np.arange(1, 9, 1) / 10
plt.boxplot(f_rates[0], labels=x_ax)
plt.title(titles[0])
plt.xlabel('Acceptance rate')
plt.ylabel('Failure rate')
plt.grid()
for i in range(len(f_rates)):
plt.subplot(rows, cols, i + 1, sharey=ax)
plt.boxplot(f_rates[i], labels=x_ax)
plt.title(titles[i])
plt.xlabel('Acceptance rate')
plt.ylabel('Failure rate')
plt.grid()
plt.tight_layout()
plt.subplots_adjust(top=0.85)
plt.savefig(save_name + '_diagnostic_plot')
def perfComp(dgModule, deciderModule, title, save_name, model_type=None,
fit_with_Z=False, nIter=10):
'''Performance comparison, a.k.a the main figure.
Parameters:
-----------
dgModule: Module creating the data
deciderModule: Module assigning the decisions
title: Title for the diagnostic figure
save_name: name for saving
model_type: what kind of model should be fitted. Logistic regression
("lr"), random forest ("rf") or fully random
("fully_random")
fit_with_Z: should the predictive model be fit with Z (the latent
variable)
nIter: Number of train-test splits to be performed.
'''
failure_rates = np.zeros((8, 4))
failure_stds = np.zeros((8, 4))
f_rate_true = np.zeros((nIter, 8))
f_rate_label = np.zeros((nIter, 8))
f_rate_cont = np.zeros((nIter, 8))
f_rate_cf = np.zeros((nIter, 8))
for i in range(nIter):
# Create the data and assign the decisions.
df = dgModule()
df_labeled, df_unlabeled = deciderModule(df)
# Split data
train, test_labeled = train_test_split(df_labeled, test_size=0.5)
# Assign same observations to the unlabeled data
test_unlabeled = df_unlabeled.iloc[test_labeled.index.values]
# Train model
if fit_with_Z:
B_model, predictions = fitPredictiveModel(
train.loc[train['decision_T'] == 1, ['X', 'Z']],
train.loc[train['decision_T'] == 1, 'result_Y'], test_labeled[['X', 'Z']],
0, model_type=model_type)
else:
B_model, predictions = fitPredictiveModel(
train.loc[train['decision_T'] == 1, 'X'],
train.loc[train['decision_T'] == 1, 'result_Y'], test_labeled['X'],
0, model_type=model_type)
# Attach predictions to data
test_labeled = test_labeled.assign(B_prob_0_model=predictions)
test_unlabeled = test_unlabeled.assign(B_prob_0_model=predictions)
test_labeled.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
# Assign group IDs
kk_array = pd.qcut(test_labeled['B_prob_0_model'], group_amount, labels=False)
# Find observed values
observed = test_labeled['decision_T'] == 1
###############################
# Change this assignment if you want to use predictions as input to the
# model.
###############################
dat = dict(D=1,
N_obs=np.sum(observed),
N_cens=np.sum(~observed),
K=group_amount,
sigma_tau=sigma_tau,
M=len(set(df_unlabeled['judgeID_J'])),
jj_obs=test_labeled.loc[observed, 'judgeID_J']+1,
jj_cens=test_labeled.loc[~observed, 'judgeID_J']+1,
kk_obs=kk_array[observed]+1,
kk_cens=kk_array[~observed]+1,
dec_obs=test_labeled.loc[observed, 'decision_T'],
dec_cens=test_labeled.loc[~observed, 'decision_T'],
X_obs=test_labeled.loc[observed, 'X'].values.reshape(-1,1),
X_cens=test_labeled.loc[~observed, 'X'].values.reshape(-1,1),
y_obs=test_labeled.loc[observed, 'result_Y'].astype(int))
# Do the sampling - Change this if you wish to use other methods.
fit = sm.sampling(data=dat, chains=5, iter=5000, control = dict(adapt_delta=0.9))
pars = fit.extract()
plt.figure(figsize=(15,30))
fit.plot();
plt.savefig(save_name + '_stan_convergence_diagnostic_plot_' + str(i))
plt.show()
plt.close('all')
gc.collect()
print(fit, file=open(save_name + '_stan_fit_diagnostics_' + str(i) + '.txt', 'w'))
# Bayes
# Format matrix, each row is a sample of the posterior
# columns are observations
y_imp = np.ones((pars['y_est'].shape[0], test_labeled.shape[0]))
# Impute the unobserved with the estimated
# Reverse the binary coding so that failures (Y = 0) count as 1 when summed.
y_imp[:, ~observed] = 1 - pars['y_est']
# Remove the variables to conserve memory
del(pars)
del(fit)
gc.collect()
# Impute the observed
y_imp[:, observed] = 1 - test_labeled.loc[observed, 'result_Y']
for r in range(1, 9):
to_release = np.round(test_labeled.shape[0] * (r / 10)).astype(int)
failed = np.sum(y_imp[:, 0:to_release], axis=1)
f_rate_cf[i, r - 1] = np.mean(failed / test_labeled.shape[0])
f_rate_true[i, r - 1], N = trueEvaluationEvaluator(test_unlabeled,
'X', 'decision_T', 'result_Y', r / 10)
f_rate_label[i, r - 1], N = labeledOutcomesEvaluator(test_labeled,
'X', 'decision_T', 'result_Y', r / 10)
f_rate_cont[i, r - 1], N = contraction(test_labeled, 'judgeID_J',
'decision_T', 'result_Y', 'B_prob_0_model',
'acceptanceRate_R', r / 10)
failure_rates[:, 0] = np.mean(f_rate_true, axis=0)
failure_rates[:, 1] = np.mean(f_rate_label, axis=0)
failure_rates[:, 2] = np.mean(f_rate_cont, axis=0)
failure_rates[:, 3] = np.mean(f_rate_cf, axis=0)
failure_stds[:, 0] = np.std(f_rate_true, axis=0)
failure_stds[:, 1] = np.std(f_rate_label, axis=0)
failure_stds[:, 2] = np.std(f_rate_cont, axis=0)
failure_stds[:, 3] = np.std(f_rate_cf, axis=0)
# Acceptance rates 0.1, ..., 0.8 for the x-axis.
x_ax = np.arange(1, 9, 1) / 10
labels = ['True Evaluation', 'Labeled outcomes', 'Contraction',
'Counterfactuals']
colours = ['g', 'magenta', 'b', 'r']
line_styles = ['--', ':', '-.', '-']
for i in range(failure_rates.shape[1]):
plt.errorbar(x_ax,
failure_rates[:, i],
label=labels[i],
c=colours[i],
linestyle=line_styles[i],
yerr=failure_stds[:, i])
#plt.title('Failure rate vs. Acceptance rate')
plt.xlabel('Acceptance rate')
plt.ylabel('Failure rate')
plt.legend()
plt.grid()
plt.savefig(save_name + '_all')
plt.show()
print("\nFailure rates:")
print(np.array2string(failure_rates, formatter={'float_kind':lambda x: "%.5f" % x}))
print("\nStandard deviations:")
print(np.array2string(failure_stds, formatter={'float_kind':lambda x: "%.5f" % x}))
print("\nMean absolute errors:")
for i in range(1, failure_rates.shape[1]):
print(
labels[i].ljust(len(max(labels, key=len))),
np.round(
np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])), 5))
drawDiagnostics(title=title,
save_name=save_name,
f_rates=[f_rate_true, f_rate_label, f_rate_cont, f_rate_cf],
titles=labels)
sm = pystan.StanModel(file=stan_code_file_name)
if which == 1:
print("\nWith unobservables (Bernoullian outcome + independent decisions)")
print("Decision-maker in the data and model: random and random")
dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim)
decider = lambda x: randomDecider(x, nJudges_M=M_sim, use_acceptance_rates=True)
perfComp(
dg, lambda x: decider(x),
"Fluctuation of failure rate estimates across iterations\n" +
"Bernoulli + independent decisions, without unobservables",
figure_path + "sl_bernoulli_independent_without_Z",
model_type="fully_random",
fit_with_Z=False)
if which == 2:
print("\nWith unobservables (Bernoullian outcome + independent decisions)")
print("Decision-maker in the data and model: random and y ~ x")
dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim)
decider = lambda x: randomDecider(x, nJudges_M=M_sim, use_acceptance_rates=True)