Commit 3e84e4a6 authored by Riku-Laine's avatar Riku-Laine

Outline writing, corrected some models. Defined standard error better.

parent 8fa4e1a9
data {
  int<lower=1> D;
  int<lower=0> N_obs;
  int<lower=0> N_cens;
  int<lower=0> M;
  real<lower=0> sigma_tau;
  int<lower=1, upper=M> jj_obs[N_obs];   // judge_ID
  int<lower=1, upper=M> jj_cens[N_cens]; // judge_ID
  int<lower=0, upper=1> dec_obs[N_obs];
  int<lower=0, upper=1> dec_cens[N_cens];
  row_vector[D] X_obs[N_obs];
  row_vector[D] X_cens[N_cens];
  int<lower=0, upper=1> y_obs[N_obs];
}
parameters {
  vector[N_obs] Z_obs;
  vector[N_cens] Z_cens;
  real alpha_T[M];
  vector[D] beta_XT_raw;
  vector[D] beta_XY_raw;
  real<lower=0> beta_ZT_raw; // Jung-style assumption: the latent variable has a positive effect
  real<lower=0> beta_ZY_raw; // Jung-style assumption: the latent variable has a positive effect
  real<lower=0> tau_XT;
  real<lower=0> tau_XY;
  real<lower=0> tau_ZT;
  real<lower=0> tau_ZY;
}
transformed parameters {
  vector[D] beta_XT;
  vector[D] beta_XY;
  real<lower=0> beta_ZT; // Jung-style assumption: the latent variable has a positive effect
  real<lower=0> beta_ZY; // Jung-style assumption: the latent variable has a positive effect
  beta_XT = tau_XT * beta_XT_raw;
  beta_XY = tau_XY * beta_XY_raw;
  beta_ZT = tau_ZT * beta_ZT_raw;
  beta_ZY = tau_ZY * beta_ZY_raw;
}
model {
  Z_obs ~ normal(0, 1);
  Z_cens ~ normal(0, 1);
  tau_XT ~ normal(0, sigma_tau);
  tau_XY ~ normal(0, sigma_tau);
  tau_ZT ~ normal(0, sigma_tau);
  tau_ZY ~ normal(0, sigma_tau);
  beta_XT_raw ~ normal(0, 1);
  beta_XY_raw ~ normal(0, 1);
  beta_ZT_raw ~ normal(0, 1);
  beta_ZY_raw ~ normal(0, 1);
  for(i in 1:N_obs){
    dec_obs[i] ~ bernoulli_logit(alpha_T[jj_obs[i]] + X_obs[i] * beta_XT + beta_ZT * Z_obs[i]);
    y_obs[i] ~ bernoulli_logit(X_obs[i] * beta_XY + beta_ZY * Z_obs[i]);
  }
  for(i in 1:N_cens)
    dec_cens[i] ~ bernoulli_logit(alpha_T[jj_cens[i]] + X_cens[i] * beta_XT + beta_ZT * Z_cens[i]);
}
generated quantities {
  int<lower=0, upper=1> y_est[N_cens];
  for(i in 1:N_cens){
    y_est[i] = bernoulli_logit_rng(X_cens[i] * beta_XY + beta_ZY * Z_cens[i]);
  }
}
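A minimal sketch of how this model's data block could be filled and sampled with PyStan. The file name model.stan and the random data are illustrative assumptions; the driver scripts further down build a similar dictionary (with additional grouping variables for the grouped model) from the COMPAS data.

import numpy as np
import pystan

# Illustrative sizes: 100 observed, 50 censored cases, D=2 covariates, M=9 judges.
N_obs, N_cens, D, M = 100, 50, 2, 9
dat = dict(D=D, N_obs=N_obs, N_cens=N_cens, M=M, sigma_tau=1.0,
           jj_obs=np.random.randint(1, M + 1, N_obs),    # judge IDs, 1..M
           jj_cens=np.random.randint(1, M + 1, N_cens),
           dec_obs=np.ones(N_obs, dtype=int),            # observed cases were released
           dec_cens=np.zeros(N_cens, dtype=int),
           X_obs=np.random.randn(N_obs, D),
           X_cens=np.random.randn(N_cens, D),
           y_obs=np.random.randint(0, 2, N_obs))
sm = pystan.StanModel(file='model.stan')
fit = sm.sampling(data=dat, chains=5, iter=6000, control=dict(adapt_delta=0.9))
y_est = fit.extract()['y_est']  # posterior draws of the imputed censored outcomes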
@@ -21,8 +21,8 @@ parameters {
vector[N_cens] Z_cens;
real alpha_T[M]; // Judge-specific intercepts
vector[D] beta_XT[K];
vector[D] beta_XY[K];
vector[D] beta_XT_raw[K];
vector[D] beta_XY_raw[K];
vector<lower=0>[K] beta_ZT_raw; // Coefficient for the latent variable.
vector<lower=0>[K] beta_ZY_raw; // Coefficient for the latent variable.
@@ -34,18 +34,36 @@ parameters {
}
transformed parameters{
vector[D] beta_XT[K];
vector[D] beta_XY[K];
vector<lower=0>[K] beta_ZT_cumulative;
vector<lower=0>[K] beta_ZY_cumulative;
vector<lower=0>[K] beta_ZT;
vector<lower=0>[K] beta_ZY;
beta_ZT[1] = beta_ZT_raw[1];
beta_ZY[1] = beta_ZY_raw[1];
if(K >= 2){
beta_XT[1] = beta_XT_raw[1];
beta_XY[1] = beta_XY_raw[1];
for(i in 2:K){ // random walk prior here
beta_XT[i] = tau_XT * beta_XT_raw[i-1]; // ith group
beta_XY[i] = tau_XY * beta_XY_raw[i-1];
}
beta_ZT[1] = beta_ZT_raw[1];
beta_ZY[1] = beta_ZY_raw[1];
beta_ZT[2:] = tau_ZT * beta_ZT_raw[2:];
beta_ZY[2:] = tau_ZY * beta_ZY_raw[2:];
} else {
// if only one group, constrain variances with the tau prior
beta_XT[1] = tau_XT * beta_XT_raw[1];
beta_XY[1] = tau_XY * beta_XY_raw[1];
beta_ZT = tau_ZT * beta_ZT_raw;
beta_ZY = tau_ZY * beta_ZY_raw;
}
beta_ZT_cumulative = cumulative_sum(beta_ZT);
@@ -53,25 +71,20 @@ transformed parameters{
}
model {
Z_obs ~ std_normal();
Z_cens ~ std_normal();
Z_obs ~ normal(0, 1);
Z_cens ~ normal(0, 1);
tau_XY ~ normal(0, sigma_tau);
tau_XT ~ normal(0, sigma_tau);
tau_ZY ~ normal(0, sigma_tau);
tau_XY ~ normal(0, sigma_tau);
tau_ZT ~ normal(0, sigma_tau);
tau_ZY ~ normal(0, sigma_tau);
beta_XT[1] ~ std_normal(); // first group
beta_XY[1] ~ std_normal();
beta_ZT_raw ~ std_normal();
beta_ZY_raw ~ std_normal();
if(K >= 2){
for(i in 2:K){ // random walk prior here
beta_XT[i] ~ normal(beta_XT[i-1], tau_XT); // ith group
beta_XY[i] ~ normal(beta_XY[i-1], tau_XY);
}
for(i in 1:K){
beta_XT_raw[i] ~ normal(0, 1);
beta_XY_raw[i] ~ normal(0, 1);
}
beta_ZT_raw ~ normal(0, 1);
beta_ZY_raw ~ normal(0, 1);
for(i in 1:N_obs){
dec_obs[i] ~ bernoulli_logit(alpha_T[jj_obs[i]] + X_obs[i] * beta_XT[kk_obs[i]] + beta_ZT_cumulative[kk_obs[i]] * Z_obs[i]);
@@ -83,9 +96,9 @@ model {
}
generated quantities {
real<lower=0, upper=1> y_est[N_cens];
int<lower=0, upper=1> y_est[N_cens];
for(i in 1:N_cens){
y_est[i] = inv_logit(X_cens[i] * beta_XY[kk_cens[i]] + beta_ZY_cumulative[kk_cens[i]] * Z_cens[i]);
y_est[i] = bernoulli_logit_rng(X_cens[i] * beta_XY[kk_cens[i]] + beta_ZY_cumulative[kk_cens[i]] * Z_cens[i]);
}
}
@@ -459,7 +459,7 @@ dat = dict(D=1,
sm = pystan.StanModel(file=stan_code_file_name)
fit = sm.sampling(data=dat, chains=5, iter=4000, control = dict(adapt_delta=0.9))
fit = sm.sampling(data=dat, chains=5, iter=6000, control = dict(adapt_delta=0.9))
pars = fit.extract()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
# Author: Riku Laine
# Date: 26JUL2019
# Project name: Potential outcomes in model evaluation
# Description: This script creates the figures and results used
# in empirical data experiments.
#
# Parameters:
# -----------
# (1) figure_path : File name for saving the created figures.
# (2) group_amount : Number of groups when the Jung-inspired model is used.
# (3) stan_code_file_name : Name of the file containing the Stan model code.
# (4) sigma_tau : Value of the prior variance for the Jung-inspired model.
# (5) data_path : Path to the COMPAS data file.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.special as ssp
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pystan
plt.switch_backend('agg')
import sys
# figure storage name
figure_path = sys.argv[1]
# Number of groups for the Jung-inspired model
group_amount = int(sys.argv[2])
# Name of stan model code file
stan_code_file_name = sys.argv[3]
# Variance prior
sigma_tau = float(sys.argv[4])
# Path to the COMPAS data file
data_path = sys.argv[5]
print("These results have been obtained with the following settings:")
print("Number of groups:", group_amount)
print("Prior for the variances:", sigma_tau)
# Prefix for the figures and log files
save_name = "sl_compas"
def inv_logit(x):
return 1.0 / (1.0 + np.exp(-1.0 * x))
def logit(x):
return np.log(x) - np.log(1.0 - x)
def inverse_cumulative(x, mu, sigma):
    '''Compute the inverse of the cumulative distribution function of the
    logit-normal distribution at x with parameters mu and sigma (mean and
    st.dev. of the underlying normal): inv_logit(mu + sigma * sqrt(2) * erfinv(2x - 1)).'''
    # Note: mu enters the quantile function with a plus sign.
    return inv_logit(ssp.erfinv(2 * x - 1) * np.sqrt(2 * sigma**2) + mu)
def standardError(p, n):
    '''Standard error of a proportion p estimated from n observations:
    sqrt(p * (1 - p) / n).'''
    variance = p * (1 - p)
    return np.sqrt(variance / n)
##########################
# ## Evaluator modules
# ### Convenience functions
def fitPredictiveModel(x_train, y_train, x_test, class_value, model_type=None):
'''
Fit a predictive model (default logistic regression) with given training
instances and return probabilities for test instances to obtain a given
class label.
Arguments:
----------
x_train -- x values of training instances
y_train -- y values of training instances
x_test -- x values of test instances
class_value -- class label for which the probabilities are computed.
model_type -- type of model to be fitted.
Returns:
--------
(1) Trained predictive model
(2) Probabilities for given test inputs for given class.
'''
if model_type is None or model_type in ["logistic_regression", "lr"]:
# Instantiate the model (using the default parameters)
logreg = LogisticRegression(solver='lbfgs')
# Check shape and fit the model.
if x_train.ndim == 1:
logreg = logreg.fit(x_train.values.reshape(-1, 1), y_train)
else:
logreg = logreg.fit(x_train, y_train)
label_probs_logreg = getProbabilityForClass(x_test, logreg,
class_value)
return logreg, label_probs_logreg
elif model_type in ["random_forest", "rf"]:
# Instantiate the model
forest = RandomForestClassifier(n_estimators=100, max_depth=3)
# Check shape and fit the model.
if x_train.ndim == 1:
forest = forest.fit(x_train.values.reshape(-1, 1), y_train)
else:
forest = forest.fit(x_train, y_train)
label_probs_forest = getProbabilityForClass(x_test, forest,
class_value)
return forest, label_probs_forest
elif model_type == "fully_random":
label_probs = np.ones_like(x_test) / 2
model_object = lambda x: 0.5
return model_object, label_probs
else:
raise ValueError("Invalid model_type!", model_type)
def getProbabilityForClass(x, model, class_value):
'''
Function (wrapper) for obtaining the probability of a class given x and a
predictive model.
Arguments:
-----------
x -- individual features, an array of shape (observations, features)
model -- a trained sklearn model. Predicts probabilities for given x.
Should accept input of shape (observations, features)
class_value -- the resulting class to predict (usually 0 or 1).
Returns:
--------
(1) The probabilities of given class label for each x.
'''
if x.ndim == 1:
# if x is vector, transform to column matrix.
f_values = model.predict_proba(np.array(x).reshape(-1, 1))
else:
f_values = model.predict_proba(x)
# Get correct column of predicted class, remove extra dimensions and return.
return f_values[:, model.classes_ == class_value].flatten()
# ### Contraction algorithm
#
# Below is an implementation of the contraction algorithm by Lakkaraju et al.,
# presented in [their paper](https://helka.finna.fi/PrimoRecord/pci.acm3098066).
# The relevant parameters to be passed to the function are described in its docstring.
def contraction(df, judgeIDJ_col, decisionT_col, resultY_col, modelProbS_col,
accRateR_col, r):
'''
This is an implementation of the algorithm presented by Lakkaraju
et al. in their paper "The Selective Labels Problem: Evaluating
Algorithmic Predictions in the Presence of Unobservables" (2017).
Arguments:
----------
df -- The (Pandas) data frame containing the data, judge decisions,
judge IDs, results and probability scores.
judgeIDJ_col -- String, the name of the column containing the judges' IDs
in df.
decisionT_col -- String, the name of the column containing the judges' decisions
resultY_col -- String, the name of the column containing the realization
modelProbS_col -- String, the name of the column containing the probability
scores from the black-box model B.
accRateR_col -- String, the name of the column containing the judges'
acceptance rates
r -- Float between 0 and 1, the given acceptance rate.
Returns:
--------
(1) The estimated failure rate at acceptance rate r.
'''
# Get ID of the most lenient judge.
most_lenient_ID_q = df[judgeIDJ_col].loc[df[accRateR_col].idxmax()]
# Subset. "D_q is the set of all observations judged by q."
D_q = df[df[judgeIDJ_col] == most_lenient_ID_q].copy()
# All observations of R_q have observed outcome labels.
# "R_q is the set of observations in D_q with observed outcome labels."
R_q = D_q[D_q[decisionT_col] == 1].copy()
# Sort observations in R_q in descending order of confidence scores S and
# assign to R_sort_q.
# "Observations deemed as high risk by B are at the top of this list"
R_sort_q = R_q.sort_values(by=modelProbS_col, ascending=False)
number_to_remove = int(
round((1.0 - r) * D_q.shape[0] - (D_q.shape[0] - R_q.shape[0])))
# "R_B is the list of observations assigned to t = 1 by B"
R_B = R_sort_q[number_to_remove:R_sort_q.shape[0]]
return np.sum(R_B[resultY_col] == 0) / D_q.shape[0], D_q.shape[0]
# ### Evaluators
def trueEvaluationEvaluator(df, resultY_col, r):
df.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
to_release = int(round(df.shape[0] * r))
failed = df[resultY_col][0:to_release] == 0
return np.sum(failed) / df.shape[0], df.shape[0]
def labeledOutcomesEvaluator(df, decisionT_col, resultY_col, r, adjusted=False):
df_observed = df.loc[df[decisionT_col] == 1, :]
df_observed = df_observed.sort_values(by='B_prob_0_model',
inplace=False,
ascending=True)
to_release = int(round(df_observed.shape[0] * r))
failed = df_observed[resultY_col][0:to_release] == 0
if adjusted:
return np.mean(failed), df.shape[0]
return np.sum(failed) / df.shape[0], df.shape[0]
def humanEvaluationEvaluator(df, judgeIDJ_col, decisionT_col, resultY_col,
accRateR_col, r):
# Get judges with correct leniency as list
is_correct_leniency = df[accRateR_col].round(1) == r
# No judges with correct leniency
if np.sum(is_correct_leniency) == 0:
return np.nan, np.nan
correct_leniency_list = df.loc[is_correct_leniency, judgeIDJ_col]
# Released are the people they judged and released, T = 1
released = df[df[judgeIDJ_col].isin(correct_leniency_list)
& (df[decisionT_col] == 1)]
failed = released[resultY_col] == 0
# Get their failure rate, aka ratio of reoffenders to number of people judged in total
return np.sum(failed) / correct_leniency_list.shape[0], correct_leniency_list.shape[0]
###################
# Read in the data
compas_raw = pd.read_csv(data_path)
# Select columns
compas = compas_raw[[
'age', 'c_charge_degree', 'race', 'age_cat', 'score_text', 'sex',
'priors_count', 'days_b_screening_arrest', 'decile_score', 'is_recid',
'two_year_recid', 'c_jail_in', 'c_jail_out'
]]
# Subset values, see reasons in ProPublica methodology.
compas = compas.query('days_b_screening_arrest <= 30 and \
days_b_screening_arrest >= -30 and \
is_recid != -1 and \
c_charge_degree != "O"')
# Drop row if score_text is na
compas = compas[compas.score_text.notnull()]
# Recode recidivism values to fit earlier notation
# So here result_Y = 1 - is_recid (inverted binary coding).
compas['result_Y'] = np.where(compas['is_recid'] == 1, 0, 1)
# Convert string values to dummies, drop first so full rank
compas_dummy = pd.get_dummies(
compas,
columns=['c_charge_degree', 'race', 'age_cat', 'sex'],
drop_first=True)
compas_dummy.drop(columns=[
'age', 'days_b_screening_arrest', 'c_jail_in', 'c_jail_out',
'two_year_recid', 'score_text', 'is_recid'
],
inplace=True)
# Shuffle rows for random judge assignment
compas_shuffled = compas_dummy.sample(frac=1)
nJudges_M = 9
# Assign judges as evenly as possible
judge_ID = pd.qcut(np.arange(len(compas_shuffled)), nJudges_M, labels=False)
# Assign fixed leniencies from 0.1 to 0.9
judge_leniency = np.arange(1, 10) / 10
judge_leniency = judge_leniency[judge_ID]
compas_shuffled = compas_shuffled.assign(judge_ID=judge_ID,
judge_leniency=judge_leniency)
# Sort by judges then probabilities in decreasing order
# Most dangerous for each judge are at the top.
compas_shuffled.sort_values(by=["judge_ID", "decile_score"],
ascending=False,
inplace=True)
# Iterate over the data. A subject is given a negative decision if they are
# in the top (1 - r) * 100% of the individuals their judge will judge, i.e. if
# their within-judge index is less than (1 - r) times the number of subjects
# assigned to that judge.
compas_shuffled.reset_index(drop=True, inplace=True)
subjects_allocated = compas_shuffled.judge_ID.value_counts()
compas_shuffled['judge_index'] = compas_shuffled.groupby('judge_ID').cumcount()
compas_shuffled['decision_T'] = np.where(
compas_shuffled['judge_index'] < (1 - compas_shuffled['judge_leniency']) *
subjects_allocated[compas_shuffled['judge_ID']].values, 0, 1)
compas_labeled = compas_shuffled.copy()
compas_unlabeled = compas_shuffled.copy()
# Hide unobserved
compas_labeled.loc[compas_labeled.decision_T == 0, 'result_Y'] = np.nan
# Choose feature_columns
feature_cols = ~compas_labeled.columns.isin(
['result_Y', 'decile_score', 'judge_ID', 'judge_leniency', 'judge_index', 'decision_T'])
feature_cols = compas_labeled.columns[feature_cols]
####### Draw figures ###########
failure_rates = np.zeros((8, 6))
error_Ns = np.zeros((8, 6))
# Split data
train, test_labeled = train_test_split(compas_labeled, test_size=0.5)
# Assign same observations to unlabeled data
test_unlabeled = compas_unlabeled.iloc[test_labeled.index.values]
# Train a logistic regression model
B_model, predictions = fitPredictiveModel(
train.loc[train['decision_T'] == 1, feature_cols],
train.loc[train['decision_T'] == 1, 'result_Y'], test_labeled[feature_cols], 0)
test_labeled = test_labeled.assign(B_prob_0_model=predictions)
test_unlabeled = test_unlabeled.assign(B_prob_0_model=predictions)
test_labeled.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
kk_array = pd.qcut(test_labeled['B_prob_0_model'], group_amount, labels=False)
# Find observed values
observed = test_labeled['decision_T'] == 1
# Assign data to the model
dat = dict(D=10,
N_obs=np.sum(observed),
N_cens=np.sum(~observed),
K=group_amount,
sigma_tau=sigma_tau,
M=len(set(compas_labeled['judge_ID'])),
jj_obs=test_labeled.loc[observed, 'judge_ID']+1,
jj_cens=test_labeled.loc[~observed, 'judge_ID']+1,
kk_obs=kk_array[observed]+1,
kk_cens=kk_array[~observed]+1,
dec_obs=test_labeled.loc[observed, 'decision_T'],
dec_cens=test_labeled.loc[~observed, 'decision_T'],
X_obs=test_labeled.loc[observed, feature_cols.values],
X_cens=test_labeled.loc[~observed, feature_cols.values],
y_obs=test_labeled.loc[observed, 'result_Y'].astype(int))
sm = pystan.StanModel(file=stan_code_file_name)
fit = sm.sampling(data=dat, chains=5, iter=6000, control = dict(adapt_delta=0.9))
pars = fit.extract()
print(fit, file=open(save_name + '_stan_fit_diagnostics.txt', 'w'))
plt.figure(figsize=(15,30))
fit.plot();
plt.savefig(save_name + '_stan_diagnostic_plot')
plt.show()
plt.close('all')
# Bayes
# Initialize a matrix with one posterior draw per row and one observation per
# column. The entries are failure indicators, hence the 1 - ... below.
y_imp = np.ones((pars['y_est'].shape[0], test_labeled.shape[0]))
# Fill the unobserved outcomes with the posterior estimates
y_imp[:, ~observed] = 1-pars['y_est']
# Fill the observed outcomes with the observed values
y_imp[:, observed] = 1-test_labeled.loc[observed, 'result_Y']
Rs = np.arange(.1, .9, .1)
to_release_list = np.round(test_labeled.shape[0] * Rs).astype(int)
f_rate_bayes = np.full((pars['y_est'].shape[0], 8), np.nan)
for i in range(len(to_release_list)):
failed = np.sum(y_imp[:, 0:to_release_list[i]], axis=1)
est_failure_rates = failed / test_labeled.shape[0]
failure_rates[i, 5] = np.mean(est_failure_rates)
error_Ns[i, 5] = test_labeled.shape[0]
for r in np.arange(1, 9):
print(".", end="")
# True evaluation
FR, N = trueEvaluationEvaluator(test_unlabeled, 'result_Y', r / 10)
failure_rates[r - 1, 0] = FR
error_Ns[r - 1, 0] = N
# Labeled outcomes only
FR, N = labeledOutcomesEvaluator(test_labeled, 'decision_T', 'result_Y', r / 10)
failure_rates[r - 1, 1] = FR
error_Ns[r - 1, 1] = N
# Adjusted labeled outcomes
FR, N = labeledOutcomesEvaluator(test_labeled, 'decision_T', 'result_Y', r / 10,
adjusted=True)
failure_rates[r - 1, 2] = FR
error_Ns[r - 1, 2] = N
# Human evaluation
FR, N = humanEvaluationEvaluator(test_labeled, 'judge_ID', 'decision_T',
'result_Y', 'judge_leniency',
r / 10)
failure_rates[r - 1, 3] = FR
error_Ns[r - 1, 3] = N
# Contraction
FR, N = contraction(test_labeled, 'judge_ID', 'decision_T', 'result_Y',
'B_prob_0_model', 'judge_leniency', r / 10)
failure_rates[r - 1, 4] = FR
error_Ns[r - 1, 4] = N
x_ax = np.arange(0.1, 0.9, 0.1)
failure_SEs = standardError(failure_rates, error_Ns)
labels = [
'True Evaluation', 'Labeled outcomes', 'Labeled outcomes, adj.',
'Human evaluation', 'Contraction', 'Potential outcomes'
]
colours = ['g', 'magenta', 'darkviolet', 'r', 'b', 'c']
for i in range(failure_rates.shape[1]):
plt.errorbar(x_ax,
failure_rates[:, i],
label=labels[i],
c=colours[i],
yerr=failure_SEs[:, i])
plt.title('Failure rate vs. Acceptance rate')
plt.xlabel('Acceptance rate')
plt.ylabel('Failure rate')
plt.legend()
plt.grid()
plt.savefig(save_name + '_all')
plt.show()
print("\nFailure rates:")
print(np.array2string(failure_rates, formatter={'float_kind':lambda x: "%.5f" % x}))
print("\nMean absolute errors:")
for i in range(1, failure_rates.shape[1]):
print(
labels[i].ljust(len(max(labels, key=len))),
np.round(
np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])), 5))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
# Author: Riku Laine
# Date: 26JUL2019
# Project name: Potential outcomes in model evaluation
# Description: This script creates the figures and results used
# in empirical data experiments.
#
# Parameters:
# -----------
# (1) figure_path : File name for saving the created figures.
# (2) group_amount : Number of groups when the Jung-inspired model is used.
# (3) stan_code_file_name : Name of the file containing the Stan model code.
# (4) sigma_tau : Value of the prior variance for the Jung-inspired model.
# (5) data_path : Path to the COMPAS data file.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.special as ssp
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pystan
plt.switch_backend('agg')
import sys
# figure storage name
figure_path = sys.argv[1]
# Number of groups for the Jung-inspired model
group_amount = int(sys.argv[2])
# Name of stan model code file
stan_code_file_name = sys.argv[3]
# Variance prior
sigma_tau = float(sys.argv[4])
# Path to the COMPAS data file
data_path = sys.argv[5]
print("These results have been obtained with the following settings:")
print("Number of groups:", group_amount)
print("Prior for the variances:", sigma_tau)
# Prefix for the figures and log files
save_name = "sl_compas"
def inv_logit(x):
return 1.0 / (1.0 + np.exp(-1.0 * x))
def logit(x):
return np.log(x) - np.log(1.0 - x)
def inverse_cumulative(x, mu, sigma):
    '''Compute the inverse of the cumulative distribution function of the
    logit-normal distribution at x with parameters mu and sigma (mean and
    st.dev. of the underlying normal): inv_logit(mu + sigma * sqrt(2) * erfinv(2x - 1)).'''
    # Note: mu enters the quantile function with a plus sign.
    return inv_logit(ssp.erfinv(2 * x - 1) * np.sqrt(2 * sigma**2) + mu)
def standardError(p, n):
    '''Standard error of a proportion p estimated from n observations:
    sqrt(p * (1 - p) / n).'''
    variance = p * (1 - p)
    return np.sqrt(variance / n)
##########################
# ## Evaluator modules
# ### Convenience functions
def fitPredictiveModel(x_train, y_train, x_test, class_value, model_type=None):
'''
Fit a predictive model (default logistic regression) with given training
instances and return probabilities for test instances to obtain a given
class label.
Arguments:
----------
x_train -- x values of training instances
y_train -- y values of training instances
x_test -- x values of test instances
class_value -- class label for which the probabilities are computed.
model_type -- type of model to be fitted.
Returns:
--------
(1) Trained predictive model
(2) Probabilities for given test inputs for given class.
'''
if model_type is None or model_type in ["logistic_regression", "lr"]:
# Instantiate the model (using the default parameters)
logreg = LogisticRegression(solver='lbfgs')
# Check shape and fit the model.
if x_train.ndim == 1:
logreg = logreg.fit(x_train.values.reshape(-1, 1), y_train)
else:
logreg = logreg.fit(x_train, y_train)
label_probs_logreg = getProbabilityForClass(x_test, logreg,
class_value)
return logreg, label_probs_logreg
elif model_type in ["random_forest", "rf"]:
# Instantiate the model
forest = RandomForestClassifier(n_estimators=100, max_depth=3)
# Check shape and fit the model.
if x_train.ndim == 1:
forest = forest.fit(x_train.values.reshape(-1, 1), y_train)
else:
forest = forest.fit(x_train, y_train)
label_probs_forest = getProbabilityForClass(x_test, forest,
class_value)
return forest, label_probs_forest
elif model_type == "fully_random":
label_probs = np.ones_like(x_test) / 2
model_object = lambda x: 0.5
return model_object, label_probs
else:
raise ValueError("Invalid model_type!", model_type)
def getProbabilityForClass(x, model, class_value):
'''
Function (wrapper) for obtaining the probability of a class given x and a
predictive model.
Arguments:
-----------
x -- individual features, an array of shape (observations, features)
model -- a trained sklearn model. Predicts probabilities for given x.
Should accept input of shape (observations, features)
class_value -- the resulting class to predict (usually 0 or 1).
Returns:
--------
(1) The probabilities of given class label for each x.
'''
if x.ndim == 1:
# if x is vector, transform to column matrix.
f_values = model.predict_proba(np.array(x).reshape(-1, 1))
else:
f_values = model.predict_proba(x)
# Get correct column of predicted class, remove extra dimensions and return.
return f_values[:, model.classes_ == class_value].flatten()
# ### Contraction algorithm
#
# Below is an implementation of the contraction algorithm by Lakkaraju et al.,
# presented in [their paper](https://helka.finna.fi/PrimoRecord/pci.acm3098066).
# The relevant parameters to be passed to the function are described in its docstring.
def contraction(df, judgeIDJ_col, decisionT_col, resultY_col, modelProbS_col,
accRateR_col, r):
'''
This is an implementation of the algorithm presented by Lakkaraju
et al. in their paper "The Selective Labels Problem: Evaluating
Algorithmic Predictions in the Presence of Unobservables" (2017).
Arguments:
----------
df -- The (Pandas) data frame containing the data, judge decisions,
judge IDs, results and probability scores.
judgeIDJ_col -- String, the name of the column containing the judges' IDs
in df.
decisionT_col -- String, the name of the column containing the judges' decisions
resultY_col -- String, the name of the column containing the realization
modelProbS_col -- String, the name of the column containing the probability
scores from the black-box model B.
accRateR_col -- String, the name of the column containing the judges'
acceptance rates
r -- Float between 0 and 1, the given acceptance rate.
Returns:
--------
(1) The estimated failure rate at acceptance rate r.
'''
# Get ID of the most lenient judge.
most_lenient_ID_q = df[judgeIDJ_col].loc[df[accRateR_col].idxmax()]
# Subset. "D_q is the set of all observations judged by q."
D_q = df[df[judgeIDJ_col] == most_lenient_ID_q].copy()
# All observations of R_q have observed outcome labels.
# "R_q is the set of observations in D_q with observed outcome labels."
R_q = D_q[D_q[decisionT_col] == 1].copy()
# Sort observations in R_q in descending order of confidence scores S and
# assign to R_sort_q.
# "Observations deemed as high risk by B are at the top of this list"
R_sort_q = R_q.sort_values(by=modelProbS_col, ascending=False)
number_to_remove = int(
round((1.0 - r) * D_q.shape[0] - (D_q.shape[0] - R_q.shape[0])))
# "R_B is the list of observations assigned to t = 1 by B"
R_B = R_sort_q[number_to_remove:R_sort_q.shape[0]]
return np.sum(R_B[resultY_col] == 0) / D_q.shape[0], D_q.shape[0]
# ### Evaluators
def trueEvaluationEvaluator(df, resultY_col, r):
df.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
to_release = int(round(df.shape[0] * r))
failed = df[resultY_col][0:to_release] == 0
return np.sum(failed) / df.shape[0], df.shape[0]
def labeledOutcomesEvaluator(df, decisionT_col, resultY_col, r, adjusted=False):
df_observed = df.loc[df[decisionT_col] == 1, :]
df_observed = df_observed.sort_values(by='B_prob_0_model',
inplace=False,
ascending=True)
to_release = int(round(df_observed.shape[0] * r))
failed = df_observed[resultY_col][0:to_release] == 0
if adjusted:
return np.mean(failed), df.shape[0]
return np.sum(failed) / df.shape[0], df.shape[0]
def humanEvaluationEvaluator(df, judgeIDJ_col, decisionT_col, resultY_col,
accRateR_col, r):
# Get judges with correct leniency as list
is_correct_leniency = df[accRateR_col].round(1) == r
# No judges with correct leniency
if np.sum(is_correct_leniency) == 0:
return np.nan, np.nan
correct_leniency_list = df.loc[is_correct_leniency, judgeIDJ_col]
# Released are the people they judged and released, T = 1
released = df[df[judgeIDJ_col].isin(correct_leniency_list)
& (df[decisionT_col] == 1)]
failed = released[resultY_col] == 0
# Get their failure rate, aka ratio of reoffenders to number of people judged in total
return np.sum(failed) / correct_leniency_list.shape[0], correct_leniency_list.shape[0]
###################
# Read in the data
compas_raw = pd.read_csv(data_path)
# Select columns
compas = compas_raw[[
'age', 'c_charge_degree', 'race', 'age_cat', 'score_text', 'sex',
'priors_count', 'days_b_screening_arrest', 'decile_score', 'is_recid',
'two_year_recid', 'c_jail_in', 'c_jail_out'
]]
# Subset values, see reasons in ProPublica methodology.
compas = compas.query('days_b_screening_arrest <= 30 and \
days_b_screening_arrest >= -30 and \
is_recid != -1 and \
c_charge_degree != "O"')
# Drop row if score_text is na
compas = compas[compas.score_text.notnull()]
# Recode recidivism values to fit earlier notation
# So here result_Y = 1 - is_recid (inverted binary coding).
compas['result_Y'] = np.where(compas['is_recid'] == 1, 0, 1)
# Convert string values to dummies, drop first so full rank
compas_dummy = pd.get_dummies(
compas,
columns=['c_charge_degree', 'race', 'age_cat', 'sex'],
drop_first=True)
compas_dummy.drop(columns=[
'age', 'days_b_screening_arrest', 'c_jail_in', 'c_jail_out',
'two_year_recid', 'score_text', 'is_recid'
],
inplace=True)
# Shuffle rows for random judge assignment
compas_shuffled = compas_dummy.sample(frac=1)
nJudges_M = 9
# Assign judges as evenly as possible
judge_ID = pd.qcut(np.arange(len(compas_shuffled)), nJudges_M, labels=False)
# Assign fixed leniencies from 0.1 to 0.9
judge_leniency = np.arange(1, 10) / 10
judge_leniency = judge_leniency[judge_ID]
compas_shuffled = compas_shuffled.assign(judge_ID=judge_ID,
judge_leniency=judge_leniency)
# Sort by judges then probabilities in decreasing order
# Most dangerous for each judge are at the top.
compas_shuffled.sort_values(by=["judge_ID", "decile_score"],
ascending=False,
inplace=True)
# Iterate over the data. A subject is given a negative decision if they are
# in the top (1 - r) * 100% of the individuals their judge will judge, i.e. if
# their within-judge index is less than (1 - r) times the number of subjects
# assigned to that judge.
compas_shuffled.reset_index(drop=True, inplace=True)
subjects_allocated = compas_shuffled.judge_ID.value_counts()
compas_shuffled['judge_index'] = compas_shuffled.groupby('judge_ID').cumcount()
compas_shuffled['decision_T'] = np.where(
compas_shuffled['judge_index'] < (1 - compas_shuffled['judge_leniency']) *
subjects_allocated[compas_shuffled['judge_ID']].values, 0, 1)
compas_labeled = compas_shuffled.copy()
compas_unlabeled = compas_shuffled.copy()
# Hide unobserved
compas_labeled.loc[compas_labeled.decision_T == 0, 'result_Y'] = np.nan
# Choose feature_columns
feature_cols = ~compas_labeled.columns.isin(
['result_Y', 'decile_score', 'judge_ID', 'judge_leniency', 'judge_index', 'decision_T'])
feature_cols = compas_labeled.columns[feature_cols]
####### Draw figures ###########
failure_rates = np.zeros((8, 6))
error_Ns = np.zeros((8, 6))
# Split data
train, test_labeled = train_test_split(compas_labeled, test_size=0.5)
# Assign same observations to unlabeled data
test_unlabeled = compas_unlabeled.iloc[test_labeled.index.values]
# Train a logistic regression model
B_model, predictions = fitPredictiveModel(
train.loc[train['decision_T'] == 1, feature_cols],
train.loc[train['decision_T'] == 1, 'result_Y'], test_labeled[feature_cols], 0)
test_labeled = test_labeled.assign(B_prob_0_model=predictions)
test_unlabeled = test_unlabeled.assign(B_prob_0_model=predictions)
test_labeled.sort_values(by='B_prob_0_model', inplace=True, ascending=True)
kk_array = pd.qcut(test_labeled['B_prob_0_model'], group_amount, labels=False)
# Find observed values
observed = test_labeled['decision_T'] == 1
# Assign data to the model
dat = dict(D=1,
N_obs=np.sum(observed),
N_cens=np.sum(~observed),
K=group_amount,
sigma_tau=sigma_tau,
M=len(set(compas_labeled['judge_ID'])),
jj_obs=test_labeled.loc[observed, 'judge_ID']+1,
jj_cens=test_labeled.loc[~observed, 'judge_ID']+1,
kk_obs=kk_array[observed]+1,
kk_cens=kk_array[~observed]+1,
dec_obs=test_labeled.loc[observed, 'decision_T'],
dec_cens=test_labeled.loc[~observed, 'decision_T'],
X_obs=test_labeled.loc[observed, 'B_prob_0_model'].values.reshape(-1, 1),
X_cens=test_labeled.loc[~observed, 'B_prob_0_model'].values.reshape(-1, 1),
y_obs=test_labeled.loc[observed, 'result_Y'].astype(int))
sm = pystan.StanModel(file=stan_code_file_name)
fit = sm.sampling(data=dat, chains=5, iter=6000, control = dict(adapt_delta=0.9))
pars = fit.extract()
print(fit, file=open(save_name + '_stan_fit_diagnostics.txt', 'w'))
plt.figure(figsize=(15,30))
fit.plot();
plt.savefig(save_name + '_stan_diagnostic_plot')
plt.show()
plt.close('all')
# Bayes
# Initialize a matrix with one posterior draw per row and one observation per
# column. The entries are failure indicators, hence the 1 - ... below.
y_imp = np.ones((pars['y_est'].shape[0], test_labeled.shape[0]))
# Fill the unobserved outcomes with the posterior estimates
y_imp[:, ~observed] = 1-pars['y_est']
# Fill the observed outcomes with the observed values
y_imp[:, observed] = 1-test_labeled.loc[observed, 'result_Y']
Rs = np.arange(.1, .9, .1)
to_release_list = np.round(test_labeled.shape[0] * Rs).astype(int)
f_rate_bayes = np.full((pars['y_est'].shape[0], 8), np.nan)
for i in range(len(to_release_list)):
failed = np.sum(y_imp[:, 0:to_release_list[i]], axis=1)
est_failure_rates = failed / test_labeled.shape[0]
failure_rates[i, 5] = np.mean(est_failure_rates)
error_Ns[i, 5] = test_labeled.shape[0]
for r in np.arange(1, 9):
print(".", end="")
# True evaluation
FR, N = trueEvaluationEvaluator(test_unlabeled, 'result_Y', r / 10)
failure_rates[r - 1, 0] = FR
error_Ns[r - 1, 0] = N
# Labeled outcomes only
FR, N = labeledOutcomesEvaluator(test_labeled, 'decision_T', 'result_Y', r / 10)
failure_rates[r - 1, 1] = FR
error_Ns[r - 1, 1] = N
# Adjusted labeled outcomes
FR, N = labeledOutcomesEvaluator(test_labeled, 'decision_T', 'result_Y', r / 10,
adjusted=True)
failure_rates[r - 1, 2] = FR
error_Ns[r - 1, 2] = N
# Human evaluation
FR, N = humanEvaluationEvaluator(test_labeled, 'judge_ID', 'decision_T',
'result_Y', 'judge_leniency',
r / 10)
failure_rates[r - 1, 3] = FR
error_Ns[r - 1, 3] = N
# Contraction
FR, N = contraction(test_labeled, 'judge_ID', 'decision_T', 'result_Y',
'B_prob_0_model', 'judge_leniency', r / 10)
failure_rates[r - 1, 4] = FR
error_Ns[r - 1, 4] = N
x_ax = np.arange(0.1, 0.9, 0.1)
failure_SEs = standardError(failure_rates, error_Ns)
labels = [
'True Evaluation', 'Labeled outcomes', 'Labeled outcomes, adj.',
'Human evaluation', 'Contraction', 'Potential outcomes'
]
colours = ['g', 'magenta', 'darkviolet', 'r', 'b', 'c']
for i in range(failure_rates.shape[1]):
plt.errorbar(x_ax,
failure_rates[:, i],
label=labels[i],
c=colours[i],
yerr=failure_SEs[:, i])
plt.title('Failure rate vs. Acceptance rate')
plt.xlabel('Acceptance rate')
plt.ylabel('Failure rate')
plt.legend()
plt.grid()
plt.savefig(save_name + '_all')
plt.show()
print("\nFailure rates:")
print(np.array2string(failure_rates, formatter={'float_kind':lambda x: "%.5f" % x}))
print("\nMean absolute errors:")
for i in range(1, failure_rates.shape[1]):
print(
labels[i].ljust(len(max(labels, key=len))),
np.round(
np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])), 5))