# -*- coding: utf-8 -*- """ @author: Riku_L Script for creating all the result figures used in the paper. Change 'path' variable below to the folder in your computer containg the experiment results. """ import numpy as np import matplotlib.pyplot as plt from matplotlib.transforms import Affine2D from matplotlib.lines import Line2D plt.rcParams.update({'font.size': 16}) plt.rcParams.update({'figure.figsize': (10, 6)}) path = "C:/Users/Riku_L/Downloads/results/" ### Draw summary figures. for z_coef in ["1", "5"]: plt.rcParams.update({'font.size': 16}) plt.rcParams.update({'figure.figsize': (10, 6)}) fig, ax = plt.subplots() trans1 = Affine2D().translate(-0.1, 0.0) + ax.transData trans2 = Affine2D().translate(+0.1, 0.0) + ax.transData legend_elements = [Line2D([0], [0], color='r', label='CFBI'), Line2D([0], [0], color='b', label='Contraction', ls='--')] ticks = [] i = 0 for deciderM in ["random", "batch"]: for deciderH in ["random", "batch", "independent"]: true = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_9coefZ" + z_coef + "_0_true_FRs.npy") contraction = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_9coefZ" + z_coef + "_0_contraction_FRs.npy") counterfactuals = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_9coefZ" + z_coef + "_0_counterfactuals_FRs.npy") # Counterfactuals y1 = (true - counterfactuals) ymean1 = np.nanmean(y1) yerr1 = np.nanstd(y1, ddof=1) # Contraction y2 = (true - contraction) ymean2 = np.nanmean(y2) yerr2 = np.nanstd(y2, ddof=1) # Plot errorbars er1 = ax.errorbar(i, ymean1, yerr=yerr1, fmt="o", transform=trans1, c='r') er2 = ax.errorbar(i, ymean2, yerr=yerr2, fmt="o", transform=trans2, c='b') # Set errorbar linestyle er2[-1][0].set_linestyle('--') ticks = np.append(ticks, "H: " + deciderH + "\nM: " + deciderM) print(np.round(ymean1, 5), np.round(ymean2, 5)) i = i + 1 # Customize xticks plt.xticks(np.arange(0, i), ticks, rotation=30) plt.ylabel("Error 
w.r.t. True evaluation") plt.xlabel("Decision makers") plt.grid(axis='y') plt.ylim((-0.08, 0.08)) plt.axhline(0, c="k", linestyle=":", lw=1) ax.legend(handles=legend_elements, loc = "lower right", title="$\\bf{Evaluators}$") # Save manually. plt.show() plt.close("all") ### Draw the single result figures for all different configurations. plt.close("all") for z_coef in ["1", "5"]: for r in ["5", "9"]: for deciderM in ["random", "batch"]: for deciderH in ["random", "batch", "independent", "probabilistic"]: if z_coef == "5" and r == "5": continue true = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_true_FRs.npy") labeled = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_labeled_FRs.npy") contraction = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_contraction_FRs.npy") counterfactuals = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_counterfactuals_FRs.npy") failure_rates = np.zeros((8, 4)) failure_stds = np.zeros((8, 4)) failure_rates[:, 0] = np.nanmean(true, axis=0) failure_rates[:, 1] = np.nanmean(labeled, axis=0) failure_rates[:, 2] = np.nanmean(contraction, axis=0) failure_rates[:, 3] = np.nanmean(counterfactuals, axis=0) # Compute sample std failure_stds[:, 0] = np.nanstd(true, axis=0, ddof=1) failure_stds[:, 1] = np.nanstd(labeled, axis=0, ddof=1) failure_stds[:, 2] = np.nanstd(contraction, axis=0, ddof=1) failure_stds[:, 3] = np.nanstd(counterfactuals, axis=0, ddof=1) x_ax = np.arange(0.1, 0.9, 0.1) labels = ['True Evaluation', 'Labeled outcomes', 'Contraction', 'CFBI'] colours = ['g', 'magenta', 'b', 'r'] line_styles = ['--', ':', '-.', '-'] # General plot: Failure rate vs. 
Acceptance rate for i in range(failure_rates.shape[1]): plt.errorbar(x_ax, failure_rates[:, i], label=labels[i], c=colours[i], linestyle=line_styles[i], yerr=failure_stds[:, i]) plt.xlabel('Acceptance rate') plt.ylabel('Failure rate') plt.legend(title="$\\bf{Evaluators}$") plt.grid() plt.savefig(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_all") plt.show() ###### def baseFigure(true, labeled, contraction, counterfactuals, J): failure_rates = np.zeros((8, 4)) failure_stds = np.zeros((8, 4)) failure_rates[:, 0] = np.nanmean(true, axis=0) failure_rates[:, 1] = np.nanmean(labeled, axis=0) failure_rates[:, 2] = np.nanmean(contraction, axis=0) failure_rates[:, 3] = np.nanmean(counterfactuals, axis=0) failure_stds[:, 0] = np.nanstd(true, axis=0, ddof=1) failure_stds[:, 1] = np.nanstd(labeled, axis=0, ddof=1) failure_stds[:, 2] = np.nanstd(contraction, axis=0, ddof=1) failure_stds[:, 3] = np.nanstd(counterfactuals, axis=0, ddof=1) x_ax = np.arange(0.1, 0.9, 0.1) labels = ['True Evaluation', 'Labeled outcomes', 'Contraction', 'CFBI'] colours = ['g', 'magenta', 'b', 'r'] line_styles = ['--', ':', '-.', '-'] for i in range(failure_rates.shape[1]): plt.errorbar(x_ax, failure_rates[:, i], label=labels[i], c=colours[i], linestyle=line_styles[i], yerr=failure_stds[:, i]) plt.xlabel('Acceptance rate') plt.ylabel('Failure rate') plt.legend(title="$\\bf{Evaluators}$") plt.grid() plt.savefig(path + "sl_compas_nJudges" + J + "_all") plt.show() plt.close('all') ### Draw COMPAS result figures (FR VS AR). 
# One FR-vs-AR figure per judge count. Note: the J = 12 results were saved
# without an nJudges tag in the file name; the 24 and 48 runs carry the tag.
for J, tag in [("12", ""), ("24", "nJudges24"), ("48", "nJudges48")]:
    stem = path + "sl_sl_compas_" + tag + "_"
    true = np.load(stem + "true_FRs.npy")
    labeled = np.load(stem + "labeled_FRs.npy")
    contraction = np.load(stem + "contraction_FRs.npy")
    counterfactuals = np.load(stem + "counterfactuals_FRs.npy")
    baseFigure(true, labeled, contraction, counterfactuals, J)


### Draw COMPAS result figure (FR error vs number of judges).
plt.close("all") fig, ax = plt.subplots() trans1 = Affine2D().translate(-0.1, 0.0) + ax.transData trans2 = Affine2D().translate(+0.1, 0.0) + ax.transData legend_elements = [Line2D([0], [0], color='r', label='CFBI'), Line2D([0], [0], color='b', label='Contraction', ls='--')] i=0 ticks = [] # J = 12 true = np.load(path + "sl_sl_compas__true_FRs.npy") contraction = np.load(path + "sl_sl_compas__contraction_FRs.npy") counterfactuals = np.load(path + "sl_sl_compas__counterfactuals_FRs.npy") # Counterfactuals y1 = (true - counterfactuals) ymean1 = np.nanmean(y1) yerr1 = np.nanstd(y1, ddof=1) # Contraction y2 = (true - contraction) ymean2 = np.nanmean(y2) yerr2 = np.nanstd(y2, ddof=1) # Plot errorbars er1 = ax.errorbar(i, ymean1, yerr=yerr1, fmt="o", transform=trans1, c='r') er2 = ax.errorbar(i, ymean2, yerr=yerr2, fmt="o", transform=trans2, c='b') # Set errorbar linestyle er2[-1][0].set_linestyle('--') ticks = np.append(ticks, "12") i = i + 1 # J = 24 true = np.load(path + "sl_sl_compas_nJudges24_true_FRs.npy") contraction = np.load(path + "sl_sl_compas_nJudges24_contraction_FRs.npy") counterfactuals = np.load(path + "sl_sl_compas_nJudges24_counterfactuals_FRs.npy") # Counterfactuals y1 = (true - counterfactuals) ymean1 = np.nanmean(y1) yerr1 = np.nanstd(y1, ddof=1) # Contraction y2 = (true - contraction) ymean2 = np.nanmean(y2) yerr2 = np.nanstd(y2, ddof=1) # Plot errorbars er1 = ax.errorbar(i, ymean1, yerr=yerr1, fmt="o", transform=trans1, c='r') er2 = ax.errorbar(i, ymean2, yerr=yerr2, fmt="o", transform=trans2, c='b') # Set errorbar linestyle er2[-1][0].set_linestyle('--') ticks = np.append(ticks, "24") i = i + 1 # J = 48 true = np.load(path + "sl_sl_compas_nJudges48_true_FRs.npy") contraction = np.load(path + "sl_sl_compas_nJudges48_contraction_FRs.npy") counterfactuals = np.load(path + "sl_sl_compas_nJudges48_counterfactuals_FRs.npy") # Counterfactuals y1 = (true - counterfactuals) ymean1 = np.nanmean(y1) yerr1 = np.nanstd(y1, ddof=1) # Contraction y2 = (true - 
contraction) ymean2 = np.nanmean(y2) yerr2 = np.nanstd(y2, ddof=1) # Plot errorbars er1 = ax.errorbar(i, ymean1, yerr=yerr1, fmt="o", transform=trans1, c='r') er2 = ax.errorbar(i, ymean2, yerr=yerr2, fmt="o", transform=trans2, c='b') # Set errorbar linestyle er2[-1][0].set_linestyle('--') ticks = np.append(ticks, "48") i = i + 1 # Customize xticks plt.xticks(np.arange(0, i), ticks) plt.ylabel("Error w.r.t. True evaluation") plt.xlabel("Number of judges") plt.grid(axis='y') plt.axhline(0, c="k", linestyle=":", lw=1) ax.legend(handles=legend_elements, loc='upper left', title="$\\bf{Evaluators}$") plt.savefig(path + "sl_errors_compas") plt.show() ### Draw fixed figure deciderH = "independent" deciderM = "batch" r = "5" z_coef = "1" true = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_true_FRs.npy") labeled = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_labeled_FRs.npy") contraction = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_contraction_FRs.npy") counterfactuals = np.load(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_counterfactuals_FRs.npy") failure_rates = np.zeros((8, 4)) failure_stds = np.zeros((8, 4)) failure_rates[:, 0] = np.nanmean(true, axis=0) failure_rates[:, 1] = np.nanmean(labeled, axis=0) failure_rates[:, 2] = np.nanmean(contraction, axis=0) failure_rates[:, 3] = np.nanmean(counterfactuals, axis=0) # Compute sample std failure_stds[:, 0] = np.nanstd(true, axis=0, ddof=1) failure_stds[:, 1] = np.nanstd(labeled, axis=0, ddof=1) failure_stds[:, 2] = np.nanstd(contraction, axis=0, ddof=1) failure_stds[:, 3] = np.nanstd(counterfactuals, axis=0, ddof=1) x_ax = np.arange(0.1, 0.9, 0.1) labels = ['True Evaluation', 'Labeled outcomes', 'Contraction', 'CFBI'] colours = ['g', 'magenta', 'b', 'r'] line_styles = ['--', ':', '-.', 
'-'] # Fix: Remove contractions 0.6 leniency estimate. failure_rates[5, 2] = np.nan failure_stds[5, 2] = np.nan # General plot: Failure rate vs. Acceptance rate for i in range(failure_rates.shape[1]): plt.errorbar(x_ax, failure_rates[:, i], label=labels[i], c=colours[i], linestyle=line_styles[i], yerr=failure_stds[:, i]) plt.xlabel('Acceptance rate') plt.ylabel('Failure rate') plt.legend(title="$\\bf{Evaluators}$") plt.grid() plt.savefig(path + "_deciderH_" + deciderH + "_deciderM_" + deciderM + "_maxR_0_" + r + "coefZ" + z_coef + "_0_all_fixed") plt.show()