# draw_figures.py

# -*- coding: utf-8 -*-
"""
@author: Riku_L

Script for creating all the result figures used in the paper.

Change the 'path' variable below to the folder on your computer that
contains the experiment results.
"""

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.transforms import Affine2D
from matplotlib.lines import Line2D

# Matplotlib defaults shared by the figures drawn in this script.
plt.rcParams.update({
    'font.size': 16,
    'figure.figsize': (10, 6),
})

# Prefix for every result file that is loaded and every figure that is saved.
# It is joined by plain string concatenation, so keep the trailing slash.
path = "C:/Users/Riku_L/Downloads/results/"

### Draw summary figures.
# One figure per z_coef value: for every combination of decision makers
# (deciderM, deciderH), plot the mean and the sample standard deviation of the
# error (true - estimate) of CFBI and of Contraction.

for z_coef in ["1", "5"]:

    plt.rcParams.update({'font.size': 16, 'figure.figsize': (10, 6)})

    fig, ax = plt.subplots()

    # Horizontal offsets so the two evaluators' markers sit side by side
    # around each integer x position instead of overlapping.
    shift_left = Affine2D().translate(-0.1, 0.0) + ax.transData
    shift_right = Affine2D().translate(+0.1, 0.0) + ax.transData

    legend_elements = [
        Line2D([0], [0], color='r', label='CFBI'),
        Line2D([0], [0], color='b', label='Contraction', ls='--'),
    ]

    decider_pairs = [(m, h)
                     for m in ["random", "batch"]
                     for h in ["random", "batch", "independent"]]
    tick_labels = []

    for x_pos, (deciderM, deciderH) in enumerate(decider_pairs):

        # Common part of the three result file names of this configuration.
        stem = (path + "_deciderH_" + deciderH + "_deciderM_" + deciderM
                + "_maxR_0_9coefZ" + z_coef)

        true = np.load(stem + "_0_true_FRs.npy")
        contraction = np.load(stem + "_0_contraction_FRs.npy")
        counterfactuals = np.load(stem + "_0_counterfactuals_FRs.npy")

        # Error of each evaluator, pooled over all entries of the arrays.
        cfbi_error = true - counterfactuals
        contraction_error = true - contraction

        cfbi_mean = np.nanmean(cfbi_error)
        contraction_mean = np.nanmean(contraction_error)

        # Plot errorbars (sample std, ddof=1)
        ax.errorbar(x_pos, cfbi_mean,
                    yerr=np.nanstd(cfbi_error, ddof=1),
                    fmt="o", transform=shift_left, c='r')
        contraction_bars = ax.errorbar(x_pos, contraction_mean,
                                       yerr=np.nanstd(contraction_error, ddof=1),
                                       fmt="o", transform=shift_right, c='b')

        # Dash the Contraction error bar so it matches its legend entry.
        contraction_bars[-1][0].set_linestyle('--')

        tick_labels.append("H: " + deciderH + "\nM: " + deciderM)

        print(np.round(cfbi_mean, 5), np.round(contraction_mean, 5))

    # Customize xticks
    plt.xticks(np.arange(0, len(tick_labels)), tick_labels, rotation=30)

    ax.set_ylabel("Error w.r.t. True evaluation")
    ax.set_xlabel("Decision makers")
    ax.grid(axis='y')
    ax.set_ylim((-0.08, 0.08))
    ax.axhline(0, c="k", linestyle=":", lw=1)
    ax.legend(handles=legend_elements, loc="lower right", title="$\\bf{Evaluators}$")

    # Save manually.

    plt.show()
    plt.close("all")


### Draw the single result figures for all different configurations.
# For every (z_coef, r, deciderM, deciderH) configuration, plot the mean
# failure rate against the acceptance rate for the four evaluators, with the
# sample standard deviation as error bars, and save the figure under 'path'.

plt.close("all")

# Settings shared by every configuration (loop invariant).
result_names = ["true", "labeled", "contraction", "counterfactuals"]
labels = ['True Evaluation', 'Labeled outcomes', 'Contraction', 'CFBI']
colours = ['g', 'magenta', 'b', 'r']
line_styles = ['--', ':', '-.', '-']
x_ax = np.arange(0.1, 0.9, 0.1)

for z_coef in ["1", "5"]:
    for r in ["5", "9"]:

        # The z_coef == "5", r == "5" setting is skipped entirely.
        if z_coef == "5" and r == "5":
            continue

        for deciderM in ["random", "batch"]:
            for deciderH in ["random", "batch", "independent", "probabilistic"]:

                # Common prefix of the input files and of the saved figure.
                stem = (path + "_deciderH_" + deciderH + "_deciderM_" + deciderM
                        + "_maxR_0_" + r + "coefZ" + z_coef + "_0_")

                failure_rates = np.zeros((8, 4))
                failure_stds = np.zeros((8, 4))

                for col, name in enumerate(result_names):
                    values = np.load(stem + name + "_FRs.npy")
                    failure_rates[:, col] = np.nanmean(values, axis=0)
                    # Compute sample std
                    failure_stds[:, col] = np.nanstd(values, axis=0, ddof=1)

                # General plot: Failure rate vs. Acceptance rate
                for col in range(failure_rates.shape[1]):
                    plt.errorbar(x_ax,
                                 failure_rates[:, col],
                                 label=labels[col],
                                 c=colours[col],
                                 linestyle=line_styles[col],
                                 yerr=failure_stds[:, col])

                plt.xlabel('Acceptance rate')
                plt.ylabel('Failure rate')
                plt.legend(title="$\\bf{Evaluators}$")
                plt.grid()

                plt.savefig(stem + "all")

                plt.show()

                # Start the next configuration from an empty figure. Without
                # this, backends whose show() does not destroy the figure
                # keep drawing every configuration onto the same axes.
                plt.close("all")

######

def baseFigure(true, labeled, contraction, counterfactuals, J):
    """Plot failure rate against acceptance rate for the four evaluators.

    Each of the four result arrays is reduced along axis 0: the NaN-ignoring
    mean gives the plotted curve and the NaN-ignoring sample standard
    deviation (ddof=1) gives the error bars. The reductions are stored in
    arrays with 8 rows, matching the 8 acceptance rates on the x axis.

    The figure is saved as ``path + "sl_compas_nJudges" + J + "_all"``,
    then shown and closed.

    J is used only in the output file name and is concatenated as-is, so it
    must be a string (the callers pass the number of judges, e.g. "12").
    """
    series = (true, labeled, contraction, counterfactuals)

    means = np.zeros((8, 4))
    spreads = np.zeros((8, 4))
    for col, data in enumerate(series):
        means[:, col] = np.nanmean(data, axis=0)
        spreads[:, col] = np.nanstd(data, axis=0, ddof=1)

    acceptance = np.arange(0.1, 0.9, 0.1)

    # (legend label, colour, line style) for each column, in column order.
    curve_styles = [
        ('True Evaluation', 'g', '--'),
        ('Labeled outcomes', 'magenta', ':'),
        ('Contraction', 'b', '-.'),
        ('CFBI', 'r', '-'),
    ]

    for col, (label, colour, line_style) in enumerate(curve_styles):
        plt.errorbar(acceptance,
                     means[:, col],
                     yerr=spreads[:, col],
                     label=label,
                     c=colour,
                     linestyle=line_style)

    plt.xlabel('Acceptance rate')
    plt.ylabel('Failure rate')
    plt.legend(title="$\\bf{Evaluators}$")
    plt.grid()

    plt.savefig(path + "sl_compas_nJudges" + J + "_all")

    plt.show()

    plt.close('all')

### Draw COMPAS result figures (FR VS AR).
# One figure per number of judges J. The J = 12 result files carry no
# "nJudges" tag in their names, hence the bare "sl_sl_compas_" stem.

for stem, J in [("sl_sl_compas_", "12"),
                ("sl_sl_compas_nJudges24", "24"),
                ("sl_sl_compas_nJudges48", "48")]:

    true = np.load(path + stem + "_true_FRs.npy")
    labeled = np.load(path + stem + "_labeled_FRs.npy")
    contraction = np.load(path + stem + "_contraction_FRs.npy")
    counterfactuals = np.load(path + stem + "_counterfactuals_FRs.npy")

    baseFigure(true, labeled, contraction, counterfactuals, J)

### Draw COMPAS result figure (FR error vs number of judges).
# For each number of judges, plot the mean and the sample standard deviation
# of the error (true - estimate) of CFBI and of Contraction.

plt.close("all")

fig, ax = plt.subplots()

# Horizontal offsets so the two evaluators' markers sit side by side around
# each integer x position instead of overlapping.
trans1 = Affine2D().translate(-0.1, 0.0) + ax.transData
trans2 = Affine2D().translate(+0.1, 0.0) + ax.transData

legend_elements = [Line2D([0], [0], color='r', label='CFBI'),
                   Line2D([0], [0], color='b', label='Contraction', ls='--')]

# (result file stem, x tick label) per number of judges. The J = 12 files
# carry no "nJudges" tag in their names.
judge_runs = [("sl_sl_compas_", "12"),
              ("sl_sl_compas_nJudges24", "24"),
              ("sl_sl_compas_nJudges48", "48")]

ticks = []

for i, (stem, n_judges) in enumerate(judge_runs):

    true = np.load(path + stem + "_true_FRs.npy")
    contraction = np.load(path + stem + "_contraction_FRs.npy")
    counterfactuals = np.load(path + stem + "_counterfactuals_FRs.npy")

    # Counterfactuals
    y1 = (true - counterfactuals)
    ymean1 = np.nanmean(y1)
    yerr1 = np.nanstd(y1, ddof=1)

    # Contraction
    y2 = (true - contraction)
    ymean2 = np.nanmean(y2)
    yerr2 = np.nanstd(y2, ddof=1)

    # Plot errorbars
    er1 = ax.errorbar(i, ymean1, yerr=yerr1, fmt="o", transform=trans1, c='r')
    er2 = ax.errorbar(i, ymean2, yerr=yerr2, fmt="o", transform=trans2, c='b')

    # Dash the Contraction error bar so it matches its legend entry.
    er2[-1][0].set_linestyle('--')

    ticks.append(n_judges)

# Customize xticks
plt.xticks(np.arange(0, len(ticks)), ticks)

plt.ylabel("Error w.r.t. True evaluation")
plt.xlabel("Number of judges")
plt.grid(axis='y')
plt.axhline(0, c="k", linestyle=":", lw=1)
ax.legend(handles=legend_elements, loc='upper left', title="$\\bf{Evaluators}$")

plt.savefig(path + "sl_errors_compas")

plt.show()

# Do not leave this figure current: later pyplot calls would otherwise draw
# into it on backends whose show() does not destroy the figure.
plt.close("all")

### Draw fixed figure
# Same failure rate vs. acceptance rate plot as for the other configurations,
# but for one configuration and with Contraction's estimate at acceptance
# rate 0.6 removed. Saved with the suffix "_all_fixed".

# Start from an empty figure: the plt.* calls below draw on the current
# figure, which would otherwise be whatever an earlier section left open.
plt.close("all")

deciderH = "independent"
deciderM = "batch"
r = "5"
z_coef = "1"

# Common prefix of the input files and of the saved figure.
stem = (path + "_deciderH_" + deciderH + "_deciderM_" + deciderM
        + "_maxR_0_" + r + "coefZ" + z_coef + "_0_")

true = np.load(stem + "true_FRs.npy")
labeled = np.load(stem + "labeled_FRs.npy")
contraction = np.load(stem + "contraction_FRs.npy")
counterfactuals = np.load(stem + "counterfactuals_FRs.npy")

failure_rates = np.zeros((8, 4))
failure_stds = np.zeros((8, 4))

for col, values in enumerate((true, labeled, contraction, counterfactuals)):
    failure_rates[:, col] = np.nanmean(values, axis=0)
    # Compute sample std
    failure_stds[:, col] = np.nanstd(values, axis=0, ddof=1)

x_ax = np.arange(0.1, 0.9, 0.1)

labels = ['True Evaluation', 'Labeled outcomes', 'Contraction',
          'CFBI']

colours = ['g', 'magenta', 'b', 'r']

line_styles = ['--', ':', '-.', '-']

# Fix: Remove contractions 0.6 leniency estimate.
# Row 5 is x_ax[5] (0.6) and column 2 is Contraction; NaN leaves a gap in
# the plotted curve at that point.
failure_rates[5, 2] = np.nan
failure_stds[5, 2] = np.nan

# General plot: Failure rate vs. Acceptance rate
for col in range(failure_rates.shape[1]):
    plt.errorbar(x_ax,
                 failure_rates[:, col],
                 label=labels[col],
                 c=colours[col],
                 linestyle=line_styles[col],
                 yerr=failure_stds[:, col])

plt.xlabel('Acceptance rate')
plt.ylabel('Failure rate')
plt.legend(title="$\\bf{Evaluators}$")
plt.grid()

plt.savefig(stem + "all_fixed")

plt.show()

plt.close("all")