The inverse cumulative function $F^{-1}(r)$ in equations \ref{eq:Tprob} and \ref{eq:Tdet} is the inverse cumulative distribution function of the \emph{logit-normal distribution} with parameters $\mu=0$ and $\sigma^2=2$, i.e. of the distribution of the logistic transformation of the sum of two standard Gaussians. If $\beta_X \neq 1$ and/or $\beta_Z \neq 1$, the basic properties of variance give $\sigma^2=\mathrm{Var}(\beta_X X+\beta_Z Z)=\beta_X^2\mathrm{Var}(X)+\beta_Z^2\mathrm{Var}(Z)$. The inverse cumulative function is then
\begin{equation} \label{eq:cum_inv}
F^{-1}(r) = \invlogit\left(\mu + \sqrt{2\sigma^2}\,\text{erf}^{-1}(2r-1)\right)
\end{equation}
where $\mu$ and $\sigma^2$ are as discussed above and $\text{erf}^{-1}$ is the inverse of the error function.
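As a concrete illustration (not part of the algorithm listings), equation \ref{eq:cum_inv} can be evaluated numerically, assuming SciPy's \texttt{erfinv} and \texttt{expit} (the inverse logit) are available:
\begin{verbatim}
# Minimal sketch of equation (cum_inv), assuming SciPy is available.
import numpy as np
from scipy.special import erfinv, expit   # expit(x) = 1 / (1 + exp(-x))

def F_inv(r, mu=0.0, sigma2=2.0):
    """Inverse CDF of a logit-normal(mu, sigma2) distribution at r in (0, 1)."""
    return expit(mu + np.sqrt(2.0 * sigma2) * erfinv(2.0 * r - 1.0))
\end{verbatim}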
With this knowledge, we can state that if we observed $T=0$ with some $x$ and $r$, it must have been that $\invlogit(\beta_X x+\beta_Z z) \geq F^{-1}(r)$. Taking $\beta_X = \beta_Z = 1$ for brevity, basic algebra gives
\begin{equation} \label{eq:bounds}
\invlogit(x + z) \geq F^{-1}(r) \Leftrightarrow x+z \geq \text{logit}(F^{-1}(r)) \Leftrightarrow z \geq \text{logit}(F^{-1}(r)) - x
\end{equation}
as the logit function and its inverse are strictly increasing and hence order-preserving for all pairs of values in their domains. From equations \ref{eq:posterior_Z}, \ref{eq:Tprob} and \ref{eq:bounds} we can conclude that $\pr(Z < \text{logit}(F^{-1}(r)) - x | T=0, X=x, R=r) = 0$ and that elsewhere the distribution of $Z$ follows a standard Gaussian. The expectation of $Z$ can therefore be computed analytically from the formulas for truncated Gaussians: for a standard Gaussian, $E(Z | Z > a) = \phi(a)/(1-\Phi(a))$ and $E(Z | Z < a) = -\phi(a)/\Phi(a)$, where $\phi$ and $\Phi$ denote the standard Gaussian density and cumulative distribution function. All of this carries over analogously to the cases with $T=1$, with some of the inequalities reversed.
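A minimal sketch of these truncated-Gaussian expectations, assuming SciPy is available (again, not part of the algorithm listings):
\begin{verbatim}
# Sketch of the truncated standard Gaussian expectations (SciPy assumed).
from scipy.stats import norm

def E_upper(a):
    """E[Z | Z > a] for standard Gaussian Z."""
    return norm.pdf(a) / norm.sf(a)      # sf(a) = 1 - cdf(a)

def E_lower(a):
    """E[Z | Z < a] for standard Gaussian Z."""
    return -norm.pdf(a) / norm.cdf(a)
\end{verbatim}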
In practice, in lines 1--3 and 10--13 of algorithm \ref{alg:eval:mc} we proceed as in the True evaluation algorithm, with the distinction that some of the values of $Y$ are imputed using the corresponding counterfactual probabilities. In line 4 we compute the bounds as motivated above. In the for-loop (lines 5--8) we compute the expectation of $Z$ given the decision, using the fact that $Z$ follows a truncated Gaussian. The equation
\begin{equation}
\hat{z} = (1-t) \cdot E(Z | Z > Q_r) + t \cdot E(Z | Z < Q_r)
\end{equation}
selects the correct truncation side automatically for both decision values. Using the expectation, we then compute the probability of the counterfactual $\pr(Y(1) = 0)$, the probability of a negative outcome had a positive decision been given. In line 9 the imputation can be performed in a couple of ways: either by taking a random guess with probability $\pr(Y(1) = 0)$ or by assigning the most likely value for $Y$.
\begin{algorithm}[H] % enter the algorithm environment
\caption{Evaluator module: Analytic solution} % give the algorithm a caption
\label{alg:eval:mc} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$, acceptance rate $r$
\ENSURE Estimate of the failure rate at acceptance rate $r$
\STATE Split data to test set and training set.
\STATE Train a predictive model $\B$ on training data.
\STATE Estimate and assign probability scores $\s$ using $\B$ for all observations in test data.
\STATE Compute bounds $Q_r = \text{logit}(F^{-1}(r)) - x$ for all judges.
\FORALL{observations in test set}
\STATE Compute expectation $\hat{z} = (1-t) \cdot E(Z | Z > Q_r) + t \cdot E(Z | Z < Q_r)$. %
\STATE Compute $\pr(Y(1) = 0) = \invlogit(x + \hat{z})$.
\ENDFOR
\STATE Impute the missing outcomes using the estimates $\pr(Y(1) = 0)$.
\STATE Sort the data by the probabilities $\s$ to ascending order.
\STATE \hskip3.0em $\rhd$ Now the most dangerous subjects are last.
\STATE Calculate the number to release $N_{free} = |\D_{test}| \cdot r$.
\RETURN $\frac{1}{|\D_{test}|}\sum_{i=1}^{N_{free}}\delta\{y_i=0\}$, computed using the observed and imputed outcomes.
\end{algorithmic}
\end{algorithm}
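For illustration, a vectorized sketch of lines 4--9 of algorithm \ref{alg:eval:mc}, building on the helper sketches \texttt{F\_inv}, \texttt{E\_upper} and \texttt{E\_lower} above; the arrays \texttt{x}, \texttt{t}, \texttt{y} over the test set and the scalar acceptance rate \texttt{r} are hypothetical names, not the paper's code.
\begin{verbatim}
# Hypothetical sketch of lines 4--9 of the evaluator.
import numpy as np
from scipy.special import logit, expit

Q_r = logit(F_inv(r)) - x                            # line 4: bounds
z_hat = (1 - t) * E_upper(Q_r) + t * E_lower(Q_r)    # lines 5--8: E(Z | decision)
p_y0 = expit(x + z_hat)                              # P(Y(1) = 0)
y_hat = np.where(p_y0 >= 0.5, 0, 1)                  # most likely label
y_imp = np.where(t == 0, y_hat, y)                   # line 9: impute unobserved Y
\end{verbatim}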
The following table summarizes the different modules.
\begin{table}[H]
\caption{Summary of modules (under construction)}
\begin{tabular}{lll}
\toprule
\multicolumn{3}{c}{Module type} \\[.5\normalbaselineskip]
\textbf{Data generator} & \textbf{Decider} & \textbf{Evaluator} \\
\midrule
{\ul Without unobservables} & {\ul Independent decisions} & {\ul Labeled outcomes} \\
& 1. draw T from a Bernoulli & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
{\ul With unobservables} & with $P(T=0|X, Z)$ & \tabitem acceptance rate r \\
\tabitem $P(Y=0|X, Z, W)$ & & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
{\ul With unobservables} & 2. determine with $F^{-1}(r)$ & {\ul True evaluation} \\
\tabitem assign $Y=1$ & & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
if $P(Y=0|X, Z, W) \geq 0.5$ & {\ul Non-independent decisions} & and \emph{all outcome labels} \\
& 3. sort by $P(T=0|X, Z)$ & \tabitem acceptance rate r \\
& and assign $t$ by $r$ & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
& & {\ul Human evaluation} \\
& & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
& & \tabitem acceptance rate r \\[.5\normalbaselineskip]
& & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
& & \tabitem acceptance rate r \\
& & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
% & & {\ul Causal model} \\
% & & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
% & & \tabitem acceptance rate r \\
% & & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
& & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
& & \tabitem acceptance rate r \\
& & \tabitem knowledge that X affects Y \\
& & \tabitem more intricate knowledge about $\M$ ? \\[.5\normalbaselineskip]
& & {\ul Potential outcomes evaluator} \\
& & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
& & \tabitem acceptance rate r \\
& & \tabitem knowledge that X affects Y \\
\bottomrule
\end{tabular}
\end{table}
\section{Old results} \label{sec:results}
Results obtained from running algorithm \ref{alg:perf_comp} are presented in table \ref{tab:results} and figure \ref{fig:results}. All parameters were at their default values and a logistic regression model was trained.
\begin{table}[H]
\centering
\caption{Mean absolute error (MAE) w.r.t.\ true evaluation. \\ \emph{RL: Updated 26 June.}}
\begin{tabular}{l | c c}
Method & MAE without Z & MAE with Z \\ \hline
Labeled outcomes & 0.107249375 & 0.0827844\\
Human evaluation & 0.002383729 & 0.0042517\\
Contraction & 0.004633164 & 0.0075497\\
Causal model, ep & 0.000598624 & 0.0411532\\
\end{tabular}
\label{tab:results}
\end{table}
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.5\textwidth}
\includegraphics[width=\textwidth]{sl_without_Z_8iter}
\caption{Results without unobservables}
\label{fig:results_without_Z}
\end{subfigure}
~ %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.5\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_8iter_betaZ_1_0}
\caption{Results with unobservables, $\beta_Z=1$.}
\label{fig:results_with_Z}
\end{subfigure}
\caption{Failure rate vs. acceptance rate with varying levels of leniency. Logistic regression was trained on labeled training data. \emph{RL: Updated 26 June.}}
\label{fig:results}
\end{figure}
\subsection{$\beta_Z=0$ and data generated with unobservables}
If we set $\beta_Z=0$, all failure rates except that of human evaluation drop close to zero for acceptance rates in the interval $0.1, \ldots, 0.3$. Results are presented in figures \ref{fig:betaZ_1_5} and \ref{fig:betaZ_0}.
The disparities between figures \ref{fig:results_without_Z} and \ref{fig:betaZ_0} (results without unobservables and with $\beta_Z=0$, respectively) can be explained by a slight difference in the data generating process, namely the effect of $\epsilon$. The effect of adding $\epsilon$ (noise in the decisions) is explored further in section \ref{sec:epsilon}.
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_4iter_betaZ_1_5}
\caption{Results with unobservables, $\beta_Z$ set to 1.5 in algorithm \ref{alg:data_with_Z}.}
\label{fig:betaZ_1_5}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_4iter_beta0}
\caption{Results with unobservables, $\beta_Z$ set to 0 in algorithm \ref{alg:data_with_Z}.}
\label{fig:betaZ_0}
\end{subfigure}
\caption{Effect of $\beta_z$. Failure rate vs. acceptance rate with unobservables in the data (see algorithm \ref{alg:data_with_Z}). Logistic regression was trained on labeled training data. Results from algorithm \ref{alg:perf_comp}.}
\label{fig:betaZ_comp}
\end{figure}
\subsection{Noise added to the decision and data generated without unobservables} \label{sec:epsilon}
In this experiment, Gaussian noise with zero mean and variance 0.1 was added to the probabilities $P(Y=0|X=x)$ after sampling $Y$ but before ordering the observations in line 5 of algorithm \ref{alg:data_without_Z}. Results are presented in figure \ref{fig:sigma_figure}.
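As a sketch with hypothetical names, where \texttt{p} holds the probabilities $P(Y=0|X=x)$, the perturbation amounts to:
\begin{verbatim}
# Hypothetical sketch: perturb the probabilities with N(0, 0.1) noise
# (variance 0.1, i.e. standard deviation sqrt(0.1)) before ordering.
import numpy as np

rng = np.random.default_rng()
p_noisy = p + rng.normal(0.0, np.sqrt(0.1), size=p.shape)
order = np.argsort(p_noisy)   # ordering step of the data generator
\end{verbatim}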
\begin{figure}[]
\centering
\includegraphics[width=0.5\textwidth]{sl_without_Z_3iter_sigma_sqrt_01}
\caption{Failure rate with varying levels of leniency without unobservables. Noise has been added to the decision probabilities. Logistic regression was trained on labeled training data.}
\label{fig:sigma_figure}
\end{figure}
\subsection{Predictions with random forest classifier} \label{sec:random_forest}
In this section the predictive model was switched to a random forest classifier to examine the effect of changing the predictive model. The results, presented in figure \ref{fig:random_forest}, are practically identical to those presented previously in figure \ref{fig:results}.
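Assuming scikit-learn, the change amounts to swapping the predictive model $\B$; the split variables below are hypothetical names:
\begin{verbatim}
# Sketch of the model swap, assuming scikit-learn.
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()             # was: LogisticRegression()
model.fit(X_train, y_train)
s = model.predict_proba(X_test)[:, 0]        # probability scores, P(Y = 0)
\end{verbatim}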
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_withoutZ_4iter_randomforest}
\caption{Results without unobservables.}
\label{fig:results_without_Z_rf}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_withZ_6iter_betaZ_1_0_randomforest}
\caption{Results with unobservables, $\beta_Z=1$.}
\label{fig:results_with_Z_rf}
\end{subfigure}
\caption{Failure rate vs. acceptance rate with varying levels of leniency. Random forest classifier was trained on labeled training data.}
\label{fig:random_forest}
\end{figure}
\subsection{Sanity check for predictions}
The predictions were checked by plotting the predicted $Y$ against $X$; the results are presented in figure \ref{fig:sanity_check}. The figure indicates that the predicted class labels and their probabilities are consistent with the ground truth.
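A sketch of the check, assuming matplotlib and hypothetical arrays \texttt{x\_test}, \texttt{p\_y1} (predicted probabilities of $Y=1$) and \texttt{y\_test} (ground-truth labels):
\begin{verbatim}
# Hypothetical sketch of the sanity check plot, assuming matplotlib.
import matplotlib.pyplot as plt

plt.scatter(x_test, p_y1, c=y_test)   # point color = ground-truth label
plt.xlabel("X")
plt.ylabel("Predicted P(Y = 1)")
plt.show()
\end{verbatim}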
\begin{figure}[]
\centering
\includegraphics[width=0.5\textwidth]{sanity_check}
\caption{Predicted class label and probability of $Y=1$ versus $X$. Predictions were made with a logistic regression model. Colors of the points denote the ground truth (yellow = 1, purple = 0). The data set was created with unobservables.}
\label{fig:sanity_check}
\end{figure}
\subsection{Fully random model $\M$}
Given our framework defined in section \ref{sec:framework}, the results presented next use a model $\M$ that outputs probability 0.5 for every instance $x$. The labeling process is still as presented in algorithm \ref{alg:data_with_Z}.
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_without_Z_15iter_random_model}
\caption{Failure rate vs. acceptance rate. Data without unobservables. Machine predictions with random model.}
\label{fig:random_predictions_without_Z}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_15iter_fully_random_model}
\caption{Failure rate vs. acceptance rate. Data with unobservables. Machine predictions with random model.}
\label{fig:random_predictions_with_Z}
\end{subfigure}
\caption{Failure rate vs. acceptance rate with varying levels of leniency. Machine predictions were made with a completely random model, i.e. $P(Y=0|X=x)=0.5$ for all $x$.}
\label{fig:random_predictions}
\end{figure}
\subsection{Modular framework -- Monte Carlo evaluator} \label{sec:modules_mc}
For these results, data was generated either with the module in algorithm \ref{alg:dg:coinflip_with_z} (drawing $Y$ from a Bernoulli distribution with parameter $\pr(Y=0|X, Z, W)$, as previously) or with the module in algorithm \ref{alg:dg:threshold_with_Z} (assigning $Y$ based on the value of $\invlogit(\beta_XX+\beta_ZZ)$). Decisions were determined using one of two modules: the module in algorithm \ref{alg:decider:quantile} (decisions based on quantiles) or the module in algorithm \ref{alg:decider:lakkaraju} (``human'' decision-maker as in \cite{lakkaraju17}). Curves were computed with the True evaluation (algorithm \ref{alg:eval:true_eval}), Labeled outcomes (\ref{alg:eval:labeled_outcomes}), Human evaluation (\ref{alg:eval:human_eval}), Contraction (\ref{alg:eval:contraction}) and Monte Carlo (\ref{alg:eval:mc}) evaluators. Results are presented in figure \ref{fig:modules_mc}; the corresponding MAEs are presented in table \ref{tab:modules_mc}.
From the result table we can see that the MAE is lowest when the data generating process corresponds closely to the assumptions of the Monte Carlo algorithm.
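Here the MAE of an evaluator is the mean absolute difference between its failure-rate curve and the true-evaluation curve over the same grid of acceptance rates; as a sketch with hypothetical arrays:
\begin{verbatim}
# Sketch: MAE of an evaluator w.r.t. true evaluation. fr_method and
# fr_true are failure-rate curves on the same grid of acceptance rates.
import numpy as np

mae = np.mean(np.abs(fr_method - fr_true))
\end{verbatim}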
\begin{table}[]
\centering
\caption{Mean absolute error w.r.t.\ true evaluation. See section \ref{sec:modules_mc} for the modules used. Bern = Bernoulli, indep. = independent, TH = threshold}
\begin{tabular}{l | c c c c}
Method & Bern + indep. & Bern + non-indep. & TH + indep. & TH + non-indep.\\ \hline
Labeled outcomes & 0.111075 & 0.103235 & 0.108506 & 0.0970325\\
Human evaluation & 0.027298 & NaN (TBA) & 0.049582 & 0.0033916\\
Contraction & 0.004206 & 0.004656 & 0.005557 & 0.0034591\\
Monte Carlo & 0.001292 & 0.016629 & 0.009429 & 0.0179825\\
\end{tabular}
\label{tab:modules_mc}
\end{table}
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_10iter_coinflip_quantile_defaults_mc}
\caption{Outcome Y from Bernoulli, independent decisions using the quantiles.}
%\label{fig:modules_mc_without_Z}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_20iter_threshold_quantile_defaults_mc}
\caption{Outcome Y from threshold rule, independent decisions using the quantiles.}
%\label{fig:modules_mc_with_Z}
\end{subfigure}
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_10iter_coinflip_lakkarajudecider_defaults_mc}
\caption{Outcome Y from Bernoulli, non-independent decisions.}
%\label{fig:modules_mc_without_Z}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_10iter_threshold_lakkarajudecider_defaults_mc}
\caption{Outcome Y from threshold rule, non-independent decisions.}
%\label{fig:modules_mc_with_Z}
\end{subfigure}
\caption{Failure rate vs. acceptance rate with varying levels of leniency for different combinations of decider and data generation modules. See section \ref{sec:modules_mc} for the other modules used.}
\label{fig:modules_mc}
\end{figure}
\section{Diagnostic figures} \label{sec:diagnostic}
Here we present supplementary figures for all the settings in the main results section.
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_1_sl_bernoulli_independent_without_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_2_sl_bernoulli_independent_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_3_sl_threshold_independent_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_4_sl_bernoulli_batch_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_5_sl_threshold_batch_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_6_sl_random_decider_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_7_sl_biased_decider_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_8_sl_bad_decider_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_9_sl_bernoulli_bernoulli_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_10_sl_threshold_batch_beta_Z_3_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{expanded_model_11_sl_threshold_batch_beta_Z_5_with_Z_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{sl_compas_diagnostic_plot}
\caption{Results from estimating failure rate with different levels of leniency using different methods in the COMPAS data set.}
\label{fig:diagnostic_compas}
\end{figure}
%\begin{wrapfigure}{r}{0.3\textwidth} %this figure will be at the right
% \centering
% \begin{tikzpicture}[->,>=stealth',node distance=2cm, semithick]
%
% \tikzstyle{every state}=[fill=none,draw=black,text=black]
%
% \node[state] (R) {$R$};
% \node[state] (T) [right of=R] {$T$};
% \node[state] (X) [above of=T] {$X$};
% \node[state] (Z) [rectangle, below of=T] {$Z$};
% \node[state] (Y) [right of=T] {$Y$};
%
% \path (R) edge node [below] {$\alpha_j$} (T)
% (X) edge node [left] {$\beta_{xt}$} (T)
% edge node [right] {$\beta_{xy}$} (Y)
% (Z) edge node [left] {$\beta_{zt}$} (T)
% edge node [right] {$\beta_{zy}$} (Y)
% (T) edge node [below] {?} (Y);
%\end{tikzpicture}
%\caption{Initial model.}
%\end{wrapfigure}