& & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
& & \tabitem acceptance rate $r$ \\
& & \tabitem knowledge that $X$ affects $Y$ \\
& & \tabitem more intricate knowledge about $\M$? \\[.5\normalbaselineskip]
& & {\ul Potential outcomes evaluator} \\
& & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
& & \tabitem acceptance rate $r$ \\
& & \tabitem knowledge that $X$ affects $Y$ \\[.5\normalbaselineskip]
\section{Old results} \label{sec:results}
Results obtained by running algorithm \ref{alg:perf_comp} are presented in table \ref{tab:results} and figure \ref{fig:results}. All parameters are at their default values and a logistic regression model is trained.
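The error metric used throughout the result tables can be sketched as follows. This is a minimal illustration, not output of the actual experiments: the function name and the curve values below are hypothetical, assuming that each evaluator produces a failure-rate curve on a shared grid of acceptance rates.

```python
def mae_wrt_true(true_fr, est_fr):
    """Mean absolute error between an estimated failure-rate curve and
    the true-evaluation curve, evaluated on the same acceptance-rate grid."""
    assert len(true_fr) == len(est_fr)
    return sum(abs(e - t) for e, t in zip(est_fr, true_fr)) / len(true_fr)

# Hypothetical curves at acceptance rates 0.1, 0.2, ..., 0.9.
true_curve = [0.01, 0.02, 0.04, 0.07, 0.11, 0.16, 0.22, 0.29, 0.37]
est_curve  = [0.02, 0.03, 0.04, 0.08, 0.10, 0.17, 0.21, 0.30, 0.36]
print(round(mae_wrt_true(true_curve, est_curve), 4))
```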
\begin{table}[H]
\centering
\caption{Mean absolute error (MAE) w.r.t.\ true evaluation. \\ \emph{RL: Updated 26 June.}}
\begin{tabular}{l | c c}
Method & MAE without $Z$ & MAE with $Z$ \\ \hline
Labeled outcomes & 0.107249375 & 0.0827844\\
Human evaluation & 0.002383729 & 0.0042517\\
Contraction & 0.004633164 & 0.0075497\\
Causal model, ep & 0.000598624 & 0.0411532\\
\end{tabular}
\label{tab:results}
\end{table}
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.5\textwidth}
\includegraphics[width=\textwidth]{sl_without_Z_8iter}
\caption{Results without unobservables}
\label{fig:results_without_Z}
\end{subfigure}
~ %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.5\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_8iter_betaZ_1_0}
\caption{Results with unobservables, $\beta_Z=1$.}
\label{fig:results_with_Z}
\end{subfigure}
\caption{Failure rate vs. acceptance rate with varying levels of leniency. Logistic regression was trained on labeled training data. \emph{RL: Updated 26 June.}}
\label{fig:results}
\end{figure}
\subsection{$\beta_Z=0$ and data generated with unobservables}
If we set $\beta_Z=0$, almost all failure rates drop to zero in the interval $0.1, \ldots, 0.3$; the exception is the human evaluation failure rate. Results are presented in figures \ref{fig:betaZ_1_5} and \ref{fig:betaZ_0}.
The disparities between figures \ref{fig:results_without_Z} and \ref{fig:betaZ_0} (results without unobservables and with $\beta_Z=0$) can be explained by a slight difference in the data generating process, namely the effect of $\epsilon$. The effect of adding $\epsilon$ (noise to the decisions) is explored further in section \ref{sec:epsilon}.
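The role of $\beta_Z$ can be made concrete with a short sketch of a generator with unobservables. This is an illustration only, assuming the logistic parameterization $P(Y=0 \mid X, Z) = \invlogit(\beta_X X + \beta_Z Z)$ used in this document; the function name is hypothetical and the exact specification is the one in algorithm \ref{alg:data_with_Z}.

```python
import math
import random

def inv_logit(a):
    return 1.0 / (1.0 + math.exp(-a))

def generate_with_unobservables(n, beta_X=1.0, beta_Z=1.0, seed=0):
    """Sketch of the generator with unobservables: each subject has an
    observable feature X and an unobservable Z, and Y is drawn with
    P(Y=0 | X, Z) = inv_logit(beta_X * X + beta_Z * Z).
    Setting beta_Z = 0 removes the influence of Z on the outcome."""
    rng = random.Random(seed)
    data = []
    for _ in range(n):
        x, z = rng.gauss(0, 1), rng.gauss(0, 1)
        y = 0 if rng.random() < inv_logit(beta_X * x + beta_Z * z) else 1
        data.append((x, z, y))
    return data
```

With `beta_Z=0` the probability of the outcome no longer depends on $z$, which is why figure \ref{fig:betaZ_0} should approach the setting without unobservables up to the remaining differences in the generating process.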
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_4iter_betaZ_1_5}
\caption{Results with unobservables, $\beta_Z$ set to 1.5 in algorithm \ref{alg:data_with_Z}.}
\label{fig:betaZ_1_5}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_4iter_beta0}
\caption{Results with unobservables, $\beta_Z$ set to 0 in algorithm \ref{alg:data_with_Z}.}
\label{fig:betaZ_0}
\end{subfigure}
\caption{Effect of $\beta_z$. Failure rate vs. acceptance rate with unobservables in the data (see algorithm \ref{alg:data_with_Z}). Logistic regression was trained on labeled training data. Results from algorithm \ref{alg:perf_comp}.}
\label{fig:betaZ_comp}
\end{figure}
\subsection{Noise added to the decision and data generated without unobservables} \label{sec:epsilon}
In this experiment, Gaussian noise with zero mean and variance 0.1 was added to the probabilities $P(Y=0|X=x)$ after sampling $Y$ but before ordering the observations on line 5 of algorithm \ref{alg:data_without_Z}. Results are presented in figure \ref{fig:sigma_figure}.
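The noise step can be sketched as follows. The function name is hypothetical, and the direction of the ordering (most likely to succeed first) is an assumption; the authoritative version is line 5 of algorithm \ref{alg:data_without_Z}.

```python
import math
import random

def noisy_ranking(p_y0, variance=0.1, seed=0):
    """Add zero-mean Gaussian noise (variance 0.1, i.e. std sqrt(0.1))
    to each probability P(Y=0|X=x) after Y has been sampled, then return
    the subject indices ordered by the noisy values, mimicking an
    imperfect decision-maker."""
    rng = random.Random(seed)
    noisy = [p + rng.gauss(0, math.sqrt(variance)) for p in p_y0]
    return sorted(range(len(p_y0)), key=noisy.__getitem__, reverse=True)
```

Because the noise variance (0.1) is large relative to the spread of the probabilities, the induced ordering can deviate substantially from the noiseless one, which is the effect examined in figure \ref{fig:sigma_figure}.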
\begin{figure}[]
\centering
\includegraphics[width=0.5\textwidth]{sl_without_Z_3iter_sigma_sqrt_01}
\caption{Failure rate with varying levels of leniency without unobservables. Noise has been added to the decision probabilities. Logistic regression was trained on labeled training data.}
\label{fig:sigma_figure}
\end{figure}
\subsection{Predictions with random forest classifier} \label{sec:random_forest}
In this section, the predictive model was switched to a random forest classifier to examine the effect of changing the predictive model. The results, presented in figure \ref{fig:random_forest}, are practically identical to those presented in figure \ref{fig:results}.
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_withoutZ_4iter_randomforest}
\caption{Results without unobservables.}
\label{fig:results_without_Z_rf}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_withZ_6iter_betaZ_1_0_randomforest}
\caption{Results with unobservables, $\beta_Z=1$.}
\label{fig:results_with_Z_rf}
\end{subfigure}
\caption{Failure rate vs. acceptance rate with varying levels of leniency. A random forest classifier was trained on labeled training data.}
\label{fig:random_forest}
\end{figure}
\subsection{Sanity check for predictions}
Predictions were checked by drawing a graph of predicted $Y$ versus $X$; results are presented in figure \ref{fig:sanity_check}. The figure indicates that the predicted class labels and their probabilities are consistent with the ground truth.
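The visual check can also be scripted. The sketch below is a hedged proxy for the plotted check, assuming (as elsewhere in this document) that $X$ has a positive effect on $Y$, so the predicted $P(Y=1|X=x)$ should be monotone in $x$; the function name is hypothetical.

```python
def probabilities_monotone(xs, probs, tol=1e-9):
    """Scripted sanity check: if X has a positive effect on Y, the
    predicted P(Y=1|X=x) should (weakly) increase with x.  Returns True
    when the probabilities are monotone non-decreasing in x."""
    pairs = sorted(zip(xs, probs))  # order the predictions by x
    return all(p1 <= p2 + tol for (_, p1), (_, p2) in zip(pairs, pairs[1:]))
```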
\begin{figure}[]
\centering
\includegraphics[width=0.5\textwidth]{sanity_check}
\caption{Predicted class label and probability of $Y=1$ versus X. Prediction was done with a logistic regression model. Colors of the points denote ground truth (yellow = 1, purple = 0). Data set was created with the unobservables.}
\label{fig:sanity_check}
\end{figure}
\subsection{Fully random model $\M$}
Given the framework defined in section \ref{sec:framework}, the results presented next are for a model $\M$ that outputs probability 0.5 for every instance $x$. The labeling process is still as presented in algorithm \ref{alg:data_with_Z}.
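Such a model is trivial to write down; the sketch below shows it with an sklearn-style `predict_proba` interface, which is an assumption made only so that the random model can be swapped in for the trained classifiers used elsewhere.

```python
class RandomModel:
    """Fully random model M: outputs P(Y=0|X=x) = 0.5 for every instance,
    regardless of the input."""

    def predict_proba(self, xs):
        # One (P(Y=0), P(Y=1)) pair per instance, always (0.5, 0.5).
        return [(0.5, 0.5) for _ in xs]
```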
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_without_Z_15iter_random_model}
\caption{Failure rate vs. acceptance rate. Data without unobservables. Machine predictions with random model.}
\label{fig:random_predictions_without_Z}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_15iter_fully_random_model}
\caption{Failure rate vs. acceptance rate. Data with unobservables. Machine predictions with random model.}
\label{fig:random_predictions_with_Z}
\end{subfigure}
\caption{Failure rate vs. acceptance rate with varying levels of leniency. Machine predictions were made with a completely random model, i.e., the prediction is $P(Y=0|X=x)=0.5$ for all $x$.}
\label{fig:random_predictions}
\end{figure}
\subsection{Modular framework -- Monte Carlo evaluator} \label{sec:modules_mc}
For these results, data was generated either with the module in algorithm \ref{alg:dg:coinflip_with_z} (drawing $Y$ from a Bernoulli distribution with parameter $\pr(Y=0|X, Z, W)$, as previously) or with the module in algorithm \ref{alg:dg:threshold_with_Z} (assigning $Y$ based on the value of $\invlogit(\beta_XX+\beta_ZZ)$). Decisions were determined using one of two modules: the module in algorithm \ref{alg:decider:quantile} (decisions based on quantiles) or the module in algorithm \ref{alg:decider:lakkaraju} ("human" decision-maker as in \cite{lakkaraju17}). Curves were computed with True evaluation (algorithm \ref{alg:eval:true_eval}), Labeled outcomes (\ref{alg:eval:labeled_outcomes}), Human evaluation (\ref{alg:eval:human_eval}), Contraction (\ref{alg:eval:contraction}) and Monte Carlo (\ref{alg:eval:mc}) evaluators. Results are presented in figure \ref{fig:modules_mc}; the corresponding MAEs are presented in table \ref{tab:modules_mc}.
From the result table we can see that the MAE is lowest when the data generating process corresponds closely to the Monte Carlo algorithm.
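The two outcome-generation modules differ only in how $Y$ is produced from the same logistic score, which can be sketched as follows. The function names are hypothetical, and the threshold cutoff `tau` in the second module is an assumption for illustration; the authoritative definitions are algorithms \ref{alg:dg:coinflip_with_z} and \ref{alg:dg:threshold_with_Z}.

```python
import math
import random

def inv_logit(a):
    return 1.0 / (1.0 + math.exp(-a))

def outcome_bernoulli(x, z, beta_X=1.0, beta_Z=1.0, rng=random):
    """Bernoulli module: draw Y from a Bernoulli distribution whose
    P(Y=0) is the logistic score of the subject."""
    return 0 if rng.random() < inv_logit(beta_X * x + beta_Z * z) else 1

def outcome_threshold(x, z, beta_X=1.0, beta_Z=1.0, tau=0.5):
    """Threshold module: assign Y deterministically from the value of
    inv_logit(beta_X*X + beta_Z*Z).  The cutoff tau is an assumption."""
    return 0 if inv_logit(beta_X * x + beta_Z * z) >= tau else 1
```

The deterministic module removes the outcome noise of the Bernoulli draw, which is one reason the evaluators behave differently across the four columns of table \ref{tab:modules_mc}.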
\begin{table}[]
\centering
\caption{Mean absolute error w.r.t.\ true evaluation. See modules used in section \ref{sec:modules_mc}. Bern = Bernoulli, indep. = independent, TH = threshold.}
\begin{tabular}{l | c c c c}
Method & Bern + indep. & Bern + non-indep. & TH + indep. & TH + non-indep.\\ \hline
Labeled outcomes & 0.111075 & 0.103235 & 0.108506 & 0.0970325\\
Human evaluation & 0.027298 & NaN (TBA) & 0.049582 & 0.0033916\\
Contraction & 0.004206 & 0.004656 & 0.005557 & 0.0034591\\
Monte Carlo & 0.001292 & 0.016629 & 0.009429 & 0.0179825\\
\end{tabular}
\label{tab:modules_mc}
\end{table}
\begin{figure}[]
\centering
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_10iter_coinflip_quantile_defaults_mc}
\caption{Outcome Y from Bernoulli, independent decisions using the quantiles.}
%\label{fig:modules_mc_without_Z}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_20iter_threshold_quantile_defaults_mc}
\caption{Outcome Y from threshold rule, independent decisions using the quantiles.}
%\label{fig:modules_mc_with_Z}
\end{subfigure}
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_10iter_coinflip_lakkarajudecider_defaults_mc}
\caption{Outcome Y from Bernoulli, non-independent decisions.}
%\label{fig:modules_mc_without_Z}
\end{subfigure}
\quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
%(or a blank line to force the subfigure onto a new line)
\begin{subfigure}[b]{0.475\textwidth}
\includegraphics[width=\textwidth]{sl_with_Z_10iter_threshold_lakkarajudecider_defaults_mc}
\caption{Outcome Y from threshold rule, non-independent decisions.}
%\label{fig:modules_mc_with_Z}
\end{subfigure}
\caption{Failure rate vs. acceptance rate with varying levels of leniency. Different combinations of deciders and data generation modules. See other modules used in section \ref{sec:modules_mc}.}
\label{fig:modules_mc}
\end{figure}
\section{Diagnostic figures} \label{sec:diagnostic}
Here we present supplementary figures for all the settings in the main result section.
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{sl_diagnostic_bernoulli_independent_without_Z}
\caption{Results from estimating the failure rate at different levels of leniency using the different methods. Bernoulli outcomes, independent decisions, data without unobservables.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{sl_diagnostic_bernoulli_independent_with_Z}
\caption{Results from estimating the failure rate at different levels of leniency using the different methods. Bernoulli outcomes, independent decisions, data with unobservables.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{sl_diagnostic_threshold_independent_with_Z}
\caption{Results from estimating the failure rate at different levels of leniency using the different methods. Threshold outcomes, independent decisions, data with unobservables.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{sl_diagnostic_bernoulli_batch_with_Z}
\caption{Results from estimating the failure rate at different levels of leniency using the different methods. Bernoulli outcomes, batch decider, data with unobservables.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{sl_diagnostic_threshold_batch_with_Z}
\caption{Results from estimating the failure rate at different levels of leniency using the different methods. Threshold outcomes, batch decider, data with unobservables.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{sl_diagnostic_random_decider_with_Z}
\caption{Results from estimating the failure rate at different levels of leniency using the different methods. Random decider, data with unobservables.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{sl_diagnostic_biased_decider_with_Z}
\caption{Results from estimating the failure rate at different levels of leniency using the different methods. Biased decider, data with unobservables.}
%\label{fig:}
\end{figure}
\begin{figure}[]
\centering
\includegraphics[width=\textwidth]{sl_diagnostic_bad_decider_with_Z}
\caption{Results from estimating the failure rate at different levels of leniency using the different methods. Bad decider, data with unobservables.}
%\label{fig:}
\end{figure}
%\end{appendices}