Skip to content
Snippets Groups Projects
notes.tex 84.6 KiB
Newer Older
  • Learn to ignore specific revisions
  •      &  & {\ul Analytic solution} \\
    
         &  & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
         &  & \tabitem acceptance rate $r$ \\
    
    Riku-Laine's avatar
    Riku-Laine committed
         &  & \tabitem knowledge that $X$ affects $Y$ \\
         &  & \tabitem more intricate knowledge about $\M$ ? \\[.5\normalbaselineskip]
    
         
         &  & {\ul Potential outcomes evaluator} \\
         &  & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
         &  & \tabitem acceptance rate $r$ \\
         &  & \tabitem knowledge that $X$ affects $Y$ \\[.5\normalbaselineskip]
    
    Riku-Laine's avatar
    Riku-Laine committed
        \bottomrule
      \end{tabular}
    
      \label{tab:modules}
    
    Riku-Laine's avatar
    Riku-Laine committed
    \end{table}
    
    
    \section{Old results} \label{sec:results}
    
    Riku-Laine's avatar
    Riku-Laine committed
    
    
    Results obtained from running algorithm~\ref{alg:perf_comp} are presented in table~\ref{tab:results} and figure~\ref{fig:results}. All parameters were kept at their default values and a logistic regression model was trained.
    
    Riku-Laine's avatar
    Riku-Laine committed
    
    
    \begin{table}[H]
    \centering
    \caption{Mean absolute error (MAE) w.r.t.\ true evaluation. \\ \emph{RL: Updated 26 June.}}
    \begin{tabular}{l | c c}
    Method & MAE without Z & MAE with Z \\ \hline
    Labeled outcomes 	& 0.107249375 	& 0.0827844\\
    Human evaluation 	& 0.002383729 	& 0.0042517\\
    Contraction 		& 0.004633164		& 0.0075497\\
    Causal model, ep 	& 0.000598624 	& 0.0411532\\
    \end{tabular}
    \label{tab:results}
    \end{table}
    
    
    \begin{figure}[]
        \centering
        \begin{subfigure}[b]{0.5\textwidth}
            \includegraphics[width=\textwidth]{sl_without_Z_8iter}
            \caption{Results without unobservables}
            \label{fig:results_without_Z}
        \end{subfigure}
        ~ %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc. 
          %(or a blank line to force the subfigure onto a new line)
        \begin{subfigure}[b]{0.5\textwidth}
            \includegraphics[width=\textwidth]{sl_with_Z_8iter_betaZ_1_0}
            \caption{Results with unobservables, $\beta_Z=1$.}
            \label{fig:results_with_Z}
        \end{subfigure}
        \caption{Failure rate vs. acceptance rate with varying levels of leniency. Logistic regression was trained on labeled training data. \emph{RL: Updated 26 June.}}
        \label{fig:results}
    \end{figure}
    
    \subsection{$\beta_Z=0$ and data generated with unobservables.}
    
    If we set $\beta_Z=0$, all failure rates except the human evaluation failure rate drop to zero in the interval $0.1, \ldots, 0.3$. Results are presented in figures~\ref{fig:betaZ_1_5} and~\ref{fig:betaZ_0}.
    
    The disparities between figures~\ref{fig:results_without_Z} and~\ref{fig:betaZ_0} (results without unobservables and with $\beta_Z=0$) can be explained by the slight difference in the data generating process, namely the effect of $\epsilon$. The effect of adding $\epsilon$ (noise to the decisions) is further explored in section~\ref{sec:epsilon}.
    
    \begin{figure}[]
        \centering
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_with_Z_4iter_betaZ_1_5}
            \caption{Results with unobservables, $\beta_Z$ set to 1.5 in algorithm \ref{alg:data_with_Z}.}
            \label{fig:betaZ_1_5}
        \end{subfigure}
        \quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc. 
          %(or a blank line to force the subfigure onto a new line)
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_with_Z_4iter_beta0}
            \caption{Results with unobservables, $\beta_Z$ set to 0 in algorithm \ref{alg:data_with_Z}.}
            \label{fig:betaZ_0}
        \end{subfigure}
        \caption{Effect of $\beta_Z$. Failure rate vs. acceptance rate with unobservables in the data (see algorithm \ref{alg:data_with_Z}). Logistic regression was trained on labeled training data. Results from algorithm \ref{alg:perf_comp}.}
        \label{fig:betaZ_comp}
    \end{figure}
    
    \subsection{Noise added to the decision and data generated without unobservables} \label{sec:epsilon}
    
    In this part, Gaussian noise with zero mean and variance 0.1 was added to the probabilities $P(Y=0|X=x)$ after sampling $Y$ but before ordering the observations in line 5 of algorithm~\ref{alg:data_without_Z}. Results are presented in figure~\ref{fig:sigma_figure}.
    
    \begin{figure}[]
        \centering
        \includegraphics[width=0.5\textwidth]{sl_without_Z_3iter_sigma_sqrt_01}
        \caption{Failure rate with varying levels of leniency without unobservables. Noise has been added to the decision probabilities. Logistic regression was trained on labeled training data.}
        \label{fig:sigma_figure}
    \end{figure}
    
    \subsection{Predictions with random forest classifier} \label{sec:random_forest}
    
    In this section the predictive model was switched to a random forest classifier to examine the effect of changing the predictive model. Results are presented in figure~\ref{fig:random_forest} and are practically identical to those presented earlier in figure~\ref{fig:results}.
    
    \begin{figure}[]
        \centering
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_withoutZ_4iter_randomforest}
            \caption{Results without unobservables.}
            \label{fig:results_without_Z_rf}
        \end{subfigure}
        \quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc. 
          %(or a blank line to force the subfigure onto a new line)
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_withZ_6iter_betaZ_1_0_randomforest}
            \caption{Results with unobservables, $\beta_Z=1$.}
            \label{fig:results_with_Z_rf}
        \end{subfigure}
        \caption{Failure rate vs. acceptance rate with varying levels of leniency. A random forest classifier was trained on labeled training data.}
        \label{fig:random_forest}
    \end{figure}
    
    \subsection{Sanity check for predictions}
    
    Predictions were checked by drawing a graph of predicted $Y$ versus $X$; the results are presented in figure~\ref{fig:sanity_check}. The figure indicates that the predicted class labels and the probabilities for them are consistent with the ground truth.
    
    \begin{figure}[]
        \centering
        \includegraphics[width=0.5\textwidth]{sanity_check}
        \caption{Predicted class label and probability of $Y=1$ versus X. Prediction was done with a logistic regression model. Colors of the points denote ground truth (yellow = 1, purple = 0). Data set was created with the unobservables.}
        \label{fig:sanity_check}
    \end{figure}
    
    \subsection{Fully random model $\M$}
    
    Given our framework defined in section~\ref{sec:framework}, the results presented next were obtained with a model $\M$ that outputs probability 0.5 for every instance of $x$. The labeling process is still as presented in algorithm~\ref{alg:data_with_Z}.
    
    \begin{figure}[]
        \centering
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_without_Z_15iter_random_model}
            \caption{Failure rate vs. acceptance rate. Data without unobservables. Machine predictions with random model.}
            \label{fig:random_predictions_without_Z}
        \end{subfigure}
        \quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc. 
          %(or a blank line to force the subfigure onto a new line)
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_with_Z_15iter_fully_random_model}
            \caption{Failure rate vs. acceptance rate. Data with unobservables. Machine predictions with random model.}
            \label{fig:random_predictions_with_Z}
        \end{subfigure}
        \caption{Failure rate vs. acceptance rate with varying levels of leniency. Machine predictions were done with a completely random model, that is, the prediction $P(Y=0|X=x)=0.5$ for all $x$.}
        \label{fig:random_predictions}
    \end{figure}
    
    \subsection{Modular framework -- Monte Carlo evaluator} \label{sec:modules_mc}
    
    For these results, data was generated either with the module in algorithm~\ref{alg:dg:coinflip_with_z} (drawing $Y$ from a Bernoulli distribution with parameter $\pr(Y=0|X, Z, W)$ as previously) or with the module in algorithm~\ref{alg:dg:threshold_with_Z} (assigning $Y$ based on the value of $\invlogit(\beta_XX+\beta_ZZ)$). Decisions were determined using one of two modules: the module in algorithm~\ref{alg:decider:quantile} (decision based on quantiles) or the one in algorithm~\ref{alg:decider:lakkaraju} (``human'' decision-maker as in~\cite{lakkaraju17}). Curves were computed with the True evaluation (algorithm~\ref{alg:eval:true_eval}), Labeled outcomes (\ref{alg:eval:labeled_outcomes}), Human evaluation (\ref{alg:eval:human_eval}), Contraction (\ref{alg:eval:contraction}) and Monte Carlo (\ref{alg:eval:mc}) evaluators. Results are presented in figure~\ref{fig:modules_mc}. The corresponding MAEs are presented in table~\ref{tab:modules_mc}.
    
    From the result table we can see that the MAE is lowest when the data generating process corresponds closely to the Monte Carlo algorithm.
    
    \begin{table}[]
    \centering
    \caption{Mean absolute error w.r.t.\ true evaluation. See modules used in section \ref{sec:modules_mc}. Bern = Bernoulli, indep. = independent, TH = threshold.}
    \begin{tabular}{l | c c c c}
    Method & Bern + indep. & Bern + non-indep. & TH + indep. & TH + non-indep.\\ \hline
    Labeled outcomes 	& 0.111075	& 0.103235	& 0.108506 & 0.0970325\\
    Human evaluation 	& 0.027298	& NaN (TBA)	& 0.049582 & 0.0033916\\
    Contraction 		& 0.004206	& 0.004656	& 0.005557 & 0.0034591\\
    Monte Carlo	 	& 0.001292	& 0.016629	& 0.009429 & 0.0179825\\
    \end{tabular}
    \label{tab:modules_mc}
    \end{table}
    
    
    \begin{figure}[]
        \centering
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_with_Z_10iter_coinflip_quantile_defaults_mc}
            \caption{Outcome Y from Bernoulli, independent decisions using the quantiles.}
            %\label{fig:modules_mc_without_Z}
        \end{subfigure}
        \quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc. 
          %(or a blank line to force the subfigure onto a new line)
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_with_Z_20iter_threshold_quantile_defaults_mc}
            \caption{Outcome Y from threshold rule, independent decisions using the quantiles.}
            %\label{fig:modules_mc_with_Z}
        \end{subfigure}
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_with_Z_10iter_coinflip_lakkarajudecider_defaults_mc}
            \caption{Outcome Y from Bernoulli, non-independent decisions.}
            %\label{fig:modules_mc_without_Z}
        \end{subfigure}
        \quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc. 
          %(or a blank line to force the subfigure onto a new line)
        \begin{subfigure}[b]{0.475\textwidth}
            \includegraphics[width=\textwidth]{sl_with_Z_10iter_threshold_lakkarajudecider_defaults_mc}
            \caption{Outcome Y from threshold rule, non-independent decisions.}
            %\label{fig:modules_mc_with_Z}
        \end{subfigure}
        \caption{Failure rate vs. acceptance rate with varying levels of leniency. Different combinations of deciders and data generation modules. See other modules used in section \ref{sec:modules_mc}.}
        \label{fig:modules_mc}
    \end{figure}
    
    \section{Diagnostic figures} \label{sec:diagnostic}
    
    Here we present supplementary figures for all the settings in the main results section.
    
    \begin{figure}[]
        \centering
        \includegraphics[width=\textwidth]{sl_diagnostic_bernoulli_independent_without_Z}
        \caption{Results from estimating failure rate with different levels of leniency using different methods.}
        %\label{fig:}
    \end{figure}
    
    \begin{figure}[]
        \centering
        \includegraphics[width=\textwidth]{sl_diagnostic_bernoulli_independent_with_Z}
        \caption{Results from estimating failure rate with different levels of leniency using different methods.}
        %\label{fig:}
    \end{figure}
    
    \begin{figure}[]
        \centering
        \includegraphics[width=\textwidth]{sl_diagnostic_threshold_independent_with_Z}
        \caption{Results from estimating failure rate with different levels of leniency using different methods.}
        %\label{fig:}
    \end{figure}
    
    \begin{figure}[]
        \centering
        \includegraphics[width=\textwidth]{sl_diagnostic_bernoulli_batch_with_Z}
        \caption{Results from estimating failure rate with different levels of leniency using different methods.}
        %\label{fig:}
    \end{figure}
    
    \begin{figure}[]
        \centering
        \includegraphics[width=\textwidth]{sl_diagnostic_threshold_batch_with_Z}
        \caption{Results from estimating failure rate with different levels of leniency using different methods.}
        %\label{fig:}
    \end{figure}
    
    \begin{figure}[]
        \centering
        \includegraphics[width=\textwidth]{sl_diagnostic_random_decider_with_Z}
        \caption{Results from estimating failure rate with different levels of leniency using different methods.}
        %\label{fig:}
    \end{figure}
    
    \begin{figure}[]
        \centering
        \includegraphics[width=\textwidth]{sl_diagnostic_biased_decider_with_Z}
        \caption{Results from estimating failure rate with different levels of leniency using different methods.}
        %\label{fig:}
    \end{figure}
    
    \begin{figure}[]
        \centering
        \includegraphics[width=\textwidth]{sl_diagnostic_bad_decider_with_Z}
        \caption{Results from estimating failure rate with different levels of leniency using different methods.}
        %\label{fig:}
    \end{figure}
    
    %\end{appendices}
    
    Riku-Laine's avatar
    Riku-Laine committed
    
    
    \end{document}