%!TEX root = sl.tex

\appendix




\section{Counterfactual Inference}\label{sec:counterfactuals}


Here we derive Equation~\ref{eq:counterfactual_eq} via Pearl's counterfactual inference protocol, which involves three steps: abduction, action, and prediction \cite{pearl2000}. Our model can be represented by the following structural equations over the graph structure in Figure~\ref{fig:causalmodel}:

\noindent
\hrulefill
\begin{align}
\judge & := \epsilon_{\judge}, \nonumber \\
\unobservable & := \epsilon_\unobservable, \nonumber \\
\obsFeatures & := \epsilon_\obsFeatures, \nonumber \\
\decision & := g(\judge,\obsFeatures,\unobservable,\epsilon_{\decision}), \nonumber\\
\outcome & := f(\decision,\obsFeatures,\unobservable,\epsilon_\outcome). \nonumber
\end{align}

\vspace{-5pt}

\hrulefill

\noindent
For any case where $\decision=0$ in the data, we calculate the counterfactual value of $\outcome$ had the decision been $\decision=1$. We assume here that all parameters, functions and distributions are known.
In the \emph{abduction} step we determine $\prob{\epsilon_{\judge}, \epsilon_\unobservable, \epsilon_\obsFeatures, \epsilon_{\decision},\epsilon_\outcome|\judgeValue,\obsFeaturesValue,\decision=0}$, the distribution of the stochastic disturbance terms updated to account for the observed evidence on judge leniency, the observed features and the decision.
We directly know $\epsilon_\obsFeatures=\obsFeaturesValue$ and $\epsilon_{\judge}=\judgeValue$.
Due to the special form of $f$, the observed evidence is independent of $\epsilon_\outcome$ when $\decision = 0$, so we only need to determine $\prob{\epsilon_\unobservable,\epsilon_{\decision}|\judgeValue,\obsFeaturesValue,\decision=0}$.
Next, the \emph{action} step intervenes on $\decision$, setting $\decision=1$.
Finally, in the \emph{prediction} step, we estimate $\outcome$:
\begin{eqnarray*}
&&\hspace{-10mm}E_{\decision \leftarrow 1}(\outcome|\judgeValue,\decision=0,\obsFeaturesValue)\\
 &=&  \hspace{-3mm}  \int   f(\decision=1,\obsFeaturesValue,\unobservable = \epsilon_\unobservable,\epsilon_\outcome)   \prob{\epsilon_\unobservable, \epsilon_\decision |\judgeValue,\decision=0,\obsFeaturesValue}
\prob{\epsilon_\outcome}  \diff{\epsilon_\unobservable} \diff{\epsilon_\outcome}\diff{\epsilon_\decision}\\
 &=&   \hspace{-3mm}   \int   \prob{\outcome=1|\decision=1,\obsFeaturesValue,\unobservableValue}  \prob{\unobservableValue|\judgeValue,\decision=0,\obsFeaturesValue} \diff{\unobservableValue}
\end{eqnarray*}
where we used $\epsilon_\unobservable=\unobservableValue$ and integrated out $\epsilon_\decision$ and $\epsilon_\outcome$. This gives us the counterfactual expectation of $\outcome$ for a single subject.
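In practice, this expectation can be approximated by Monte Carlo over the posterior of the unobservable. The sketch below (in Python) is a minimal illustration rather than our exact implementation: it assumes logistic forms for $g$ and $f$ with known coefficients, and all variable names and sign conventions are illustrative.
\begin{verbatim}
import numpy as np
from scipy.special import expit  # standard logistic function

rng = np.random.default_rng(0)

def counterfactual_y(x, alpha_j, gamma_x, gamma_z,
                     alpha_y, beta_x, beta_z, n=100_000):
    # Abduction: draw z from its prior N(0,1) and weight each draw
    # by the likelihood of the observed decision, P(T=0 | r_j, x, z).
    z = rng.standard_normal(n)
    w = expit(alpha_j + gamma_x * x + gamma_z * z)
    w = w / w.sum()
    # Action and prediction: set T := 1 and average P(Y=1 | T=1, x, z)
    # under the abducted weights (logistic form assumed).
    p_y1 = 1.0 - expit(alpha_y + beta_x * x + beta_z * z)
    return float(np.sum(w * p_y1))

print(counterfactual_y(x=0.5, alpha_j=0.0, gamma_x=1.0, gamma_z=1.0,
                       alpha_y=0.0, beta_x=1.0, beta_z=1.0))
\end{verbatim}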






\section{On the Priors} \label{sec:model_definition}


For the Bayesian modelling, the priors for the coefficients $\gamma_\obsFeatures, ~\beta_\obsFeatures, ~\gamma_\unobservable$ and $\beta_\unobservable$ were defined using the gamma-mixture representation of Student's $t$-distribution with $6$ degrees of freedom.
%
The gamma-mixture for Student's $t$-distribution with $\nu$ degrees of freedom is obtained by first sampling a precision parameter from a Gamma($\nicefrac{\nu}{2},~\nicefrac{\nu}{2}$) distribution, and then drawing the coefficient from a zero-mean Gaussian with variance equal to the inverse of the sampled precision.
%
Student's $t$-distribution was chosen over a Gaussian prior for its robustness to outliers.
%
Concretely, the scale parameters $\eta_\unobservable, ~\eta_{\beta_\obsFeatures}$ and $\eta_{\gamma_\obsFeatures}$ were sampled independently from Gamma$(\nicefrac{6}{2},~\nicefrac{6}{2})$, and the coefficients were then sampled from Gaussian distributions with mean $0$ and variances $\eta_\unobservable^{-1}, ~\eta_{\beta_\obsFeatures}^{-1}$ and $\eta_{\gamma_\obsFeatures}^{-1}$, as shown below. The coefficients for the unobserved confounder \unobservable were constrained to positive values to ensure identifiability.
\begin{align}
\eta_\unobservable, ~\eta_{\beta_\obsFeatures}, ~\eta_{\gamma_\obsFeatures} & \sim \text{Gamma}(3, 3) \nonumber\\
\gamma_\unobservable, ~\beta_\unobservable & \sim N_+(0, \eta_\unobservable^{-1})\nonumber \\
\gamma_\obsFeatures & \sim N(0, \eta_{\gamma_\obsFeatures}^{-1}) \nonumber\\
\beta_\obsFeatures & \sim N(0, \eta_{\beta_\obsFeatures}^{-1})\nonumber
\end{align}
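As a sanity check, the gamma-mixture representation can be verified numerically; a minimal Python sketch with $\nu = 6$ (note that NumPy parameterizes the gamma distribution by shape and scale, so rate $\nicefrac{\nu}{2}$ becomes scale $\nicefrac{2}{\nu}$):
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
nu, n = 6.0, 200_000

# eta ~ Gamma(nu/2, rate=nu/2); coefficient ~ N(0, 1/eta).
eta = rng.gamma(shape=nu / 2, scale=2 / nu, size=n)
coef = rng.normal(0.0, np.sqrt(1.0 / eta))

# Marginally, coef follows Student's t with nu degrees of freedom:
print(np.quantile(coef, [0.05, 0.5, 0.95]))
print(np.quantile(rng.standard_t(df=nu, size=n), [0.05, 0.5, 0.95]))
\end{verbatim}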

The intercepts for the decision makers and for the outcome \outcome were given hierarchical Gaussian priors with variances $\sigma_\decision^2$ and $\sigma_\outcome^2$, as shown below. Note that the decision makers share a single variance parameter $\sigma_\decision^2$.
\begin{align}
\sigma_\decision^2, ~\sigma_\outcome^2 & \sim N_+(0, \tau^2) \nonumber \\
\alpha_\judgeValue & \sim N(0, \sigma_\decision^2)\nonumber \\
\alpha_\outcome & \sim N(0, \sigma_\outcome^2)\nonumber
\end{align}
%
The variance parameters $\sigma_\decision^2$ and $\sigma_\outcome^2$ were drawn independently from zero-mean Gaussian distributions restricted to the positive real numbers, with $\tau^2=1$.
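Read together, this prior can be simulated directly; a minimal Python sketch with $\tau=1$ and an illustrative count of $J=50$ decision makers:
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(1)
tau, J = 1.0, 50  # tau from the text; J is illustrative

# Half-Gaussian priors on the variance parameters.
var_T = np.abs(rng.normal(0.0, tau))  # sigma_T^2 ~ N_+(0, tau^2)
var_Y = np.abs(rng.normal(0.0, tau))  # sigma_Y^2 ~ N_+(0, tau^2)

# Hierarchical Gaussian intercepts; judges share variance sigma_T^2.
alpha_j = rng.normal(0.0, np.sqrt(var_T), size=J)
alpha_Y = rng.normal(0.0, np.sqrt(var_Y))
\end{verbatim}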


\section{Independent Decision Maker}

In Section \ref{sec:decisionmakers} we presented an {\it independent} decision maker; here we motivate it.
%
The independent decision maker stems from the notion that an experienced decision maker has made decisions on numerous subjects in the past.
%
Such a decision maker therefore has a good idea of the absolute dangerousness of a subject and can simply make a negative decision whenever the subject's probability of a negative outcome is too high.
%
This threshold itself implies a level of leniency: given that subjects are assigned at random, a high threshold leads to few negative decisions, i.e., a lenient decision maker.
%
To study the effect of differing leniency levels, we derive below how a given leniency level is converted into a decision threshold.

As stated in the main text, the features \obsFeatures and \unobservable are standard Gaussian random variables.
%
The independent decision maker makes its decision separately for each subject as follows.
%
First, we generate the features \obsFeatures and \unobservable for a subject from standard Gaussians.
%
The subject is then assigned to a decision maker with some leniency $\leniencyValue'$, and the decision itself is assigned deterministically:
\begin{equation} \label{eq:Tdet}
  \decision=\begin{cases}
    0, & \text{if } \prob{\outcome=0|~\obsFeatures= \obsFeaturesValue, \unobservable= \unobservableValue} \geq F^{-1}(\leniencyValue')\\
    1, & \text{otherwise}.
  \end{cases}
\end{equation}

In Equation \ref{eq:Tdet}, $\prob{\outcome=0|~\obsFeatures= \obsFeaturesValue, \unobservable= \unobservableValue}$ is the probability of a negative outcome given \obsFeaturesValue and \unobservableValue, as predicted by the judge.
%
The prediction is computed with Equation \ref{eq:defendantmodel}, omitting $\epsilon_\outcomeValue$, and assumes that the judge is perfect, i.e., that $\gamma_\unobservableValue \approx \beta_\unobservableValue$ and $\gamma_\obsFeaturesValue \approx \beta_\obsFeaturesValue$.


We note that the right-hand side of Equation \ref{eq:defendantmodel} defines a random variable when the values of \obsFeatures and \unobservable are not known.
%
It then follows that $F^{-1}(\leniencyValue')$ in Equation \ref{eq:Tdet} is the inverse cumulative distribution function of the \emph{logit-normal distribution} with parameters $\mu=0$ and $\sigma^2=2$.
%
More specifically, $F^{-1}$ is the inverse cumulative distribution function of the sum of two standard Gaussians after a logistic transformation.
%
If $\beta_\obsFeaturesValue \neq 1$ and/or $\beta_\unobservableValue \neq 1$, then by the basic properties of variance $\sigma^2=Var(\beta_\obsFeaturesValue\obsFeatures+\beta_\unobservableValue\unobservable)=\beta_\obsFeaturesValue^2Var(\obsFeatures)+\beta_\unobservableValue^2Var(\unobservable)$.
%
Finally, the inverse cumulative distribution function is
\begin{equation*} \label{eq:cum_inv}
F^{-1}(\leniencyValue') = \left(1 + \exp\left(-\left(\mu + \text{erf}^{-1}(2\leniencyValue'-1)\sqrt{2\sigma^2}\right)\right)\right)^{-1},
\end{equation*}
where erf is the error function.
%
After assigning decisions with this method, the true, observed leniency \leniencyValue of a decision maker can be computed from the full sample of its subjects.
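For concreteness, the decision rule of Equation \ref{eq:Tdet} can be written out as follows; a minimal Python sketch assuming $\mu=0$ and $\beta_\obsFeaturesValue=\beta_\unobservableValue=1$ (hence $\sigma^2=2$), with a zero outcome intercept:
\begin{verbatim}
import numpy as np
from scipy.special import erfinv, expit

def F_inv(r, mu=0.0, sigma2=2.0):
    # Inverse CDF of the logit-normal distribution:
    # invlogit(mu + sqrt(2 * sigma2) * erfinv(2 r - 1)).
    return expit(mu + np.sqrt(2 * sigma2) * erfinv(2 * r - 1))

rng = np.random.default_rng(2)
x, z = rng.standard_normal(10_000), rng.standard_normal(10_000)

p_y0 = expit(x + z)                  # judge's prediction of P(Y=0 | x, z)
T = (p_y0 < F_inv(0.7)).astype(int)  # negative decision iff at/above threshold
print(T.mean())                      # realized acceptance rate, close to 0.7
\end{verbatim}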


\clearpage


\section{Additional figures}


\begin{figure}[!b]
\centering
\subfloat[12 decision makers.]{\includegraphics[width = \linewidth]{./img/sl_compas_nJudges12_all}}\\
\subfloat[24 decision makers.]{\includegraphics[width = \linewidth]{./img/sl_compas_nJudges24_all}}\\
\subfloat[48 decision makers.]{\includegraphics[width = \linewidth]{./img/sl_compas_nJudges48_all}}

\caption{Results of experiments with COMPAS data using different numbers of decision makers.}
%\label{fig:}
\end{figure}

These figures also feature the \textbf{Probabilistic} decision maker: each subject is released with a probability given by the logistic regression model, with leniency entering through the intercept $\alpha_j$.
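A minimal sketch of this decider, with illustrative coefficients and the convention that the logistic model gives $\prob{\decision=0}$:
\begin{verbatim}
import numpy as np
from scipy.special import expit

rng = np.random.default_rng(3)
x, z = rng.standard_normal(1000), rng.standard_normal(1000)
alpha_j = 1.0  # leniency enters through the judge's intercept

# Release each subject with the model's probability of a positive
# decision; coefficients and signs here are illustrative.
p_release = 1.0 - expit(alpha_j + x + z)
T = rng.binomial(1, p_release)
\end{verbatim}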

%\begin{figure}
%\includegraphics[width=\linewidth]{img/leniency_figure}
%\caption{Figure illustrating the relationship of \leniency, \obsFeatures and \unobservable. Points $A$, $B$, $C$ and $D$ represent four subjects each with different features \obsFeatures and \unobservable.
%%
%Lines $\leniencyValue_1$ and $\leniencyValue_2$ show decision boundaries for decision-makers with different leniencies.
%%
%Figure shows how while sharing features \obsFeatures subjects $A$ and $C$ receive different decisions from decision-maker $1$ but not from decision-maker $2$ due to difference in \unobservable.
%%
%The figure also explicates the interplay of features \obsFeatures and \unobservable. Considering subjects $A$ and $D$, one might claim $D$ to be more dangerous than subject $A$ based on features \obsFeatures alone. However, assuming that the decision-maker $2$ uses feature \unobservable efficiently, they will keep the decision the same as they observe reduction in \unobservable.}
%\label{fig:approach}
%\end{figure}



\begin{figure*}%[H]
\centering
\subfloat[Random H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_random_deciderM_random_maxR_0_9coefZ1_0_all}} ~
\subfloat[Random H, Batch M]{\includegraphics[width = 3in]{./img/_deciderH_random_deciderM_batch_maxR_0_9coefZ1_0_all}}\\
\subfloat[Batch H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_batch_deciderM_random_maxR_0_9coefZ1_0_all}}~
\subfloat[Batch H, Batch M]{\includegraphics[width = 3in]{./img/_deciderH_batch_deciderM_batch_maxR_0_9coefZ1_0_all}} \\
\subfloat[Independent H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_independent_deciderM_random_maxR_0_9coefZ1_0_all}} ~
\subfloat[Independent H, Batch M]{\includegraphics[width = 3in]{./img/_deciderH_independent_deciderM_batch_maxR_0_9coefZ1_0_all}}\\
\subfloat[Probabilistic H, Random M]{\includegraphics[width = 3in, height = 1.5in]{./img/_deciderH_probabilistic_deciderM_random_maxR_0_9coefZ1_0_all}}~
\subfloat[Probabilistic H, Batch M]{\includegraphics[width = 3in, height = 1.5in]{./img/_deciderH_probabilistic_deciderM_batch_maxR_0_9coefZ1_0_all}}\\

\caption{Figures with different deciders (N=5k, 50 judges, $\max(r)=0.9$, $\beta_z=\gamma_z=1$).}
\label{fig:deciders_r09_z1}
\end{figure*}

%%

\begin{figure*}%[H]
\centering
\subfloat[Random H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_random_deciderM_random_maxR_0_5coefZ1_0_all}} ~
\subfloat[Random H, Batch M]{\includegraphics[width = 3in]{./img/_deciderH_random_deciderM_batch_maxR_0_5coefZ1_0_all}}\\
\subfloat[Batch H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_batch_deciderM_random_maxR_0_5coefZ1_0_all}}~
\subfloat[Batch H, Batch M]{\includegraphics[width = 3in]{./img/_deciderH_batch_deciderM_batch_maxR_0_5coefZ1_0_all}} \\
\subfloat[Independent H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_independent_deciderM_random_maxR_0_5coefZ1_0_all}} ~
\subfloat[Independent H, Batch M]{\includegraphics[width = 3in]{./img/_deciderH_independent_deciderM_batch_maxR_0_5coefZ1_0_all}}\\
\subfloat[Probabilistic H, Random M]{\includegraphics[width = 3in, height = 1.5in]{./img/_deciderH_probabilistic_deciderM_random_maxR_0_5coefZ1_0_all}}~
\subfloat[Probabilistic H, Batch M]{\includegraphics[width = 3in, height = 1.5in]{./img/_deciderH_probabilistic_deciderM_batch_maxR_0_5coefZ1_0_all}}\\

\caption{Figures with different deciders (N=5k, 50 judges, $\max(r)=0.5$, $\beta_z=\gamma_z=1$).}
\label{fig:deciders_r05_z1}
\end{figure*}

%%

\begin{figure*}%[H]
\centering
\subfloat[Random H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_random_deciderM_random_maxR_0_9coefZ5_0_all}} ~
\subfloat[Random H, Batch M]{\includegraphics[width = 3in]{./img/_deciderH_random_deciderM_batch_maxR_0_9coefZ5_0_all}}\\
\subfloat[Batch H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_batch_deciderM_random_maxR_0_9coefZ5_0_all}}~
\subfloat[Batch H, Batch M]{\includegraphics[width = 3in]{./img/_deciderH_batch_deciderM_batch_maxR_0_9coefZ5_0_all}} \\
\subfloat[Independent H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_independent_deciderM_random_maxR_0_9coefZ5_0_all}}\\
\subfloat[Independent H, Batch M]{\includegraphics[width = 3in]{./img/_deciderH_independent_deciderM_batch_maxR_0_9coefZ5_0_all}}\\
\subfloat[Probabilistic H, Random M]{\includegraphics[width = 3in]{./img/_deciderH_probabilistic_deciderM_random_maxR_0_9coefZ5_0_all}}~
\subfloat[Probabilistic H, Batch M]{\includegraphics[width = 3in, height = 1.5in]{./img/_deciderH_probabilistic_deciderM_batch_maxR_0_9coefZ5_0_all}}\\

\caption{Figures with different deciders (N=5k, 50 judges, $\max(r)=0.9$, $\beta_z=\gamma_z=5$).}
\label{fig:betaZ5}
\end{figure*}

%%%

\begin{figure*}%[H]
\centering
\subfloat[Random M, $\max(r)=0.9, \beta_z=\gamma_z=1$]{\includegraphics[width = 3in]{./img/with_epsilon_deciderH_independent_deciderM_random_maxR_0_9coefZ1_0_all}} ~
\subfloat[Batch M, $\max(r)=0.9, \beta_z=\gamma_z=1$]{\includegraphics[width = 3in]{./img/with_epsilon_deciderH_independent_deciderM_batch_maxR_0_9coefZ1_0_all}} \\
\subfloat[Random M, $\max(r)=0.5, \beta_z=\gamma_z=1$]{\includegraphics[width = 3in]{./img/with_epsilon_deciderH_independent_deciderM_random_maxR_0_5coefZ1_0_all}} ~
\subfloat[Batch M, $\max(r)=0.5, \beta_z=\gamma_z=1$]{\includegraphics[width = 3in]{./img/with_epsilon_deciderH_independent_deciderM_batch_maxR_0_5coefZ1_0_all}} \\
\subfloat[Random M, $\max(r)=0.9, \beta_z=\gamma_z=5$]{\includegraphics[width = 3in]{./img/with_epsilon_deciderH_independent_deciderM_random_maxR_0_9coefZ5_0_all}} ~
\subfloat[Batch M, $\max(r)=0.9, \beta_z=\gamma_z=5$]{\includegraphics[width = 3in]{./img/with_epsilon_deciderH_independent_deciderM_batch_maxR_0_9coefZ5_0_all}} \\
\caption{Figures with an independent decider H (with an error term in the decisions) and logistic regression imputation (N=5k, 50 judges). The curves for logistic regression imputation and labeled outcomes overlap heavily in subfigure (e). In the logistic regression evaluation we impute all the missing outcomes in the test data and then deploy the true evaluation on the imputed data. The data is imputed using a regression model built on the subjects with observed outcomes in the test data.}
%\label{}
\end{figure*}


%%%
%

%\newpage