From 3d4b6ef19f52a06f5e74e84d4fd5928a5afebe06 Mon Sep 17 00:00:00 2001
From: Antti Hyttinen <ajhyttin@gmail.com>
Date: Tue, 17 Dec 2019 17:01:04 +0200
Subject: [PATCH] Add model definition and implementation subsections to imputation; note max leniency in experiment captions.

---
 paper/experiments.tex |  4 ++--
 paper/imputation.tex  | 51 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/paper/experiments.tex b/paper/experiments.tex
index d48b72d..17dd4eb 100644
--- a/paper/experiments.tex
+++ b/paper/experiments.tex
@@ -177,7 +177,7 @@ This model was used as decision-maker \machine and these same features were used
 \begin{figure}
 %\centering
 \includegraphics[width=\linewidth]{./img/sl_absolute_errors}
-\caption{Results using different decision-makers and settings. }
+\caption{Results using different decision-makers and settings. Here $\max(\leniencyValue)=0.9$.}
 \label{fig:results_compas}
 \end{figure}
 
@@ -204,7 +204,7 @@ Decision-maker \human random 					& 0.01522     & 0.00137         \\
 Decision-maker \machine random 					& 0.03005     & 0.00327              \\
 Lakkaraju's decision-maker \human \cite{lakkaraju2017selective} & 0.01187          & 0.00288            \\ \bottomrule
 \end{tabular}
-\caption{Comparison of mean absolute error w.r.t true evaluation between contraction and the counterfactual-based method we have  presented. The table shows that our method can perform welll despite violations of the assumptions (eg. having decision-maker \human giving random and non-informative decisions). }
+\caption{Comparison of mean absolute error w.r.t.\ the true evaluation between contraction and the counterfactual-based method we have presented. The table shows that our method can perform well despite violations of the assumptions (e.g., decision-maker \human giving random, non-informative decisions). Here $\max(\leniencyValue)=0.9$.}
 \label{tab:}
 \end{table}
 
diff --git a/paper/imputation.tex b/paper/imputation.tex
index 1a25dd7..f81ef13 100644
--- a/paper/imputation.tex
+++ b/paper/imputation.tex
@@ -96,13 +96,58 @@ In the abduction step we update the distribution of the disturbance terms $P(\ep
 Action step involves intervening on $T$ and setting $T=1$.
 Finally in the prediction step we estimate $Y$ by taking account the observations:
 \begin{eqnarray*}
-E(Y)&=&    \int   f(T=1,X=x,Z=\epsilon_z,\epsilon_Y) \\
-&& P(Z=\epsilon_Z|R=\epsilon_R, T=0, X=x)
- P(\epsilon_Y) d\epsilon_Z d\epsilon_Y
+E(Y) &=& \int f(T=1,x,z,\epsilon_Y) P(z|R=r, T=0, x) P(\epsilon_Y) \, dz \, d\epsilon_Y \\
+     &=& \int P(Y=1|T=1,x,z) P(z|R=r, T=0, x) \, dz
 \end{eqnarray*}
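+The second equality follows by marginalizing the disturbance term of the outcome: since $f$ is the structural equation that outputs the binary $Y$ from its inputs and $\epsilon_Y$, for fixed $x$ and $z$ we have
+\begin{eqnarray*}
+\int f(T=1,x,z,\epsilon_Y) P(\epsilon_Y) \, d\epsilon_Y &=& P(Y=1|T=1,x,z).
+\end{eqnarray*}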
 
 Taking into account that we need to learn parameters from the data we integrate this expression over the posterior of the parameters. Note that since $Z$ is unobserved, it is not straightforwardly clear that we can estimate parameters associated to it. However, since $Z$ is not observed we can assume it has zero mean and unit variance. Furthermore we can assume positivity of parameters, since $Z$ increases risk of failure and induces $T=0$ decisions.
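+
+To see why these constraints suffice, note two symmetries of the likelihood: it is invariant under jointly flipping the signs of $Z$ and its coefficients (the distribution of $Z$ is symmetric around zero), and under rescaling $Z$ while inversely rescaling its coefficients. Fixing the variance of $Z$ to one removes the scale ambiguity, and restricting its coefficients to be positive removes the sign ambiguity.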
 
+\subsection{Model definition}
+
+To perform inference we have to learn a parametric model from the data, instead of working with the fixed structural functions of the previous section. The simplification of the counterfactual expression derived above allows us to define this model probabilistically.
+
+We assume that the feature vectors $\obsFeaturesValue$ and $\unobservableValue$ representing risk can be condensed into one-dimensional risk values, for example by ... . Furthermore, we assume that their distributions are Gaussian. Since $Z$ is unobserved, we can fix its variance to 1.
+\begin{eqnarray*}
+\unobservable &\sim& N(0,1), \quad \obsFeatures \sim N(0,\sigma_\obsFeatures^2)  
+\end{eqnarray*}
+
+%
+Note that index $j$ refers to decision-maker $\human_j$ and \invlogit is the standard logistic function.
+
+\noindent
+\hrulefill
+\begin{align}
+\prob{\decision = 0~|~\leniency_j = \leniencyValue, \obsFeatures = \obsFeaturesValue, \unobservable = \unobservableValue} & = \invlogit(\alpha_j + \gamma_\obsFeaturesValue\obsFeaturesValue + \gamma_\unobservableValue \unobservableValue + \epsilon_\decisionValue),  \label{eq:judgemodel} \\
+	\text{where}~ \alpha_{j} & \approx \logit(\leniencyValue_j) \label{eq:leniencymodel}\\
+\prob{\outcome=0~|~\decision, \obsFeatures=\obsFeaturesValue, \unobservable=\unobservableValue} & =
+	\begin{cases}
+		0,~\text{if}~\decision = 0\\
+		\invlogit(\alpha_\outcomeValue + \beta_\obsFeaturesValue \obsFeaturesValue + \beta_\unobservableValue \unobservableValue + \epsilon_\outcomeValue),~\text{o/w} \label{eq:defendantmodel}
+	\end{cases}
+\end{align}
+\hrulefill
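+
+As a sanity check, the model above can be simulated directly. The following sketch (in Python; the parameter values, the leniency value $0.5$, and the omission of the disturbance terms are illustrative assumptions, not choices from our experiments) draws decisions from Eq.~\ref{eq:judgemodel} and outcomes from Eq.~\ref{eq:defendantmodel}:
+\begin{verbatim}
+import numpy as np
+from scipy.special import expit, logit  # expit = invlogit
+
+rng = np.random.default_rng(0)
+n = 10_000
+x = rng.normal(0.0, 1.0, n)   # observed features, X ~ N(0, sigma_x^2)
+z = rng.normal(0.0, 1.0, n)   # unobserved features, Z ~ N(0, 1)
+
+alpha_j = logit(0.5)          # alpha_j is approx. logit(leniency_j)
+gamma_x, gamma_z = 1.0, 1.0   # positive by assumption
+alpha_y, beta_x, beta_z = 0.0, 1.0, 1.0
+
+# Decision model: P(T=0 | r, x, z) = invlogit(alpha_j + gamma_x x + gamma_z z)
+t = rng.binomial(1, 1.0 - expit(alpha_j + gamma_x * x + gamma_z * z))
+
+# Outcome model: Y = 1 surely when T = 0 (a failure cannot be observed);
+# otherwise P(Y=0 | x, z) = invlogit(alpha_y + beta_x x + beta_z z)
+y = np.where(t == 0, 1,
+             rng.binomial(1, 1.0 - expit(alpha_y + beta_x * x + beta_z * z)))
+\end{verbatim}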
+
+This gives us the parameter set
+$\parameters = \{ \alpha_\outcomeValue, \alpha_j, \beta_\obsFeaturesValue, \beta_\unobservableValue, \gamma_\obsFeaturesValue, \gamma_\unobservableValue\}$. Our estimate is obtained by integrating over the posterior of these parameters:
+
+\begin{eqnarray*}
+E(Y) &=& \int P(Y=1|T=1,x,z,\parameters) P(z|R=r, T=0, x, \parameters) P(\parameters|D) \, dz \, d\parameters
+\end{eqnarray*}
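+In practice, given posterior draws $\parameters^{(s)} \sim P(\parameters|D)$ and $z^{(s)} \sim P(z|R=r, T=0, x, \parameters^{(s)})$ for $s = 1,\dots,S$, this integral is approximated by the Monte Carlo average
+\begin{eqnarray*}
+E(Y) &\approx& \frac{1}{S} \sum_{s=1}^{S} P(Y=1|T=1,x,z^{(s)},\parameters^{(s)}).
+\end{eqnarray*}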
+
+We use the prior distributions given in the Appendix for all parameters to ensure their identifiability.
+
+\subsection{Implementation}
+
+Stan allows us to sample directly from the joint posterior of the parameters and the unobserved features.
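+With those draws, the counterfactual estimate for a subject with features $x$ who received $T=0$ reduces to the Monte Carlo average above. A minimal sketch (the draw arrays are hypothetical stand-ins for the Stan fit, and all names are illustrative):
+\begin{verbatim}
+import numpy as np
+from scipy.special import expit
+
+# Stand-ins for S posterior draws of the outcome-model parameters and of
+# this subject's latent feature z; Stan samples all of these jointly.
+S = 4000
+rng = np.random.default_rng(1)
+alpha_y = rng.normal(0.0, 1.0, S)
+beta_x = np.abs(rng.normal(0.0, 1.0, S))  # positivity constraint
+beta_z = np.abs(rng.normal(0.0, 1.0, S))
+z = rng.normal(0.0, 1.0, S)
+x = 0.5                                   # the subject's observed features
+
+# E(Y): mean over draws of P(Y=1 | T=1, x, z, theta)
+e_y = np.mean(1.0 - expit(alpha_y + beta_x * x + beta_z * z))
+\end{verbatim}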
+
 
 \subsection{Our approach}
 
-- 
GitLab