From 3d4b6ef19f52a06f5e74e84d4fd5928a5afebe06 Mon Sep 17 00:00:00 2001
From: Antti Hyttinen <ajhyttin@gmail.com>
Date: Tue, 17 Dec 2019 17:01:04 +0200
Subject: [PATCH] Add model definition and implementation subsections; note max
 leniency in experiment captions.

---
 paper/experiments.tex |  4 ++--
 paper/imputation.tex  | 51 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/paper/experiments.tex b/paper/experiments.tex
index d48b72d..17dd4eb 100644
--- a/paper/experiments.tex
+++ b/paper/experiments.tex
@@ -177,7 +177,7 @@ This model was used as decision-maker \machine and these same features were used
 \begin{figure}
 %\centering
 \includegraphics[width=\linewidth]{./img/sl_absolute_errors}
-\caption{Results using different decision-makers and settings. }
+\caption{Results using different decision-makers and settings. Here $\max(\leniencyValue)=0.9$.}
 \label{fig:results_compas}
 \end{figure}
 
@@ -204,7 +204,7 @@ Decision-maker \human random & 0.01522 & 0.00137 \\
 Decision-maker \machine random & 0.03005 & 0.00327 \\
 Lakkaraju's decision-maker \human \cite{lakkaraju2017selective} & 0.01187 & 0.00288 \\
 \bottomrule
 \end{tabular}
-\caption{Comparison of mean absolute error w.r.t true evaluation between contraction and the counterfactual-based method we have presented. The table shows that our method can perform welll despite violations of the assumptions (eg. having decision-maker \human giving random and non-informative decisions). }
+\caption{Comparison of mean absolute error w.r.t.\ the true evaluation between contraction and the counterfactual-based method we have presented. The table shows that our method can perform well despite violations of the assumptions (e.g.\ decision-maker \human giving random and non-informative decisions). Here $\max(\leniencyValue)=0.9$.}
 \label{tab:}
 \end{table}

diff --git a/paper/imputation.tex b/paper/imputation.tex
index 1a25dd7..f81ef13 100644
--- a/paper/imputation.tex
+++ b/paper/imputation.tex
@@ -96,13 +96,58 @@ In the abduction step we update the distribution of the disturbance terms $P(\ep
 The action step involves intervening on $T$ and setting $T=1$. Finally, in the prediction step we estimate $Y$, taking the observations into account:
 \begin{eqnarray*}
-E(Y)&=& \int f(T=1,X=x,Z=\epsilon_z,\epsilon_Y) \\
-&& P(Z=\epsilon_Z|R=\epsilon_R, T=0, X=x)
- P(\epsilon_Y) d\epsilon_Z d\epsilon_Y
+E(Y) &=& \int f(T=1,x,z,\epsilon_Y) \, P(z|R=r, T=0, x) \, P(\epsilon_Y) \,dz \,d\epsilon_Y \\
+ &=& \int P(Y=1|T=1,x,z) \, P(z|R=r, T=0, x) \,dz
 \end{eqnarray*}
 Since we also need to learn the parameters from the data, we further integrate this expression over their posterior.
 
 Note that since $Z$ is unobserved, it is not immediately clear that we can estimate the parameters associated with it. However, precisely because $Z$ is not observed, we are free to assume it has zero mean and unit variance. Furthermore, we can assume positivity of its coefficients, since higher $Z$ increases the risk of failure and induces $T=0$ decisions.
 
+\subsection{Model definition}
+
+To perform inference, we have to learn a parametric model from the data in place of the fixed functions of the previous section. The simplification of the counterfactual expression derived above allows us to define the model probabilistically.
+
+We assume the feature vectors $\obsFeaturesValue$ and $\unobservableValue$ representing risk can be condensed to one-dimensional risk values, for example by ... .
+Furthermore, we assume their distributions are Gaussian. Since $Z$ is unobserved, we can fix its variance to 1:
+\begin{eqnarray*}
+\unobservable &\sim& N(0,1), \quad \obsFeatures \sim N(0,\sigma_\obsFeatures^2).
+\end{eqnarray*}
+
+In the model below, index $j$ refers to decision-maker $\human_j$ and \invlogit is the standard logistic function.
+
+\noindent
+\hrulefill
+\begin{align}
+\prob{\decision = 0~|~\leniency_j = \leniencyValue, \obsFeatures = \obsFeaturesValue, \unobservable = \unobservableValue} & = \invlogit(\alpha_j + \gamma_\obsFeaturesValue\obsFeaturesValue + \gamma_\unobservableValue \unobservableValue + \epsilon_\decisionValue), \label{eq:judgemodel} \\
+ \text{where}~ \alpha_{j} & \approx \logit(\leniencyValue_j), \label{eq:leniencymodel} \\
+\prob{\outcome=0~|~\decision, \obsFeatures=\obsFeaturesValue, \unobservable=\unobservableValue} & =
+  \begin{cases}
+    0, & \text{if}~\decision = 0 \\
+    \invlogit(\alpha_\outcomeValue + \beta_\obsFeaturesValue \obsFeaturesValue + \beta_\unobservableValue \unobservableValue + \epsilon_\outcomeValue), & \text{otherwise}
+  \end{cases} \label{eq:defendantmodel}
+\end{align}
+\hrulefill
+
+This gives us the parameters
+$\parameters = \{ \alpha_\outcomeValue, \alpha_j, \beta_\obsFeaturesValue, \beta_\unobservableValue, \gamma_\obsFeaturesValue, \gamma_\unobservableValue\}$; the only variance parameter, $\sigma_\obsFeatures^2$, concerns the observed $\obsFeatures$ and can be estimated directly from the data. Our estimate is obtained by integrating over the posterior of these parameters:
+
+\begin{eqnarray*}
+E(Y)
+ &=& \int P(Y=1|T=1,x,z,\parameters) \, P(z|R=r, T=0, x, \parameters) \, P(\parameters|D) \,dz \,d\parameters,
+\end{eqnarray*}
+where $D$ denotes the observed data.
+
+We use the prior distributions given in the Appendix for all parameters to ensure their identifiability.
+
+\subsection{Implementation}
+
+Stan allows us to sample directly from the joint posterior of the parameters and the unobserved features.
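+
+As a concrete illustration, the following is a minimal sketch of how the model of Eqs.~\eqref{eq:judgemodel}--\eqref{eq:defendantmodel} could be written in Stan. The variable names and the weakly informative priors here are placeholders rather than the exact specification used in our experiments, and the noise terms $\epsilon_\decisionValue$ and $\epsilon_\outcomeValue$ are absorbed into the logistic link.
+\begin{verbatim}
+data {
+  int<lower=1> N;                  // number of cases
+  int<lower=1> J;                  // number of decision-makers
+  int<lower=1,upper=J> jj[N];      // decision-maker of each case
+  vector[N] x;                     // observed one-dimensional risk feature
+  int<lower=0,upper=1> t[N];       // decision T
+  int<lower=0,upper=1> y[N];       // outcome Y, recorded only when T = 1
+}
+parameters {
+  vector[N] z;                     // unobserved risk feature, one per case
+  vector[J] alpha_j;               // decision-maker intercepts (leniency)
+  real alpha_y;
+  real<lower=0> gamma_x;           // positivity assumed, as argued above
+  real<lower=0> gamma_z;
+  real<lower=0> beta_x;
+  real<lower=0> beta_z;
+}
+model {
+  z ~ normal(0, 1);                // zero mean, unit variance by assumption
+  alpha_j ~ normal(0, 5);          // placeholder priors; see the Appendix
+  alpha_y ~ normal(0, 5);
+  gamma_x ~ normal(0, 5);
+  gamma_z ~ normal(0, 5);
+  beta_x ~ normal(0, 5);
+  beta_z ~ normal(0, 5);
+  for (n in 1:N) {
+    // Decision model: P(T = 0 | ...) is logistic, so T = 1 occurs
+    // with the complementary probability.
+    t[n] ~ bernoulli(1 - inv_logit(alpha_j[jj[n]]
+                                   + gamma_x * x[n] + gamma_z * z[n]));
+    // Outcome model: Y is generated only for positive decisions.
+    if (t[n] == 1)
+      y[n] ~ bernoulli(1 - inv_logit(alpha_y
+                                     + beta_x * x[n] + beta_z * z[n]));
+  }
+}
+\end{verbatim}
+Given posterior draws of $\parameters$ and $\unobservable$, the estimate of $E(Y)$ above can then be computed as a Monte Carlo average over the draws.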
+
 \subsection{Our approach}
-- 
GitLab