From 2a5365e096293529be2acbcdea25668dc5b29984 Mon Sep 17 00:00:00 2001
From: Michael Mathioudakis <michael.mathioudakis@helsinki.fi>
Date: Tue, 7 May 2019 14:55:01 +0300
Subject: [PATCH] Use cumulative distribution

---
 paper/macros.tex |  2 +-
 paper/sl.tex     | 45 ++++++++++++++++++++++++---------------------
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/paper/macros.tex b/paper/macros.tex
index 335d7b8..217f428 100755
--- a/paper/macros.tex
+++ b/paper/macros.tex
@@ -4,7 +4,7 @@
 % \newtheorem{problem}{Problem}
 % \newtheorem{lemma}{Lemma}
 
-\newcommand{\prob}[1]{\ensuremath{\mathbf{Pr}(#1)}}
+\newcommand{\prob}[1]{\ensuremath{\mathbf{P}(#1)}}
 
 % \newcommand{\path}[2]{\ensuremath{\mathit{path}({#1}, {#2})}}
 \newcommand{\subtree}[2]{\ensuremath{\mathit{ST}_{#1}({#2})}}
diff --git a/paper/sl.tex b/paper/sl.tex
index 794e9ae..faad3cc 100755
--- a/paper/sl.tex
+++ b/paper/sl.tex
@@ -98,22 +98,25 @@ The decision is based on the following variables. First, the features \features
 Secondly, the leniency of the judge, expressed as a variable \leniency.
 Specifically, we assume that every judge evaluates a given candidate according to the probability 
 \[
-\prob{\outcome = 0 | \features = \featuresValue, \doop{\decision = 1}} 
+\prob{\outcome = 1 | \features = \featuresValue, \doop{\decision = 1}} 
 \]
-that the candidate will violate bail conditions (\outcome = 0) if they were granted bail.
-We write \outcome = 1 to refer to the case when the defendant does not violate bail, whether bail is granted or not.
+that the candidate would violate bail conditions (\outcome = 1) if they were granted bail.
+We write \outcome = 0 to refer to the case when the defendant does not violate bail, whether bail is granted or not.
 The \doop{condition} expression signifies that, in evaluating the probability, we consider the setting in which the condition (here, $\decision = 1$) is imposed on the data-generation process (and therefore alters the generative model).
 In addition, we assume that every judge would assign the same value to the above probability, given by a function \score{\featuresValue}.
 \[
-\score{\featuresValue} = \prob{\outcome = 0 | \features = \featuresValue, \doop{\decision = 1}}
+\score{\featuresValue} = \prob{\outcome = 1 | \features = \featuresValue, \doop{\decision = 1}}
 \]
 The assumption that, essentially, all judges have the same model for the probability that a defendant would violate bail is not far-fetched for the purposes of our analysis, particularly taking into account that \score{\featuresValue} can be learned from the observed data
 \[
-\prob{\outcome = 0 | \features = \featuresValue, \doop{\decision = 1}} = \prob{\outcome = 0 | \features = \featuresValue, \decision = 1}
+\prob{\outcome = 1 | \features = \featuresValue, \doop{\decision = 1}} = \prob{\outcome = 1 | \features = \featuresValue, \decision = 1}
 \]
 and that data are publicly accessible, allowing us to assume that all judges have access to the same information.
 Where judges {\it do differ} is at the level of their leniency \leniency.
-Following the above assumptions, a judge with leniency \leniency = \leniencyValue grants bail to the defendants for which $\score{\featuresValue} < r$.
+Following the above assumptions, a judge with leniency \leniency = \leniencyValue grants bail to exactly those defendants for which $F(\featuresValue) < r$, where $F$ denotes the cumulative distribution function of the score \score{\featuresValue} over the population of defendants.
+\begin{equation}
+	F(\featuresValue_0) = \int \indicator{\score{\featuresValue} \leq \score{\featuresValue_0}} \, d\prob{\featuresValue}
+\end{equation}
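+If we additionally assume that the score has a continuous distribution, then $F(\features)$ is uniformly distributed on $[0, 1]$, so that
+\[
+\prob{\decision = 1 | \leniency = \leniencyValue} = \prob{F(\features) < r} = r ,
+\]
+that is, a judge with leniency $r$ releases exactly a fraction $r$ of defendants, namely those with the lowest scores, consistent with the interpretation of leniency as an acceptance rate.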
 
 The bail-or-jail scenario is just one example of settings that involve a decision $\decision \in\{0,1\}$ that is based on individual features \features and leniency (acceptance rate) \leniency -- and where a behavior of interest \outcome is observed only for the cases where \decision = 1.
 The diagram of the causal model is shown in Figure~\ref{fig:causalmodel}.
@@ -121,7 +124,7 @@ Our results are applicable to other scenarios with same causal model.
 
 \begin{figure}
 \begin{center}
-\includegraphics[width=\columnwidth]{./img/causalmodel.png}
+\includegraphics[width=\columnwidth]{img/causalmodel.png}
 \end{center}
 \caption{Causal model.}
 \label{fig:causalmodel}
@@ -135,33 +138,33 @@ Performance is measured {\it for a given leniency level} as the rate at which ba
 In other words, performance is measured as the probability that a decision leads to an undesired outcome.
 \section{Analysis}
 
-We wish to calculate the probability of undesired outcome (\outcome = 0) at a fixed leniency level.
+We wish to calculate the probability of an undesired outcome (\outcome = 1) at a fixed leniency level.
 \begin{align*}
-& \prob{\outcome = 0 | \doop{\leniency = \leniencyValue}} = \nonumber \\
-& = \sum_\decisionValue \prob{\outcome = 0, \decision = \decisionValue | \doop{\leniency = \leniencyValue}} \nonumber \\
-& = \prob{\outcome = 0, \decision = 0 | \doop{\leniency = \leniencyValue}} + \prob{\outcome = 0, \decision = 1 | \doop{\leniency = \leniencyValue}} \nonumber \\
-& = 0 + \prob{\outcome = 0, \decision = 1 | \doop{\leniency = \leniencyValue}} \nonumber \\
-& = \prob{\outcome = 0, \decision = 1 | \doop{\leniency = \leniencyValue}} \nonumber \\
-& = \sum_\featuresValue \prob{\outcome = 0, \decision = 1, \features = \featuresValue | \doop{\leniency = \leniencyValue}} \nonumber \\
-& = \sum_\featuresValue \prob{\outcome = 0, \decision = 1 | \doop{\leniency = \leniencyValue}, \features = \featuresValue} \prob{\features = \featuresValue | \doop{\leniency = \leniencyValue}} \nonumber \\
-& = \sum_\featuresValue \prob{\outcome = 0, \decision = 1 | \doop{\leniency = \leniencyValue}, \features = \featuresValue} \prob{\features = \featuresValue} \nonumber \\
-& = \sum_\featuresValue \prob{\outcome = 0 | \decision = 1, \doop{\leniency = \leniencyValue}, \features = \featuresValue} \prob{\decision = 1 | \doop{\leniency = \leniencyValue}, \features = \featuresValue} \prob{\features = \featuresValue} \nonumber \\
-& = \sum_\featuresValue \prob{\outcome = 0 | \decision = 1, \features = \featuresValue} \prob{\decision = 1 | \leniency = \leniencyValue, \features = \featuresValue} \prob{\features = \featuresValue}
+& \prob{\outcome = 1 | \doop{\leniency = \leniencyValue}} \nonumber \\
+& = \sum_\decisionValue \prob{\outcome = 1, \decision = \decisionValue | \doop{\leniency = \leniencyValue}} \nonumber \\
+& = \prob{\outcome = 1, \decision = 0 | \doop{\leniency = \leniencyValue}} + \prob{\outcome = 1, \decision = 1 | \doop{\leniency = \leniencyValue}} \nonumber \\
+& = 0 + \prob{\outcome = 1, \decision = 1 | \doop{\leniency = \leniencyValue}} \nonumber \\
+& = \prob{\outcome = 1, \decision = 1 | \doop{\leniency = \leniencyValue}} \nonumber \\
+& = \sum_\featuresValue \prob{\outcome = 1, \decision = 1, \features = \featuresValue | \doop{\leniency = \leniencyValue}} \nonumber \\
+& = \sum_\featuresValue \prob{\outcome = 1, \decision = 1 | \doop{\leniency = \leniencyValue}, \features = \featuresValue} \prob{\features = \featuresValue | \doop{\leniency = \leniencyValue}} \nonumber \\
+& = \sum_\featuresValue \prob{\outcome = 1, \decision = 1 | \doop{\leniency = \leniencyValue}, \features = \featuresValue} \prob{\features = \featuresValue} \nonumber \\
+& = \sum_\featuresValue \prob{\outcome = 1 | \decision = 1, \doop{\leniency = \leniencyValue}, \features = \featuresValue} \prob{\decision = 1 | \doop{\leniency = \leniencyValue}, \features = \featuresValue} \prob{\features = \featuresValue} \nonumber \\
+& = \sum_\featuresValue \prob{\outcome = 1 | \decision = 1, \features = \featuresValue} \prob{\decision = 1 | \leniency = \leniencyValue, \features = \featuresValue} \prob{\features = \featuresValue}
 \end{align*}
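+The derivation above uses the following properties of our setting: a defendant who is denied bail cannot violate it, so $\prob{\outcome = 1, \decision = 0 | \doop{\leniency = \leniencyValue}} = 0$; the features are not affected by intervening on the leniency, so $\prob{\features = \featuresValue | \doop{\leniency = \leniencyValue}} = \prob{\features = \featuresValue}$; and, in the last step, the intervention $\doop{\leniency = \leniencyValue}$ reduces to plain conditioning on $\leniency = \leniencyValue$, while the outcome, given the decision and the features, does not depend on the leniency at all:
+\[
+\prob{\outcome = 1 | \decision = 1, \doop{\leniency = \leniencyValue}, \features = \featuresValue} = \prob{\outcome = 1 | \decision = 1, \features = \featuresValue} .
+\]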
 
 Expanding the above derivation for model \score{\featuresValue} learned from the data
 \[
-\score{\featuresValue} = \prob{\outcome = 0 | \features = \featuresValue, \decision = 1},
+\score{\featuresValue} = \prob{\outcome = 1 | \features = \featuresValue, \decision = 1},
 \]
 the {\it generalized performance} \generalPerformance of that model is given by the following formula.
 \begin{equation}
-\generalPerformance = \sum_\featuresValue \score{\featuresValue} \indicator{\score{\featuresValue} < r} \prob{\features = \featuresValue}
+\generalPerformance = \sum_\featuresValue \score{\featuresValue} \indicator{F(\featuresValue) < r} \prob{\features = \featuresValue}
 \label{eqn:gp}	
 \end{equation}
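+This expression follows by substituting into the last line of the derivation above the fact that the threshold decision is deterministic given the features,
+\[
+\prob{\decision = 1 | \leniency = \leniencyValue, \features = \featuresValue} = \indicator{F(\featuresValue) < r} ,
+\]
+together with $\prob{\outcome = 1 | \decision = 1, \features = \featuresValue} = \score{\featuresValue}$.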
 Equation~\ref{eqn:gp} can be calculated for a given model \datadistr{\featuresValue} = \prob{\features = \featuresValue} of individual features.
 Alternatively, we can have an empirical measure \empiricalPerformance of performance over the $\datasize$ data points in dataset \dataset, given by the following equation.
 \begin{equation}
-\empiricalPerformance = \frac{1}{\datasize} \sum_{(\featuresValue, \outcomeValue)\in\dataset}  \indicator{\outcomeValue = 0} \indicator{\score{\featuresValue} < r} 
+\empiricalPerformance = \frac{1}{\datasize} \sum_{(\featuresValue, \outcomeValue)\in\dataset}  \indicator{\outcomeValue = 1} \indicator{F(\featuresValue) < r} 
 \label{eqn:ep}	
 \end{equation}
 
-- 
GitLab