From 034bdce06a306fa70f010a68a327cf79c2533b35 Mon Sep 17 00:00:00 2001
From: Riku-Laine <28960190+Riku-Laine@users.noreply.github.com>
Date: Tue, 2 Jul 2019 11:09:31 +0300
Subject: [PATCH] Explained and simplified algorithms

---
 analysis_and_scripts/notes.tex | 140 +++++++++++++++++++--------------
 1 file changed, 81 insertions(+), 59 deletions(-)

diff --git a/analysis_and_scripts/notes.tex b/analysis_and_scripts/notes.tex
index f9c105d..449aeb5 100644
--- a/analysis_and_scripts/notes.tex
+++ b/analysis_and_scripts/notes.tex
@@ -358,8 +358,8 @@ where $\epsilon \sim N(0, 0.1)$. Next, the data is sorted for each judge by the
 \STATE Calculate $P(Y=0|X, Z, W)$ for each observation.
 \STATE Set Y to 0 if $P(Y = 0| X, Z, W) \geq 0.5$ and to 1 otherwise.
 \STATE Calculate $P(T=0|X, Z)$ for each observation and attach to data.
-\STATE Sort the data by (1) the judges' and (2) by probabilities $P(T=0|X, Z)$ in descending order. 
-\STATE \hskip3.0em $\rhd$ Now the most dangerous subjects for each of the judges are at the top.
+\STATE Sort the data by (1) the judges and (2) by probabilities $P(T=0|X, Z)$ in descending order. 
+\STATE \hskip3.0em $\rhd$ Now the most dangerous subjects for each judge are at the top.
 \STATE If subject belongs to the top $(1-r) \cdot 100 \%$ of observations assigned to that judge, set $T=0$ else set $T=1$.
 \STATE Halve the data to training and test sets at random.
 \STATE For both halves, set $Y=$ NA if decision is negative ($T=0$).
@@ -691,51 +691,51 @@ Monte Carlo	 	& 0.001292	& 0.016629	& 0.009429 & 0.0179825\\
 
 Different types of modules (data generation, decider and evaluator) are presented in this section. Summary table is presented last. See section \ref{sec:modular_framework} for a more thorough break-down on the properties of each module.
 
-\begin{algorithm}[] 			% enter the algorithm environment
+\subsection{Data generation modules} 
+
+We have three different kinds of data generating modules (DG modules). The differences between the DG modules are due to two factors: whether there are unobservables and whether the outcome is drawn from a Bernoulli distribution. The only algorithm generating data without unobservables is algorithm \ref{alg:dg:coinflip_without_z}; algorithms \ref{alg:dg:threshold_with_Z} and \ref{alg:dg:coinflip_with_z} generate data with unobservables. The outcome is drawn from a Bernoulli distribution in algorithms \ref{alg:dg:coinflip_without_z} and \ref{alg:dg:coinflip_with_z}, whereas in algorithm \ref{alg:dg:threshold_with_Z} the outcome is set when a value exceeds a certain threshold.
+
+\begin{algorithm}[h] 			% enter the algorithm environment
 \caption{Data generation module: outcome from Bernoulli without unobservables} 		% give the algorithm a caption
 \label{alg:dg:coinflip_without_z} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
 \REQUIRE Parameters: Total number of subjects $N_{total}$
 \ENSURE
-\FORALL{$i$ in $1, \ldots, N_{total}$}
-	\STATE Draw $x_i$ from from a standard Gaussian.
-	\STATE Draw $y_i$ from Bernoulli$(1-\sigma(x_i))$.
-	\STATE Attach to data.
+\FORALL{observations}
+	\STATE Draw $x$ from a standard Gaussian.
+	\STATE Draw $y$ from Bernoulli$(1-\sigma(x))$.
 \ENDFOR 
 \RETURN data
 \end{algorithmic}
 \end{algorithm}
 
-
-\begin{algorithm}[] 			% enter the algorithm environment
+\begin{algorithm}[h] 			% enter the algorithm environment
 \caption{Data generation module: outcome by threshold with unobservables} 		% give the algorithm a caption
 \label{alg:dg:threshold_with_Z} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
 \REQUIRE Parameters: Total number of subjects $N_{total},~\beta_X=1,~\beta_Z=1$ and $\beta_W=0.2$.
 \ENSURE
-\FORALL{$i$ in $1, \ldots, N_{total}$}
-	\STATE Draw $x_i, z_i$ and $w_i$ from from standard Gaussians independently.
-	\IF{$\sigma(\beta_Xx_i+\beta_Zz_i+\beta_Ww_i) \geq 0.5$}
-		\STATE {Set $y_i$ to 0.}
+\FORALL{observations}
+	\STATE Draw $x, z$ and $w$ from standard Gaussians independently.
+	\IF{$\sigma(\beta_Xx+\beta_Zz+\beta_Ww) \geq 0.5$}
+		\STATE {Set $y$ to 0.}
 	\ELSE
-		\STATE {Set $y_i$ to 1.}
+		\STATE {Set $y$ to 1.}
 	\ENDIF
-	\STATE Attach to data.
 \ENDFOR 
 \RETURN data
 \end{algorithmic}
 \end{algorithm}
 
-\begin{algorithm}[] 			% enter the algorithm environment
+\begin{algorithm}[h] 			% enter the algorithm environment
 \caption{Data generation module: outcome from Bernoulli with unobservables} 		% give the algorithm a caption
 \label{alg:dg:coinflip_with_z} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
 \REQUIRE Parameters: Total number of subjects $N_{total},~\beta_X=1,~\beta_Z=1$ and $\beta_W=0.2$.
 \ENSURE
-\FORALL{$i$ in $1, \ldots, N_{total}$}
-	\STATE Draw $x_i, z_i$ and $w_i$ from from standard Gaussians independently.
-	\STATE Draw $y_i$ from Bernoulli$(1-\sigma(\beta_XX+\beta_ZZ+\beta_WW))$.
-	\STATE Attach to data.
+\FORALL{observations}
+	\STATE Draw $x, z$ and $w$ from standard Gaussians independently.
+	\STATE Draw $y$ from Bernoulli$(1-\sigma(\beta_Xx+\beta_Zz+\beta_Ww))$.
 \ENDFOR 
 \RETURN data
 \end{algorithmic}
@@ -743,63 +743,72 @@ Different types of modules (data generation, decider and evaluator) are presente
 
 %For decider modules, input as terms of knowledge and parameters should be as explicitly specified as possible.
 
-\begin{algorithm}[] 			% enter the algorithm environment
-\caption{Decider module: human judge as specified by Lakkaraju et al. \cite{lakkaraju17}} 		% give the algorithm a caption
+\subsection{Decider modules} 
+
+We have three different kinds of decider modules. Their distinctive feature is whether the decisions are independent; for example, in algorithm \ref{alg:decider:human} the decisions of a decision-maker depend on the other subjects assigned to that decision-maker.
+
+The human decision-maker \cite{lakkaraju17} is presented below. The human decision-maker (1) takes all the subjects as a batch, (2) approximates the subjects' probabilities for a negative outcome and (3) assigns the decisions by giving the $r\cdot 100\%$ of subjects least likely to fail a positive decision. The resulting decisions are not independent as they depend on the presence of other observations.
+
+\begin{algorithm}[H] 			% enter the algorithm environment
+\caption{Decider module: human decision-maker by Lakkaraju et al. \cite{lakkaraju17}} 		% give the algorithm a caption
 \label{alg:decider:human} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
-\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $M=100, \beta_X=1, \beta_Z=1$.
+\REQUIRE Data with features $X, Z$, knowledge that both of them affect the outcome $Y$ and that they are independent / Parameters: $M=100, \beta_X=1, \beta_Z=1$.
 \ENSURE
 \STATE Sample acceptance rates for each M judges from Uniform$(0.1; 0.9)$ and round to tenth decimal place.
 \STATE Assign each observation to a judge at random.
 \STATE Calculate $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ) + \epsilon$ for each observation and attach to data.
 \STATE Sort the data by (1) the judges and (2) by the probabilities in descending order. 
-\STATE \hskip3.0em $\rhd$ Now the most dangerous subjects for each of the judges are at the top.
 \STATE If subject belongs to the top $(1-r) \cdot 100 \%$ of observations assigned to that judge, set $T=0$ else set $T=1$.
-\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Might not be performed.}
+\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Optional.}
 \RETURN data with decisions.
 \end{algorithmic}
 \end{algorithm}
 
-\begin{algorithm}[] 			% enter the algorithm environment
-\caption{Decider module: decisions from Bernoulli (pseudo-leniencies set at 0.5)} 		% give the algorithm a caption
+One discussed way of making the decisions independent was to ``flip a coin at some probability''. An implementation of that idea is presented below in algorithm \ref{alg:decider:coinflip}. As $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ)$, the parameter for the Bernoulli distribution is set to $1-\sigma(\beta_XX+\beta_ZZ)$. In the practical implementation, as some algorithms need to know the leniency of the decision-maker, the acceptance rate is then calculated from the decisions.
+
+\begin{algorithm}[H] 			% enter the algorithm environment
+\caption{Decider module: decisions from Bernoulli} 		% give the algorithm a caption
 \label{alg:decider:coinflip} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
-\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$.
+\REQUIRE Data with features $X, Z$, knowledge that both of them affect the outcome $Y$ and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$.
 \ENSURE
-\FORALL{$i$ in $1, \ldots, N_{total}$}
-	\STATE Draw $t_i$ from Bernoulli$(\sigma(\beta_Xx_i+\beta_Zz_i))$.
-	\STATE Attach to data.
-\ENDFOR 
-\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Might not be performed.}
+\STATE Draw $t$ from Bernoulli$(1-\sigma(\beta_Xx+\beta_Zz))$ for all observations.
+\STATE Compute the acceptance rate.
+\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Optional.}
 \RETURN data with decisions.
 \end{algorithmic}
 \end{algorithm}
 
-\begin{algorithm}[] 			% enter the algorithm environment
+A quantile-based decider module is presented in algorithm \ref{alg:decider:quantile}. The algorithm tries to emulate the human decision-maker of algorithm \ref{alg:decider:human} while giving out independent decisions. To achieve this, we first ``train'' the decision-maker by showing it a large number of subjects so that it can assess how high a new subject would rank in their probability for a negative outcome, e.g.\ whether they would be in the top 10\% or in the bottom 25\%. Then new decisions can be made using this rule with a guarantee that the fraction of positive decisions will converge to $r$.
+
+In practice, the inverse cumulative distribution function $F^{-1}$ is constructed by first sampling $10^7$ (i.e.\ many) observations of $\beta_XX+\beta_ZZ$ (where $X, Z \sim N(0, 1)$) and applying the inverse of the logit function, $\sigma(x)$, to them. Now the decision-maker has a reference distribution against which to compare any new subjects. Whenever presented with a new subject, the decision-maker uses the reference distribution and makes a judgement based on its $r^{th}$ quantile. 
+
+For example, a decision-maker with leniency 0.60 gets a new subject $\{x, z\}$ with a predicted probability $\sigma(\beta_Xx+\beta_Zz)\approx 0.7$ for a negative outcome with some coefficients $\beta$. Now, as the judge has leniency 0.60, their cut-point is $F^{-1}(0.60)\approx0.65$. That is, the judge will not give a positive decision to anyone with failure probability greater than 0.65, so our example subject will receive a negative decision. Due to simulating a large number of instances for training the judge, we can say that in the long run the judge will give positive decisions to 60\% of the subjects presented to them.
+
+\begin{algorithm}[H] 			% enter the algorithm environment
 \caption{Decider module: "quantile decisions"} 		% give the algorithm a caption
 \label{alg:decider:quantile} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
-\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$.
+\REQUIRE Data with features $X, Z$, knowledge that both of them affect the outcome $Y$ and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$.
 \ENSURE
 \STATE Sample acceptance rates for each M judges from Uniform$(0.1; 0.9)$ and round to tenth decimal place.
 \STATE Assign each observation to a judge at random.
-\STATE Calculate $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ)$ for each observation and attach to data.
-\FORALL{$i$ in $1, \ldots, N_{total}$}
-	\IF{$\sigma(\beta_Xx_i+\beta_Zz_i) \geq F^{-1}_{\pr(T=0|X, Z)}(r)$ \footnotemark} % Footnote text below algorithm
-		\STATE{Set $t_i=0$.} 
-	\ELSE 
-		\STATE{Set $t_i=1$.} 
-	\ENDIF
-	\STATE Attach to data.
-\ENDFOR 
-\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Might not be performed.}
+\STATE Construct the quantile function $F^{-1}(q)$.
+\STATE Calculate $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ)$ for all observations.
+\STATE If $\sigma(\beta_Xx+\beta_Zz) \geq F^{-1}(r)$ set $t=0$, otherwise set $t=1$.
+\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Optional.}
 \RETURN data with decisions.
 \end{algorithmic}
 \end{algorithm}
 
-\footnotetext{The inverse cumulative distribution function (or quantile function) $F^{-1}$ was constructed by first sampling $10^7$ observations from $N(0, 2)$ (sum of two Gaussians) and applying the inverse of logit function $\sigma(x)$. The value of $F^{-1}(r)$ was computed utilizing the previously computed array and numpy's \texttt{quantile} function.}
+\subsection{Evaluator modules}
 
-\begin{algorithm}[] 			% enter the algorithm environment
+Evaluator modules take some version of the data as input and output an estimate of the failure rate given that input.
+
+Motivation for the contraction algorithm is presented in Lakkaraju's paper \cite{lakkaraju17}. The algorithm below is a slight modification of it to incorporate model $\B$. The original algorithm has been copied to algorithm \ref{alg:contraction}.
+
+\begin{algorithm}[H] 			% enter the algorithm environment
 \caption{Evaluator module: Contraction algorithm \cite{lakkaraju17}} 		% give the algorithm a caption
 \label{alg:eval:contraction} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
@@ -822,7 +831,11 @@ Different types of modules (data generation, decider and evaluator) are presente
 \end{algorithmic}
 \end{algorithm}
 
-\begin{algorithm}[] 			% enter the algorithm environment
+%te eroaa muista algoritmeista / eriyispiirteenä että se saa käyttöönsä myös piilotetut outcomet.
+
+The true evaluation module computes the ``true failure rate'' of a predictive model had it been deployed to make independent decisions. The module first assigns each observation a predicted probability of a negative outcome. % TODO(review): this sentence was left unfinished in the draft -- complete the description of the module.
+
+\begin{algorithm}[H] 			% enter the algorithm environment
 \caption{Evaluator module: True evaluation} 		% give the algorithm a caption
 \label{alg:eval:true_eval} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
@@ -838,7 +851,9 @@ Different types of modules (data generation, decider and evaluator) are presente
 \end{algorithmic}
 \end{algorithm}
 
-\begin{algorithm}[] 			% enter the algorithm environment
+%lo kuten te, mutta määrätyt Y:t on piilotettu
+
+\begin{algorithm}[H] 			% enter the algorithm environment
 \caption{Evaluator module: Labeled outcomes} 		% give the algorithm a caption
 \label{alg:eval:labeled_outcomes} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
@@ -855,7 +870,7 @@ Different types of modules (data generation, decider and evaluator) are presente
 \end{algorithmic}
 \end{algorithm}
 
-\begin{algorithm}[] 			% enter the algorithm environment
+\begin{algorithm}[H] 			% enter the algorithm environment
 \caption{Evaluator module: Human evaluation} 		% give the algorithm a caption
 \label{alg:eval:human_eval} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
@@ -869,7 +884,10 @@ Different types of modules (data generation, decider and evaluator) are presente
 \end{algorithmic}
 \end{algorithm}
 
-\begin{algorithm}[] 			% enter the algorithm environment
+
+%kausaali evaluaattori kuten MM ensimmäisenä sen esitti
+
+\begin{algorithm}[H] 			% enter the algorithm environment
 \caption{Evaluator module: Causal evaluator (?)} 		% give the algorithm a caption
 \label{alg:eval:causal_eval} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
@@ -886,7 +904,9 @@ Different types of modules (data generation, decider and evaluator) are presente
 \end{algorithmic}
 \end{algorithm}
 
-\begin{algorithm}[] 			% enter the algorithm environment
+%alla oleva montecarlo perusajatus ennustaa Z ja sen perusteella imputoida Y. selitä kaikki ja yksinkertaista
+
+\begin{algorithm}[H] 			% enter the algorithm environment
 \caption{Evaluator module: Monte Carlo evaluator, imputation} 		% give the algorithm a caption
 \label{alg:eval:mc} 			% and a label for \ref{} commands later in the document
 \begin{algorithmic}[1] 		% enter the algorithmic environment
@@ -901,8 +921,8 @@ Different types of modules (data generation, decider and evaluator) are presente
 \STATE Compute the values of the inverse cdf of the observations in \texttt{quants} for the acceptance rates r of each judge and assign to $Q_r$.
 \FORALL{$i$ in $1, \ldots, N_{test}$}
 	\IF{$t_i = 0$}
-		\STATE{Take all $Z + \epsilon > logit(Q_{r,i})-x_i$ , \footnotemark~where $\epsilon \sim N(0, 0.1)$.} 
-	\ELSE 
+		\STATE{Take all $Z + \epsilon > logit(Q_{r,i})-x_i$ , where $\epsilon \sim N(0, 0.1)$.} 
+	\ELSE
 		\STATE{Take all $Z + \epsilon < logit(Q_{r,i})-x_i$ , where $\epsilon \sim N(0, 0.1)$.} 
 	\ENDIF
 	\STATE Compute $\bar{z}=\frac{1}{n}\sum z$
@@ -916,9 +936,11 @@ Different types of modules (data generation, decider and evaluator) are presente
 \end{algorithmic}
 \end{algorithm}
 
-\footnotetext{$logit^{-1}(x+z)>a \Leftrightarrow x+z > logit(a) \Leftrightarrow z > logit(a)-x$}
+\subsection{Summary table}
 
-\begin{table}[h!]
+Summary table of different modules.
+
+\begin{table}[H]
   \centering
   \caption{Summary of modules (under construction)}
   \begin{tabular}{lll}
@@ -927,8 +949,8 @@ Different types of modules (data generation, decider and evaluator) are presente
     \textbf{Data generator} & \textbf{Decider} & \textbf{Evaluator}  \\
     \midrule
     {\ul Without unobservables}		& Independent decisions		& {\ul Labeled outcomes} \\
-     					 		& 1. flip a coin by 			& \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
-    {\ul With unobservables}       		& $P(T=0|X, Z)$			& \tabitem acceptance rate r \\
+     					 		& 1. draw T from a Bernoulli	& \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
+    {\ul With unobservables}       		& with $P(T=0|X, Z)$			& \tabitem acceptance rate r \\
     \tabitem $P(Y=0|X, Z, W)$ 		& 2. determine with $F^{-1}(r)$	& \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
     
      {\ul With unobservables}	& Non-independent decisions  	& {\ul True evaluation} \\
@@ -961,7 +983,7 @@ Different types of modules (data generation, decider and evaluator) are presente
   \label{tab:modules}
 \end{table}
 
-\begin{thebibliography}{9}
+\begin{thebibliography}{9} % Might have been apa
 
 \bibitem{dearteaga18}
    De-Arteaga, Maria. Learning Under Selective Labels in the Presence of Expert Consistency. 2018. 
-- 
GitLab