diff --git a/analysis_and_scripts/notes.tex b/analysis_and_scripts/notes.tex
index a066f45c8950c9a5ed92aa67aa900fbdf5468da8..cb4dbcdbeffe4e9a29a8853bddbdd9702c12b759 100644
--- a/analysis_and_scripts/notes.tex
+++ b/analysis_and_scripts/notes.tex
@@ -13,6 +13,7 @@
 \usepackage{algorithmic}% http://ctan.org/pkg/algorithms
 \renewcommand{\algorithmicrequire}{\textbf{Input:}}
 \renewcommand{\algorithmicensure}{\textbf{Procedure:}}
+\renewcommand{\algorithmicreturn}{\textbf{Return}}
 
 \renewcommand{\descriptionlabel}[1]{\hspace{\labelsep}\textnormal{#1}}
 
@@ -90,7 +91,7 @@ This document presents the implementations of RL in pseudocode level. First, I p
 
 Mnemonic rule for the binary coding: zero bad (crime or jail), one good!
 
-\section{RL's notes about the selective labels paper (optional reading)}
+\section{RL's notes about the selective labels paper (optional reading)} \label{sec:comments}
 
 \emph{This chapter is to present my comments and insight regarding the topic.}
 
@@ -141,9 +142,9 @@ In the setting with unobservables Z, we first sample an acceptance rate r for al
 \ENSURE
 \STATE Sample acceptance rates for each M judges from $U(0.1; 0.9)$ and round to tenth decimal place.
 \STATE Sample features X, Z and W for each $N_{total}$ observations from standard Gaussian independently.
-\STATE Calculate $P(Y=0|X, Z, W)$ for each observation
+\STATE Calculate $P(Y=0|X, Z, W)$ for each observation.
 \STATE Set Y to 0 if $P(Y = 0| X, Z, W) \geq 0.5$ and to 1 otherwise.
-\STATE Calculate $P(T=0|X, Z)$ for each observation
+\STATE Calculate $P(T=0|X, Z)$ for each observation and attach it to the data.
 \STATE Sort the data by (1) the judges' and (2) by probabilities $P(T=0|X, Z)$ in descending order.
 \STATE \hskip3.0em $\rhd$ Now the most dangerous subjects for each of the judges are at the top.
 \STATE If subject belongs to the top $(1-r) \cdot 100 \%$ of observations assigned to that judge, set $T=0$ else set $T=1$.
@@ -163,13 +164,31 @@ NB: The sklearn's regression model can not be fitted if the data includes missin
 
 \section{Plotting}
 
-The following quantities are estimated from the data:
+The following quantities are computed from the data:
 \begin{itemize}
-\item True evaluation: The true failure rate of the model. Can only be calculated for synthetic data sets. See algorithm \ref{alg:true_eval}.
+\item True evaluation: The true failure rate of the model. Can only be calculated for synthetic data sets. See algorithm \ref{alg:true_eval} and the discussion in section \ref{sec:comments}.
 \item Labeled outcomes: The "traditional"/vanilla estimate of model performance. See algorithm \ref{alg:labeled_outcomes}.
 \item Human evaluation: The failure rate of human decision-makers who have access to the latent variable Z. Decision-makers with similar values of leniency are binned and treated as one hypothetical decision-maker. See algorithm \ref{alg:human_eval}.
-\item Contraction: See algorithm 1 of \cite{lakkaraju17}
+\item Contraction: See algorithm \ref{alg:contraction} of \cite{lakkaraju17}.
 \item Causal model: In essence, the empirical performance is calculated over the test set as $$\dfrac{1}{n}\sum_{(x, y)\in D}f(x)\delta(F(x) < r)$$ where $$f(x) = P(Y=0|T=1, X=x)$$ is a logistic regression model (see \ref{sec:model_fitting}) trained on the labeled data predicting Y from X and $$ F(x_0) = \int_{x\in\mathcal{X}} P(x)\delta(f(x) < f(x_0)) ~ dx.$$ All observations, even ones with missing outcome labels, can be used since empirical performance doesn't depend on them.
 $P(x)$ is Gaussian pdf from scipy.stats package and it is integrated over interval [-15, 15] with 40000 steps using si.simps function from scipy.integrate which uses Simpson's rule in estimating the value of the integral. (docs: \url{https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.simps.html}) \label{causal_cdf}
 \end{itemize}
 
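+As a rough illustration (not the repository's actual code; \texttt{f} is assumed to be a fitted one-dimensional sklearn logistic regression and the helper name \texttt{cdf\_of\_score} is hypothetical), $F(x_0)$ could be computed along these lines:
+\begin{verbatim}
+# Illustrative sketch only: `f` and `cdf_of_score` are assumed names.
+import numpy as np
+import scipy.stats as stats
+import scipy.integrate as si
+
+def cdf_of_score(f, x0, lo=-15, hi=15, steps=40000):
+    # F(x0) = int P(x) * delta(f(x) < f(x0)) dx, with P(x) the standard
+    # Gaussian pdf and f(x) = P(Y=0|T=1, X=x) from the fitted model.
+    x = np.linspace(lo, hi, steps)
+    fx = f.predict_proba(x.reshape(-1, 1))[:, 0]  # assumes classes_ == [0, 1]
+    fx0 = f.predict_proba([[x0]])[0, 0]
+    integrand = stats.norm.pdf(x) * (fx < fx0)
+    return si.simps(integrand, x=x)
+\end{verbatim}
+Averaging $f(x)\,\delta(F(x) < r)$ over the whole test set then gives the empirical performance defined above.
+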
@@ -241,6 +260,52 @@ The plotted curves are constructed using pseudo code presented in algorithm \ref
 \end{algorithmic}
 \end{algorithm}
 
+\begin{algorithm}[] % enter the algorithm environment
+\caption{Contraction algorithm \cite{lakkaraju17}} % give the algorithm a caption
+\label{alg:contraction} % and a label for \ref{} commands later in the document
+\begin{algorithmic}[1] % enter the algorithmic environment
+\REQUIRE Labeled test data $\mathcal{D}$ with probabilities $\mathcal{S}$ and \emph{missing outcome labels} for observations with $T=0$, acceptance rate $r$
+\ENSURE
+\STATE Let $q$ be the decision-maker with the highest acceptance rate in $\mathcal{D}$.
+\STATE $\mathcal{D}_q = \{(x, j, t, y) \in \mathcal{D}|j=q\}$
+\STATE \hskip3.0em $\rhd$ $\mathcal{D}_q$ is the set of all observations judged by $q$
+\STATE
+\STATE $\mathcal{R}_q = \{(x, j, t, y) \in \mathcal{D}_q|t=1\}$
+\STATE \hskip3.0em $\rhd$ $\mathcal{R}_q$ is the set of observations in $\mathcal{D}_q$ with observed outcome labels
+\STATE
+\STATE Sort observations in $\mathcal{R}_q$ in descending order of confidence scores $\mathcal{S}$ and assign to $\mathcal{R}_q^{sort}$.
+\STATE \hskip3.0em $\rhd$ Observations deemed as high risk by the black-box model $\mathcal{B}$ are at the top of this list
+\STATE
+\STATE Remove the top $[(1.0-r)|\mathcal{D}_q|]-[|\mathcal{D}_q|-|\mathcal{R}_q|]$ observations of $\mathcal{R}_q^{sort}$ and call the remaining list $\mathcal{R_B}$.
+\STATE \hskip3.0em $\rhd$ $\mathcal{R_B}$ is the list of observations assigned to $t = 1$ by $\mathcal{B}$
+\STATE
+\STATE Compute $\mathbf{u}=\sum_{i=1}^{|\mathcal{R_B}|} \dfrac{\delta\{y_i=0\}}{|\mathcal{D}_q|}$.
+\RETURN $\mathbf{u}$
+\end{algorithmic}
+\end{algorithm}
+
 \begin{algorithm}[] % enter the algorithm environment
 \caption{Causal model, empirical performance (ep)} % give the algorithm a caption
 \label{alg:causal_model} % and a label for \ref{} commands later in the document
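+
+For reference, a minimal sketch of how algorithm \ref{alg:contraction} could be implemented (assuming the data sits in a pandas data frame; the column names are hypothetical, not those of the actual scripts):
+\begin{verbatim}
+# Illustrative sketch only: column names 'judge', 'decision_T',
+# 'B_prob_0' and 'result_Y' are assumed, not the scripts' actual names.
+import pandas as pd
+
+def contraction(df, r):
+    # q = decision-maker with the highest acceptance rate
+    q = df.groupby('judge')['decision_T'].mean().idxmax()
+    D_q = df[df['judge'] == q]
+    # R_q = subjects released by q, i.e. those with observed outcomes
+    R_q = D_q[D_q['decision_T'] == 1]
+    # sort by predicted risk P(Y=0), most dangerous first
+    R_sort = R_q.sort_values(by='B_prob_0', ascending=False)
+    # the model must detain (1-r)|D_q| subjects; q already detained
+    # |D_q| - |R_q| of them, so drop that many more from the top
+    k = int((1.0 - r) * len(D_q)) - (len(D_q) - len(R_q))
+    R_B = R_sort.iloc[k:]
+    # failure rate of the model at leniency r
+    return (R_B['result_Y'] == 0).sum() / len(D_q)
+\end{verbatim}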