diff --git a/analysis_and_scripts/notes.tex b/analysis_and_scripts/notes.tex
index a066f45c8950c9a5ed92aa67aa900fbdf5468da8..cb4dbcdbeffe4e9a29a8853bddbdd9702c12b759 100644
--- a/analysis_and_scripts/notes.tex
+++ b/analysis_and_scripts/notes.tex
@@ -13,6 +13,7 @@
 \usepackage{algorithmic}% http://ctan.org/pkg/algorithms
 \renewcommand{\algorithmicrequire}{\textbf{Input:}}
 \renewcommand{\algorithmicensure}{\textbf{Procedure:}}
+\renewcommand{\algorithmicreturn}{\textbf{Return}}
 
 \renewcommand{\descriptionlabel}[1]{\hspace{\labelsep}\textnormal{#1}}
 
@@ -90,7 +91,7 @@ This document presents the implementations of RL in pseudocode level. First, I p
 
 Mnemonic rule for the binary coding: zero bad (crime or jail), one good!
 
-\section{RL's notes about the selective labels paper (optional reading)}
+\section{RL's notes about the selective labels paper (optional reading)} \label{sec:comments}
 
 \emph{This chapter is to present my comments and insight regarding the topic.}
 
@@ -141,9 +142,9 @@ In the setting with unobservables Z, we first sample an acceptance rate r for al
 \ENSURE
 \STATE Sample acceptance rates for each M judges from $U(0.1; 0.9)$ and round to tenth decimal place.
 \STATE Sample features X, Z and W for each $N_{total}$ observations from standard Gaussian independently.
-\STATE Calculate $P(Y=0|X, Z, W)$ for each observation
+\STATE Calculate $P(Y=0|X, Z, W)$ for each observation.
 \STATE Set Y to 0 if $P(Y = 0| X, Z, W) \geq 0.5$ and to 1 otherwise.
-\STATE Calculate $P(T=0|X, Z)$ for each observation
+\STATE Calculate $P(T=0|X, Z)$ for each observation and attach it to the data.
 \STATE Sort the data by (1) the judges' and (2) by probabilities $P(T=0|X, Z)$ in descending order.
 \STATE \hskip3.0em $\rhd$ Now the most dangerous subjects for each of the judges are at the top.
 \STATE If subject belongs to the top $(1-r) \cdot 100 \%$ of observations assigned to that judge, set $T=0$ else set $T=1$.
@@ -163,13 +164,31 @@ NB: The sklearn's regression model can not be fitted if the data includes missin
 
 \section{Plotting}
 
-The following quantities are estimated from the data:
+The following quantities are computed from the data:
 \begin{itemize}
-\item True evaluation: The true failure rate of the model. Can only be calculated for synthetic data sets. See algorithm \ref{alg:true_eval}.
+\item True evaluation: The true failure rate of the model. Can only be calculated for synthetic data sets. See algorithm \ref{alg:true_eval} and the discussion in section \ref{sec:comments}.
 \item Labeled outcomes: The "traditional"/vanilla estimate of model performance. See algorithm \ref{alg:labeled_outcomes}.
 \item Human evaluation: The failure rate of human decision-makers who have access to the latent variable Z. Decision-makers with similar values of leniency are binned and treated as one hypothetical decision-maker. See algorithm \ref{alg:human_eval}.
-\item Contraction: See algorithm 1 of \cite{lakkaraju17}
+\item Contraction: See algorithm \ref{alg:contraction} of \cite{lakkaraju17}.
 \item Causal model: In essence, the empirical performance is calculated over the test set as $$\dfrac{1}{n}\sum_{(x, y)\in D}f(x)\delta(F(x) < r)$$ where $$f(x) = P(Y=0|T=1, X=x)$$ is a logistic regression model (see \ref{sec:model_fitting}) trained on the labeled data predicting Y from X and $$ F(x_0) = \int_{x\in\mathcal{X}} P(x)\delta(f(x) < f(x_0)) ~ dx.$$ All observations, even ones with missing outcome labels, can be used since empirical performance doesn't depend on them.
 $P(x)$ is Gaussian pdf from scipy.stats package and it is integrated over interval [-15, 15] with 40000 steps using si.simps function from scipy.integrate which uses Simpson's rule in estimating the value of the integral. (docs: \url{https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.simps.html}) \label{causal_cdf}
 \end{itemize}
 
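+As a rough illustration (not the repository's actual code; \texttt{f} is assumed to be a fitted one-dimensional sklearn logistic regression and the helper name \texttt{cdf\_of\_score} is hypothetical), $F(x_0)$ could be computed along these lines:
+\begin{verbatim}
+# Illustrative sketch only: `f` and `cdf_of_score` are assumed names.
+import numpy as np
+import scipy.stats as stats
+import scipy.integrate as si
+
+def cdf_of_score(f, x0, lo=-15, hi=15, steps=40000):
+    # F(x0) = int P(x) * delta(f(x) < f(x0)) dx, with P(x) the standard
+    # Gaussian pdf and f(x) = P(Y=0|T=1, X=x) from the fitted model.
+    x = np.linspace(lo, hi, steps)
+    fx = f.predict_proba(x.reshape(-1, 1))[:, 0]  # assumes classes_ == [0, 1]
+    fx0 = f.predict_proba([[x0]])[0, 0]
+    integrand = stats.norm.pdf(x) * (fx < fx0)
+    return si.simps(integrand, x=x)
+\end{verbatim}
+Averaging $f(x)\,\delta(F(x) < r)$ over the whole test set then gives the empirical performance defined above.
+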
@@ -241,6 +260,52 @@ The plotted curves are constructed using pseudo code presented in algorithm \ref
 \end{algorithmic}
 \end{algorithm}
 
+\begin{algorithm}[] % enter the algorithm environment
+\caption{Contraction algorithm \cite{lakkaraju17}} % give the algorithm a caption
+\label{alg:contraction} % and a label for \ref{} commands later in the document
+\begin{algorithmic}[1] % enter the algorithmic environment
+\REQUIRE Labeled test data $\mathcal{D}$ with probabilities $\mathcal{S}$ and \emph{missing outcome labels} for observations with $T=0$, acceptance rate $r$
+\ENSURE
+\STATE Let $q$ be the decision-maker with the highest acceptance rate in $\mathcal{D}$.
+\STATE $\mathcal{D}_q = \{(x, j, t, y) \in \mathcal{D}|j=q\}$
+\STATE \hskip3.0em $\rhd$ $\mathcal{D}_q$ is the set of all observations judged by $q$
+\STATE
+\STATE $\mathcal{R}_q = \{(x, j, t, y) \in \mathcal{D}_q|t=1\}$
+\STATE \hskip3.0em $\rhd$ $\mathcal{R}_q$ is the set of observations in $\mathcal{D}_q$ with observed outcome labels
+\STATE
+\STATE Sort observations in $\mathcal{R}_q$ in descending order of confidence scores $\mathcal{S}$ and assign to $\mathcal{R}_q^{sort}$.
+\STATE \hskip3.0em $\rhd$ Observations deemed as high risk by the black-box model $\mathcal{B}$ are at the top of this list
+\STATE
+\STATE Remove the top $[(1.0-r)|\mathcal{D}_q|]-[|\mathcal{D}_q|-|\mathcal{R}_q|]$ observations of $\mathcal{R}_q^{sort}$ and call the remaining list $\mathcal{R_B}$.
+\STATE \hskip3.0em $\rhd$ $\mathcal{R_B}$ is the list of observations assigned to $t = 1$ by $\mathcal{B}$
+\STATE
+\STATE Compute $\mathbf{u}=\sum_{i=1}^{|\mathcal{R_B}|} \dfrac{\delta\{y_i=0\}}{|\mathcal{D}_q|}$.
+\RETURN $\mathbf{u}$
+\end{algorithmic}
+\end{algorithm}
+
 \begin{algorithm}[] % enter the algorithm environment
 \caption{Causal model, empirical performance (ep)} % give the algorithm a caption
 \label{alg:causal_model} % and a label for \ref{} commands later in the document
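+
+For reference, a minimal sketch of how algorithm \ref{alg:contraction} could be implemented (assuming the data sits in a pandas data frame; the column names are hypothetical, not those of the actual scripts):
+\begin{verbatim}
+# Illustrative sketch only: column names 'judge', 'decision_T',
+# 'B_prob_0' and 'result_Y' are assumed, not the scripts' actual names.
+import pandas as pd
+
+def contraction(df, r):
+    # q = decision-maker with the highest acceptance rate
+    q = df.groupby('judge')['decision_T'].mean().idxmax()
+    D_q = df[df['judge'] == q]
+    # R_q = subjects released by q, i.e. those with observed outcomes
+    R_q = D_q[D_q['decision_T'] == 1]
+    # sort by predicted risk P(Y=0), most dangerous first
+    R_sort = R_q.sort_values(by='B_prob_0', ascending=False)
+    # the model must detain (1-r)|D_q| subjects; q already detained
+    # |D_q| - |R_q| of them, so drop that many more from the top
+    k = int((1.0 - r) * len(D_q)) - (len(D_q) - len(R_q))
+    R_B = R_sort.iloc[k:]
+    # failure rate of the model at leniency r
+    return (R_B['result_Y'] == 0).sum() / len(D_q)
+\end{verbatim}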