Skip to content
Snippets Groups Projects
Commit c11bb7a8 authored by Riku-Laine's avatar Riku-Laine
Browse files

Results with MC computed and written

parent 4fc3921c
No related branches found
No related tags found
No related merge requests found
......@@ -56,7 +56,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
......@@ -221,7 +221,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......
This diff is collapsed.
\documentclass[11pt,a4paper]{amsart}
\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
\usepackage[margin=1in]{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{a4paper} % ... or letterpaper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
\usepackage{graphicx}
\usepackage{amssymb}
\usepackage{epstopdf}
......@@ -249,29 +249,28 @@ Given the above framework, the goal is to create an evaluation algorithm that ca
\end{wrapfigure}
\emph{Below is the framework as it was written on the whiteboard, after which RL presents his own remarks on how he understood it.}
~ \\
\begin{description}
\item[Data generation:] ~ \\
\item[Data generation:]
~ \\
\hskip 3em \textbf{Input:} [none] \\ ~ \\
\textbf{Output:} $X, Z, W, Y$ as specified by $\M$
\hskip 3em \textbf{Input:} [none] \\
\textbf{Output:} $X, Z, W, Y$ as specified by $\M$~ \\
\item[Decider:] single vs. batch \\
\item[Decider:] single vs. batch
~ \\
\hskip 3em \textbf{Input:}
\begin{itemize}
\item one defendant
\item $\M$
\end{itemize}
\textbf{Output:}
\begin{itemize}
\item argmax likelihood $y$
\item $\pr(Y=0~|~input)$
\item order
\item order \\
\end{itemize}
\item[Evaluator:] ~ \\
\item[Evaluator:]
~ \\
\hskip 3em \textbf{Input:}
\begin{itemize}
......@@ -638,17 +637,59 @@ Given our framework defined in section \ref{sec:framework}, the results presente
\label{fig:random_predictions}
\end{figure}
\subsection{Modular framework -- Monte Carlo evaluator} \label{sec:modules_mc}
For these results, data was generated with the module in algorithm~\ref{alg:dg:coinflip_with_z} (``coin-flip results'') and decisions were assigned using the module in algorithm~\ref{alg:decider:quantile}. Curves computed with algorithms~\ref{alg:eval:true_eval}, \ref{alg:eval:labeled_outcomes}, \ref{alg:eval:human_eval}, \ref{alg:eval:contraction} and~\ref{alg:eval:mc} are presented in figure~\ref{fig:modules_mc}. The corresponding MAEs are presented in table~\ref{tab:modules_mc}.
\begin{table}[H]
\centering
\caption{Mean absolute error (MAE) w.r.t.\ the true evaluation. See the modules used in section~\ref{sec:modules_mc}.}
\begin{tabular}{l | c}
Method & MAE with Z \\ \hline
Labeled outcomes & 0.111075\\
Human evaluation & 0.027298\\
Contraction & 0.004206\\
Monte Carlo & 0.001292\\
\end{tabular}
\label{tab:modules_mc}
\end{table}
\begin{figure}[H]
\centering
\includegraphics[width=0.75\textwidth]{sl_with_Z_10iter_coinflip_quantile_defaults_mc}
\caption{Failure rate vs.\ acceptance rate with varying levels of leniency. Data was generated with unobservables. See the modules used in section~\ref{sec:modules_mc}.}
\label{fig:modules_mc}
\end{figure}
%\begin{figure}[H]
% \centering
% \begin{subfigure}[b]{0.475\textwidth}
% \includegraphics[width=\textwidth]{sl_without_Z_10iter_coinflip_quantile_defaults_mc}
% \caption{Data without unobservables. PLACEHOLDER}
% \label{fig:modules_mc_without_Z}
% \end{subfigure}
% \quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc.
% %(or a blank line to force the subfigure onto a new line)
% \begin{subfigure}[b]{0.475\textwidth}
% \includegraphics[width=\textwidth]{sl_with_Z_10iter_coinflip_quantile_defaults_mc}
% \caption{Data with unobservables.}
% \label{fig:modules_mc_with_Z}
% \end{subfigure}
% \caption{Failure rate vs. acceptance rate with varying levels of leniency. See modules used in section \ref{sec:modules_mc}}
% \label{fig:modules_mc}
%\end{figure}
\section{Modules}
Different types of modules are presented in this section. A summary table is presented last.
\subsection{Data generation modules}
Data generation modules usually take only some generative parameters as input.
\begin{itemize}
\item Data generation modules usually take only some generative parameters as input.
\end{itemize}
\begin{algorithm}[H] % enter the algorithm environment
\begin{algorithm}[] % enter the algorithm environment
\caption{Data generation module: "coin-flip results" without unobservables} % give the algorithm a caption
%\label{alg:} % and a label for \ref{} commands later in the document
\label{alg:dg:coinflip_without_z} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Parameters: Total number of subjects $N_{total}$
\ENSURE
......@@ -662,9 +703,9 @@ Data generation modules usually take only some generative parameters as input.
\end{algorithm}
\begin{algorithm}[H] % enter the algorithm environment
\begin{algorithm}[] % enter the algorithm environment
\caption{Data generation module: "results by threshold" with unobservables} % give the algorithm a caption
%\label{alg:} % and a label for \ref{} commands later in the document
\label{alg:dg:threshold_with_Z} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Parameters: Total number of subjects $N_{total},~\beta_X=1,~\beta_Z=1$ and $\beta_W=0.2$.
\ENSURE
......@@ -677,9 +718,9 @@ Data generation modules usually take only some generative parameters as input.
\end{algorithmic}
\end{algorithm}
\begin{algorithm}[H] % enter the algorithm environment
\begin{algorithm}[] % enter the algorithm environment
\caption{Data generation module: "coin-flip results" with unobservables} % give the algorithm a caption
%\label{alg:} % and a label for \ref{} commands later in the document
\label{alg:dg:coinflip_with_z} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Parameters: Total number of subjects $N_{total},~\beta_X=1,~\beta_Z=1$ and $\beta_W=0.2$.
\ENSURE
......@@ -692,17 +733,15 @@ Data generation modules usually take only some generative parameters as input.
\end{algorithmic}
\end{algorithm}
\subsection{Decider modules}
%For decider modules, input as terms of knowledge and parameters should be as explicitly specified as possible.
\begin{algorithm}[H] % enter the algorithm environment
\caption{Decider module: human judge as specified by Lakkaraju et al.} % give the algorithm a caption
%\label{alg:} % and a label for \ref{} commands later in the document
\begin{algorithm}[] % enter the algorithm environment
\caption{Decider module: human judge as specified by Lakkaraju et al. \cite{lakkaraju17}} % give the algorithm a caption
\label{alg:decider:human} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $M=100, \beta_X=1, \beta_Z=1$.
\ENSURE
\STATE Sample acceptance rates for each of the $M$ judges from $U(0.1; 0.9)$ and round to one decimal place.
\STATE Sample acceptance rates for each of the $M$ judges from Uniform$(0.1; 0.9)$ and round to one decimal place.
\STATE Assign each observation to a judge at random.
\STATE Calculate $P(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ) + \epsilon$ for each observation and attach to data.
\STATE Sort the data by (1) the judges and (2) the probabilities $P(T=0|X, Z)$ in descending order.
......@@ -713,9 +752,9 @@ Data generation modules usually take only some generative parameters as input.
\end{algorithmic}
\end{algorithm}
\begin{algorithm}[H] % enter the algorithm environment
\caption{Decider module: "coin-flip decisions"} % give the algorithm a caption
%\label{alg:} % and a label for \ref{} commands later in the document
\begin{algorithm}[] % enter the algorithm environment
\caption{Decider module: "coin-flip decisions" (pseudo-leniencies set at 0.5)} % give the algorithm a caption
\label{alg:decider:coinflip} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$.
\ENSURE
......@@ -728,11 +767,33 @@ Data generation modules usually take only some generative parameters as input.
\end{algorithmic}
\end{algorithm}
\subsection{Evaluator modules}
\begin{algorithm}[] % enter the algorithm environment
\caption{Decider module: "quantile decisions"} % give the algorithm a caption
\label{alg:decider:quantile} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$.
\ENSURE
\STATE Sample acceptance rates for each of the $M$ judges from Uniform$(0.1; 0.9)$ and round to one decimal place.
\STATE Assign each observation to a judge at random.
\STATE Calculate $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ)$ for each observation and attach to data.
\FORALL{$i$ in $1, \ldots, N_{total}$}
\IF{$\sigma(\beta_XX+\beta_ZZ) \geq F^{-1}_{\pr(T=0|X, Z)}(r)$ \footnotemark} % Footnote text below algorithm
\STATE {set $t_i=0$}
\ELSE
\STATE{set $t_i=1$}
\ENDIF
\STATE Attach to data.
\ENDFOR
\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Might not be performed.}
\RETURN data with decisions.
\end{algorithmic}
\end{algorithm}
\footnotetext{The inverse cumulative distribution function (or quantile function) $F^{-1}$ was constructed by first sampling $10^7$ observations from $N(0, 2)$ (the sum of two Gaussians) and applying the inverse logit function $\sigma(x)$ to them. The value of $F^{-1}(r)$ was then computed from this array with NumPy's \texttt{quantile} function.}
\begin{algorithm}[H] % enter the algorithm environment
\begin{algorithm}[] % enter the algorithm environment
\caption{Evaluator module: Contraction algorithm \cite{lakkaraju17}} % give the algorithm a caption
%\label{alg:} % and a label for \ref{} commands later in the document
\label{alg:eval:contraction} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$, acceptance rate r, knowledge that X affects Y
\ENSURE
......@@ -742,16 +803,12 @@ Data generation modules usually take only some generative parameters as input.
\STATE Let $q$ be the decision-maker with highest acceptance rate in $\D$.
\STATE $\D_q = \{(x, j, t, y) \in \D|j=q\}$
\STATE \hskip3.0em $\rhd$ $\D_q$ is the set of all observations judged by $q$
\STATE
\STATE $\RR_q = \{(x, j, t, y) \in \D_q|t=1\}$
\STATE \hskip3.0em $\rhd$ $\RR_q$ is the set of observations in $\D_q$ with observed outcome labels
\STATE
\STATE Sort observations in $\RR_q$ in descending order of confidence scores $\s$ and assign to $\RR_q^{sort}$.
\STATE \hskip3.0em $\rhd$ Observations deemed as high risk by the black-box model $\mathcal{B}$ are at the top of this list
\STATE
\STATE Remove the top $[(1.0-r)|\D_q |]-[|\D_q |-|\RR_q |]$ observations of $\RR_q^{sort}$ and call this list $\mathcal{R_B}$
\STATE \hskip3.0em $\rhd$ $\mathcal{R_B}$ is the list of observations assigned to $t = 1$ by $\mathcal{B}$
\STATE
\STATE Compute $\mathbf{u}=\sum_{i=1}^{|\mathcal{R_B}|} \dfrac{\delta\{y_i=0\}}{| \D_q |}$.
\RETURN $\mathbf{u}$
\end{algorithmic}
......@@ -759,7 +816,7 @@ Data generation modules usually take only some generative parameters as input.
\begin{algorithm}[] % enter the algorithm environment
\caption{Evaluator module: True evaluation} % give the algorithm a caption
%\label{alg:true_eval} % and a label for \ref{} commands later in the document
\label{alg:eval:true_eval} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data $\D$ with properties $\{x_i, t_i, y_i\}$ and \emph{all outcome labels}, acceptance rate r, knowledge that X affects Y
\ENSURE
......@@ -775,7 +832,7 @@ Data generation modules usually take only some generative parameters as input.
\begin{algorithm}[] % enter the algorithm environment
\caption{Evaluator module: Labeled outcomes} % give the algorithm a caption
%\label{alg:labeled_outcomes} % and a label for \ref{} commands later in the document
\label{alg:eval:labeled_outcomes} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data $\D$ with properties $\{x_i, t_i, y_i\}$, acceptance rate r, knowledge that X affects Y
\ENSURE
......@@ -792,7 +849,7 @@ Data generation modules usually take only some generative parameters as input.
\begin{algorithm}[] % enter the algorithm environment
\caption{Evaluator module: Human evaluation} % give the algorithm a caption
%\label{alg:human_eval} % and a label for \ref{} commands later in the document
\label{alg:eval:human_eval} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$, acceptance rate r
\ENSURE
......@@ -804,25 +861,72 @@ Data generation modules usually take only some generative parameters as input.
\end{algorithmic}
\end{algorithm}
\subsection{Summary}
\begin{algorithm}[] % enter the algorithm environment
\caption{Evaluator module: Causal evaluator (?)} % give the algorithm a caption
\label{alg:eval:causal_eval} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data $\D$ with properties $\{x_i, t_i, y_i\}$, acceptance rate r
\ENSURE
\STATE Split the data into a test set and a training set.
\STATE Train a predictive model $\B$ on training data.
\STATE Estimate probability scores $\s$ using $\B$ for all observations in test data and attach to test data.
\FORALL{$i$ in $1, \ldots, N_{total}$}
\STATE Evaluate $F(x_i) = \int_{x\in\mathcal{X}} P_X(x)\delta(f(x)<f(x_i)) ~dx$ and assign to $\mathcal{F}_{predictions}$
\ENDFOR
\STATE Create boolean array $T_{causal} = \mathcal{F}_{predictions} < r$.
\RETURN $\frac{1}{|\D_{test}|}\sum_{i=1}^{|\D_{test}|} \s_i \cdot T_{i, causal}$ which is equal to $\frac{1}{|\D|}\sum_{x\in\D} f(x)\delta(F(x) < r)$
\end{algorithmic}
\end{algorithm}
\begin{table}[H]
\begin{algorithm}[] % enter the algorithm environment
\caption{Evaluator module: Monte Carlo evaluator, imputation} % give the algorithm a caption
\label{alg:eval:mc} % and a label for \ref{} commands later in the document
\begin{algorithmic}[1] % enter the algorithmic environment
\REQUIRE Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$, acceptance rate r
\ENSURE
\STATE Split the data into a test set and a training set.
\STATE Train a predictive model $\B$ on training data.
\STATE Estimate probability scores $\s$ using $\B$ for all observations in test data and attach to test data.
\STATE Sample $N_{sim}$ observations from a standard Gaussian and assign to Z.
\STATE Sample $N_{sim}$ observations from the sum of two standard Gaussians ($N(0, 2)$) and assign to \texttt{quants}.
\STATE Transform the values of the samples in \texttt{quants} using the inverse logit function.
\STATE Compute the values of the inverse cdf of the observations in \texttt{quants} for the acceptance rates r of each judge and assign to $Q_r$.
\FORALL{$i$ in $1, \ldots, N_{test}$}
\IF{$t_i = 0$}
\STATE {Take all $Z > \operatorname{logit}(Q_{r,i})-x_i$ \footnotemark}
\ELSE
\STATE{Take all $Z < \operatorname{logit}(Q_{r,i})-x_i$}
\ENDIF
\STATE Draw predictions $\hat{p}_{i,y}$ from Bernoulli($1-\operatorname{logit}^{-1}(x_i+\bar{Z})$).
\ENDFOR
\STATE Impute missing observations using $\hat{p}_y$.
\STATE Sort the data by the probabilities $\s$ in ascending order.
\STATE \hskip3.0em $\rhd$ Now the most dangerous subjects are last.
\STATE Calculate the number to release $N_{free} = |\D_{test}| \cdot r$.
\RETURN $\frac{1}{|\D_{test}|}\sum_{i=1}^{N_{free}}\delta\{y_i=0\}$, computed using the observed and imputed observations.
\end{algorithmic}
\end{algorithm}
\footnotetext{$\operatorname{logit}^{-1}(x+z)>a \Leftrightarrow x+z > \operatorname{logit}(a) \Leftrightarrow z > \operatorname{logit}(a)-x$}
\begin{table}[h!]
\centering
\caption{Summary of modules (under construction)}
\begin{tabular}{lll}
\toprule
\multicolumn{3}{c}{Module type} \\[.5\normalbaselineskip]
\textbf{Data generator} & \textbf{Decider} & \textbf{Evaluator} \\
\midrule
Without unobservables & Independent decisions & {\ul Labeled outcomes} \\
& \tabitem $P(T=0|X, Z)$ & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
With unobservables & \tabitem "threshold rule" & \tabitem acceptance rate r \\
\tabitem $P(Y=0|X, Z, W)$ & & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
{\ul Without unobservables} & Independent decisions & {\ul Labeled outcomes} \\
& 1. flip a coin by & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
{\ul With unobservables} & $P(T=0|X, Z)$ & \tabitem acceptance rate r \\
\tabitem $P(Y=0|X, Z, W)$ & 2. determine with $F^{-1}(r)$ & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
With unobservables & & {\ul True evaluation} \\
\tabitem "threshold rule" & & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
& & and \emph{all outcome labels} \\
& & \tabitem acceptance rate r \\
& & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
{\ul With unobservables} & Non-independent decisions & {\ul True evaluation} \\
\tabitem assign Y by & 3. sort by $P(T=0|X, Z)$ & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
"threshold rule" & and assign $t$ by $r$ & and \emph{all outcome labels} \\
& & \tabitem acceptance rate r \\
& & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
& & {\ul Human evaluation} \\
& & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
......@@ -837,10 +941,14 @@ Data generation modules usually take only some generative parameters as input.
& & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\
& & \tabitem acceptance rate r \\
& & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
& & {\ul Monte Carlo evaluator} \\
& & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\
& & \tabitem acceptance rate r \\
& & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip]
\bottomrule
\end{tabular}
\caption{Summary table of modules (under construction)}
\label{tab:jotain}
\label{tab:modules}
\end{table}
\begin{thebibliography}{9}
......
figures/sl_with_Z_10iter_coinflip_quantile_defaults_mc.png

49.9 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment