diff --git a/analysis_and_scripts/notes.tex b/analysis_and_scripts/notes.tex index f9c105db79d4605e02126c668193242960ebe851..449aeb5d1eb3b8bc7d5e8112d3eab86921d369d7 100644 --- a/analysis_and_scripts/notes.tex +++ b/analysis_and_scripts/notes.tex @@ -358,8 +358,8 @@ where $\epsilon \sim N(0, 0.1)$. Next, the data is sorted for each judge by the \STATE Calculate $P(Y=0|X, Z, W)$ for each observation. \STATE Set Y to 0 if $P(Y = 0| X, Z, W) \geq 0.5$ and to 1 otherwise. \STATE Calculate $P(T=0|X, Z)$ for each observation and attach to data. -\STATE Sort the data by (1) the judges' and (2) by probabilities $P(T=0|X, Z)$ in descending order. -\STATE \hskip3.0em $\rhd$ Now the most dangerous subjects for each of the judges are at the top. +\STATE Sort the data by (1) the judges and (2) by probabilities $P(T=0|X, Z)$ in descending order. +\STATE \hskip3.0em $\rhd$ Now the most dangerous subjects for each judge are at the top. \STATE If subject belongs to the top $(1-r) \cdot 100 \%$ of observations assigned to that judge, set $T=0$ else set $T=1$. \STATE Halve the data to training and test sets at random. \STATE For both halves, set $Y=$ NA if decision is negative ($T=0$). @@ -691,51 +691,51 @@ Monte Carlo & 0.001292 & 0.016629 & 0.009429 & 0.0179825\\ Different types of modules (data generation, decider and evaluator) are presented in this section. Summary table is presented last. See section \ref{sec:modular_framework} for a more thorough break-down on the properties of each module. -\begin{algorithm}[] % enter the algorithm environment +\subsection{Data generation modules} + +We have three different kinds of data generating modules (DG modules). The differences of the DG modules are due to two factors: whether there are unobservables and whether the outcome will be drawn from Bernoulli distribution. 
The only algorithm generating data without unobservables is algorithm \ref{alg:dg:coinflip_without_z}; algorithms \ref{alg:dg:threshold_with_Z} and \ref{alg:dg:coinflip_with_z} generate data with unobservables. The outcome is drawn from a Bernoulli distribution in algorithms \ref{alg:dg:coinflip_without_z} and \ref{alg:dg:coinflip_with_z} and in algorithm \ref{alg:dg:threshold_with_Z} the outcome is set when a value exceeds a certain threshold. + +\begin{algorithm}[h] % enter the algorithm environment \caption{Data generation module: outcome from Bernoulli without unobservables} % give the algorithm a caption \label{alg:dg:coinflip_without_z} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Parameters: Total number of subjects $N_{total}$ \ENSURE -\FORALL{$i$ in $1, \ldots, N_{total}$} - \STATE Draw $x_i$ from from a standard Gaussian. - \STATE Draw $y_i$ from Bernoulli$(1-\sigma(x_i))$. - \STATE Attach to data. +\FORALL{observations} + \STATE Draw $x$ from a standard Gaussian. + \STATE Draw $y$ from Bernoulli$(1-\sigma(x))$. \ENDFOR \RETURN data \end{algorithmic} \end{algorithm} - -\begin{algorithm}[h] % enter the algorithm environment \caption{Data generation module: outcome by threshold with unobservables} % give the algorithm a caption \label{alg:dg:threshold_with_Z} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Parameters: Total number of subjects $N_{total},~\beta_X=1,~\beta_Z=1$ and $\beta_W=0.2$. \ENSURE -\FORALL{$i$ in $1, \ldots, N_{total}$} - \STATE Draw $x_i, z_i$ and $w_i$ from from standard Gaussians independently. - \IF{$\sigma(\beta_Xx_i+\beta_Zz_i+\beta_Ww_i) \geq 0.5$} - \STATE {Set $y_i$ to 0.} +\FORALL{observations} + \STATE Draw $x, z$ and $w$ from standard Gaussians independently. 
+ \IF{$\sigma(\beta_Xx+\beta_Zz+\beta_Ww) \geq 0.5$} + \STATE {Set $y$ to 0.} \ELSE - \STATE {Set $y_i$ to 1.} + \STATE {Set $y$ to 1.} \ENDIF - \STATE Attach to data. \ENDFOR \RETURN data \end{algorithmic} \end{algorithm} -\begin{algorithm}[] % enter the algorithm environment +\begin{algorithm}[h] % enter the algorithm environment \caption{Data generation module: outcome from Bernoulli with unobservables} % give the algorithm a caption \label{alg:dg:coinflip_with_z} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Parameters: Total number of subjects $N_{total},~\beta_X=1,~\beta_Z=1$ and $\beta_W=0.2$. \ENSURE -\FORALL{$i$ in $1, \ldots, N_{total}$} - \STATE Draw $x_i, z_i$ and $w_i$ from from standard Gaussians independently. - \STATE Draw $y_i$ from Bernoulli$(1-\sigma(\beta_XX+\beta_ZZ+\beta_WW))$. - \STATE Attach to data. +\FORALL{observations} + \STATE Draw $x, z$ and $w$ from from standard Gaussians independently. + \STATE Draw $y$ from Bernoulli$(1-\sigma(\beta_Xx+\beta_Zz+\beta_Ww))$. \ENDFOR \RETURN data \end{algorithmic} @@ -743,63 +743,72 @@ Different types of modules (data generation, decider and evaluator) are presente %For decider modules, input as terms of knowledge and parameters should be as explicitly specified as possible. -\begin{algorithm}[] % enter the algorithm environment -\caption{Decider module: human judge as specified by Lakkaraju et al. \cite{lakkaraju17}} % give the algorithm a caption +\subsection{Decider modules} + +We have three different kinds of decider modules. Their distinctive feature is the decisions' independence, for example in algorithm \ref{alg:decider:human} the decisions of a decision-maker are dependent on the other subjects assigned to that decision-maker. + +Below is presented the human decision-maker \cite{lakkaraju17}. 
The human decision-maker (1) takes all the subjects as a batch, (2) makes an approximation of the subjects' probabilities for a negative outcome and (3) assigns the decisions by giving $r\cdot 100\%$ of the least likely to fail a positive decision. The resulting decisions are not independent as they depend on the presence of other observations. + +\begin{algorithm}[H] % enter the algorithm environment +\caption{Decider module: human decision-maker by Lakkaraju et al. \cite{lakkaraju17}} % give the algorithm a caption \label{alg:decider:human} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment -\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $M=100, \beta_X=1, \beta_Z=1$. +\REQUIRE Data with features $X, Z$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $M=100, \beta_X=1, \beta_Z=1$. \ENSURE \STATE Sample acceptance rates for each M judges from Uniform$(0.1; 0.9)$ and round to tenth decimal place. \STATE Assign each observation to a judge at random. \STATE Calculate $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ) + \epsilon$ for each observation and attach to data. \STATE Sort the data by (1) the judges and (2) by the probabilities in descending order. -\STATE \hskip3.0em $\rhd$ Now the most dangerous subjects for each of the judges are at the top. \STATE If subject belongs to the top $(1-r) \cdot 100 \%$ of observations assigned to that judge, set $T=0$ else set $T=1$. -\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Might not be performed.} +\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Optional.} \RETURN data with decisions. 
\end{algorithmic} \end{algorithm} -\begin{algorithm}[] % enter the algorithm environment -\caption{Decider module: decisions from Bernoulli (pseudo-leniencies set at 0.5)} % give the algorithm a caption +One discussed way of making the decisions independent was to ``flip a coin at some probability''. An implementation of that idea is presented below in algorithm \ref{alg:decider:coinflip}. As $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ)$, the parameter for the Bernoulli distribution is set to $1-\sigma(\beta_XX+\beta_ZZ)$. In the practical implementation, as some algorithms need to know the leniency of the decision, the acceptance rate is then calculated from the decisions. + +\begin{algorithm}[H] % enter the algorithm environment +\caption{Decider module: decisions from Bernoulli} % give the algorithm a caption \label{alg:decider:coinflip} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment -\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$. +\REQUIRE Data with features $X, Z$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$. \ENSURE -\FORALL{$i$ in $1, \ldots, N_{total}$} - \STATE Draw $t_i$ from Bernoulli$(\sigma(\beta_Xx_i+\beta_Zz_i))$. - \STATE Attach to data. -\ENDFOR -\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Might not be performed.} +\STATE Draw $t$ from Bernoulli$(1-\sigma(\beta_Xx+\beta_Zz))$ for all observations. +\STATE Compute the acceptance rate. +\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Optional.} \RETURN data with decisions. \end{algorithmic} \end{algorithm} -\begin{algorithm}[] % enter the algorithm environment +A quantile-based decider module is presented in algorithm \ref{alg:decider:quantile}. 
The algorithm tries to emulate the human decision-maker as in algorithm \ref{alg:decider:human} while giving out independent decisions. To achieve this, we first ``train'' the decision-maker by showing it a large number of subjects so that they can assess how high a new subject would rank in their probability for a negative outcome, whether they would be in the top 10\% or in the bottom 25\%. Then new decisions can be made using this rule with a guarantee that the fraction of positive decisions will converge to $r$. + +In practice, the pdf and subsequently the inverse cdf $F^{-1}$ are constructed by first sampling $10^7$ (i.e. many) observations from $\beta_XX+\beta_ZZ$ (where $X, Z \sim N(0, 1)$) and applying the inverse of the logit function $\sigma(x)$. Now the decision-maker has a reference distribution against which to compare any new subjects. Whenever presented with a new subject, the decision-maker uses the reference distribution and makes a judgement based on the $r^{th}$ quantile. + +For example, a decision-maker with leniency 0.60 gets a new subject $\{x, z\}$ with a predicted probability $\sigma(\beta_Xx+\beta_Zz)\approx 0.7$ for a negative outcome with some coefficients $\beta$. Now, as the judge has leniency 0.6 their cut-point is $F^{-1}(0.60)\approx0.65$. That is, the judge will not give a positive decision to anyone with failure probability greater than 0.65, so our example subject will receive a negative decision. Due to simulating a large number of instances for training the judge, we can say that in the long run the judge will give positive decisions to 60\% of subjects presented to them. 
+ +\begin{algorithm}[H] % enter the algorithm environment \caption{Decider module: "quantile decisions"} % give the algorithm a caption \label{alg:decider:quantile} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment -\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$. +\REQUIRE Data with features $X, Z$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$. \ENSURE \STATE Sample acceptance rates for each M judges from Uniform$(0.1; 0.9)$ and round to tenth decimal place. \STATE Assign each observation to a judge at random. -\STATE Calculate $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ)$ for each observation and attach to data. -\FORALL{$i$ in $1, \ldots, N_{total}$} - \IF{$\sigma(\beta_Xx_i+\beta_Zz_i) \geq F^{-1}_{\pr(T=0|X, Z)}(r)$ \footnotemark} % Footnote text below algorithm - \STATE{Set $t_i=0$.} - \ELSE - \STATE{Set $t_i=1$.} - \ENDIF - \STATE Attach to data. -\ENDFOR -\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Might not be performed.} +\STATE Construct the quantile function $F^{-1}(q)$. +\STATE Calculate $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ)$ for all observations. +\STATE If $\sigma(\beta_Xx+\beta_Zz) \geq F^{-1}(r)$ set $t=0$, otherwise set $t=1$. +\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Optional.} \RETURN data with decisions. \end{algorithmic} \end{algorithm} -\footnotetext{The inverse cumulative distribution function (or quantile function) $F^{-1}$ was constructed by first sampling $10^7$ observations from $N(0, 2)$ (sum of two Gaussians) and applying the inverse of logit function $\sigma(x)$. 
The value of $F^{-1}(r)$ was computed utilizing the previously computed array and numpy's \texttt{quantile} function.} +\subsection{Evaluator modules} -\begin{algorithm}[] % enter the algorithm environment +Evaluator modules take some version of data as input and output an estimate of the failure given the input. + +Motivation for the contraction algorithm is presented in Lakkaraju's paper \cite{lakkaraju17}. The algorithm below is a slight modification of it to incorporate model $\B$. The original algorithm has been copied to algorithm \ref{alg:contraction}. + +\begin{algorithm}[H] % enter the algorithm environment \caption{Evaluator module: Contraction algorithm \cite{lakkaraju17}} % give the algorithm a caption \label{alg:eval:contraction} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment @@ -822,7 +831,11 @@ Different types of modules (data generation, decider and evaluator) are presente \end{algorithmic} \end{algorithm} -\begin{algorithm}[] % enter the algorithm environment +%te eroaa muista algoritmeista / eriyispiirteenä että se saa käyttöönsä myös piilotetut outcomet. + +True evaluation module computes the "true failure rate" of a predictive model had it been deployed to make independent decisions. 
The module first assigns each observation with a predicted + +\begin{algorithm}[H] % enter the algorithm environment \caption{Evaluator module: True evaluation} % give the algorithm a caption \label{alg:eval:true_eval} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment @@ -838,7 +851,9 @@ Different types of modules (data generation, decider and evaluator) are presente \end{algorithmic} \end{algorithm} -\begin{algorithm}[] % enter the algorithm environment +%lo kuten te, mutta määrätyt Y:t on piilotettu + +\begin{algorithm}[H] % enter the algorithm environment \caption{Evaluator module: Labeled outcomes} % give the algorithm a caption \label{alg:eval:labeled_outcomes} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment @@ -855,7 +870,7 @@ Different types of modules (data generation, decider and evaluator) are presente \end{algorithmic} \end{algorithm} -\begin{algorithm}[] % enter the algorithm environment +\begin{algorithm}[H] % enter the algorithm environment \caption{Evaluator module: Human evaluation} % give the algorithm a caption \label{alg:eval:human_eval} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment @@ -869,7 +884,10 @@ Different types of modules (data generation, decider and evaluator) are presente \end{algorithmic} \end{algorithm} -\begin{algorithm}[] % enter the algorithm environment + +%kausaali evaluaattori kuten MM ensimmäisenä sen esitti + +\begin{algorithm}[H] % enter the algorithm environment \caption{Evaluator module: Causal evaluator (?)} % give the algorithm a caption \label{alg:eval:causal_eval} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment @@ -886,7 +904,9 @@ Different types of modules (data generation, decider and evaluator) are presente \end{algorithmic} \end{algorithm} 
-\begin{algorithm}[] % enter the algorithm environment +%alla oleva montecarlo perusajatus ennustaa Z ja sen perusteella imputoida Y. selitä kaikki ja yksinkertaista + +\begin{algorithm}[H] % enter the algorithm environment \caption{Evaluator module: Monte Carlo evaluator, imputation} % give the algorithm a caption \label{alg:eval:mc} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment @@ -901,8 +921,8 @@ Different types of modules (data generation, decider and evaluator) are presente \STATE Compute the values of the inverse cdf of the observations in \texttt{quants} for the acceptance rates r of each judge and assign to $Q_r$. \FORALL{$i$ in $1, \ldots, N_{test}$} \IF{$t_i = 0$} - \STATE{Take all $Z + \epsilon > logit(Q_{r,i})-x_i$ , \footnotemark~where $\epsilon \sim N(0, 0.1)$.} - \ELSE + \STATE{Take all $Z + \epsilon > logit(Q_{r,i})-x_i$ , where $\epsilon \sim N(0, 0.1)$.} + \ELSE \STATE{Take all $Z + \epsilon < logit(Q_{r,i})-x_i$ , where $\epsilon \sim N(0, 0.1)$.} \ENDIF \STATE Compute $\bar{z}=\frac{1}{n}\sum z$ @@ -916,9 +936,11 @@ Different types of modules (data generation, decider and evaluator) are presente \end{algorithmic} \end{algorithm} -\footnotetext{$logit^{-1}(x+z)>a \Leftrightarrow x+z > logit(a) \Leftrightarrow z > logit(a)-x$} +\subsection{Summary table} -\begin{table}[h!] +Summary table of different modules. + +\begin{table}[H] \centering \caption{Summary of modules (under construction)} \begin{tabular}{lll} @@ -927,8 +949,8 @@ Different types of modules (data generation, decider and evaluator) are presente \textbf{Data generator} & \textbf{Decider} & \textbf{Evaluator} \\ \midrule {\ul Without unobservables} & Independent decisions & {\ul Labeled outcomes} \\ - & 1. flip a coin by & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\ - {\ul With unobservables} & $P(T=0|X, Z)$ & \tabitem acceptance rate r \\ + & 1. 
draw T from a Bernoulli & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\ + {\ul With unobservables} & with $P(T=0|X, Z)$ & \tabitem acceptance rate r \\ \tabitem $P(Y=0|X, Z, W)$ & 2. determine with $F^{-1}(r)$ & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip] {\ul With unobservables} & Non-independent decisions & {\ul True evaluation} \\ @@ -961,7 +983,7 @@ Different types of modules (data generation, decider and evaluator) are presente \label{tab:modules} \end{table} -\begin{thebibliography}{9} +\begin{thebibliography}{9} % Might have been apa \bibitem{dearteaga18} De-Arteaga, Maria. Learning Under Selective Labels in the Presence of Expert Consistency. 2018.