diff --git a/analysis_and_scripts/Analysis_07MAY2019_old.ipynb b/analysis_and_scripts/Analysis_07MAY2019_old.ipynb index afd68c061c69998237840fac2a892b6ef9aae3b8..e0e53d27d177d03e940d0fa27d88eb79f434a520 100644 --- a/analysis_and_scripts/Analysis_07MAY2019_old.ipynb +++ b/analysis_and_scripts/Analysis_07MAY2019_old.ipynb @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -221,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ diff --git a/analysis_and_scripts/Analysis_25JUN2019_modular.ipynb b/analysis_and_scripts/Analysis_25JUN2019_modular.ipynb index 3f501023444e62555540e0a8378515e2be1d76c2..7af029a0470e6ce72887d5341f611e62970fb013 100644 --- a/analysis_and_scripts/Analysis_25JUN2019_modular.ipynb +++ b/analysis_and_scripts/Analysis_25JUN2019_modular.ipynb @@ -7,7 +7,7 @@ }, "source": [ "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n", - "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Data-generation-modules\" data-toc-modified-id=\"Data-generation-modules-1\"><span class=\"toc-item-num\">1 </span>Data generation modules</a></span></li><li><span><a href=\"#Decider-modules\" data-toc-modified-id=\"Decider-modules-2\"><span class=\"toc-item-num\">2 </span>Decider modules</a></span></li><li><span><a href=\"#Evaluator-modules\" data-toc-modified-id=\"Evaluator-modules-3\"><span class=\"toc-item-num\">3 </span>Evaluator modules</a></span><ul class=\"toc-item\"><li><span><a href=\"#Convenience-functions\" data-toc-modified-id=\"Convenience-functions-3.1\"><span class=\"toc-item-num\">3.1 </span>Convenience functions</a></span></li><li><span><a href=\"#Contraction-algorithm\" data-toc-modified-id=\"Contraction-algorithm-3.2\"><span class=\"toc-item-num\">3.2 </span>Contraction algorithm</a></span></li><li><span><a href=\"#Evaluators\" 
data-toc-modified-id=\"Evaluators-3.3\"><span class=\"toc-item-num\">3.3 </span>Evaluators</a></span></li></ul></li><li><span><a href=\"#Performance-comparison\" data-toc-modified-id=\"Performance-comparison-4\"><span class=\"toc-item-num\">4 </span>Performance comparison</a></span><ul class=\"toc-item\"><li><span><a href=\"#Without-unobservables-in-the-data\" data-toc-modified-id=\"Without-unobservables-in-the-data-4.1\"><span class=\"toc-item-num\">4.1 </span>Without unobservables in the data</a></span></li><li><span><a href=\"#With-unobservables-in-the-data\" data-toc-modified-id=\"With-unobservables-in-the-data-4.2\"><span class=\"toc-item-num\">4.2 </span>With unobservables in the data</a></span></li></ul></li></ul></div>" + "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Data-generation-modules\" data-toc-modified-id=\"Data-generation-modules-1\"><span class=\"toc-item-num\">1 </span>Data generation modules</a></span></li><li><span><a href=\"#Decider-modules\" data-toc-modified-id=\"Decider-modules-2\"><span class=\"toc-item-num\">2 </span>Decider modules</a></span></li><li><span><a href=\"#Evaluator-modules\" data-toc-modified-id=\"Evaluator-modules-3\"><span class=\"toc-item-num\">3 </span>Evaluator modules</a></span><ul class=\"toc-item\"><li><span><a href=\"#Convenience-functions\" data-toc-modified-id=\"Convenience-functions-3.1\"><span class=\"toc-item-num\">3.1 </span>Convenience functions</a></span></li><li><span><a href=\"#Contraction-algorithm\" data-toc-modified-id=\"Contraction-algorithm-3.2\"><span class=\"toc-item-num\">3.2 </span>Contraction algorithm</a></span></li><li><span><a href=\"#Evaluators\" data-toc-modified-id=\"Evaluators-3.3\"><span class=\"toc-item-num\">3.3 </span>Evaluators</a></span></li></ul></li><li><span><a href=\"#Performance-comparison\" data-toc-modified-id=\"Performance-comparison-4\"><span class=\"toc-item-num\">4 </span>Performance comparison</a></span><ul class=\"toc-item\"><li><span><a 
href=\"#Without-unobservables-in-the-data\" data-toc-modified-id=\"Without-unobservables-in-the-data-4.1\"><span class=\"toc-item-num\">4.1 </span>Without unobservables in the data</a></span></li><li><span><a href=\"#With-unobservables-in-the-data\" data-toc-modified-id=\"With-unobservables-in-the-data-4.2\"><span class=\"toc-item-num\">4.2 </span>With unobservables in the data</a></span></li></ul></li><li><span><a href=\"#Bayesian-sampling\" data-toc-modified-id=\"Bayesian-sampling-5\"><span class=\"toc-item-num\">5 </span>Bayesian sampling</a></span></li></ul></div>" ] }, { @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -68,14 +68,18 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 94, "metadata": {}, "outputs": [], "source": [ - "def sigmoid(x):\n", - " '''Return value of sigmoid function (inverse of logit) at x.'''\n", + "def inv_logit(x):\n", + " '''Return value of inverse of logit function (logistic / sigmoid) at x.'''\n", + "\n", + " return 1.0 / (1.0 + np.exp(-1.0 * x))\n", "\n", - " return 1 / (1 + np.exp(-1 * x))\n", + "\n", + "def logit(p):\n", + " return np.log(p) - np.log(1 - p)\n", "\n", "\n", "def coinFlipDGWithoutUnobservables(N_total=50000):\n", @@ -85,11 +89,11 @@ " # Sample feature X from standard Gaussian distribution, N(0, 1).\n", " df = df.assign(X=npr.normal(size=N_total))\n", "\n", - " # Calculate P(Y=0|X=x) = 1 / (1 + exp(-X)) = sigmoid(X)\n", - " df = df.assign(probabilities_Y=sigmoid(df.X))\n", + " # Calculate P(Y=0|X=x) = 1 / (1 + exp(-X)) = inv_logit(X)\n", + " df = df.assign(probabilities_Y=inv_logit(df.X))\n", "\n", - " # Draw Y ~ Bernoulli(1 - sigmoid(X))\n", - " # Note: P(Y=1|X=x) = 1 - P(Y=0|X=x) = 1 - sigmoid(X)\n", + " # Draw Y ~ Bernoulli(1 - inv_logit(X))\n", + " # Note: P(Y=1|X=x) = 1 - P(Y=0|X=x) = 1 - inv_logit(X)\n", " results = npr.binomial(n=1, p=1 - df.probabilities_Y, size=N_total)\n", "\n", " df = 
df.assign(result_Y=results)\n", @@ -107,7 +111,7 @@ " df = df.assign(W=npr.normal(size=N_total))\n", "\n", " # Calculate P(Y=0|X, Z, W)\n", - " probabilities_Y = sigmoid(beta_X * df.X + beta_Z * df.Z + beta_W * df.W)\n", + " probabilities_Y = inv_logit(beta_X * df.X + beta_Z * df.Z + beta_W * df.W)\n", "\n", " df = df.assign(probabilities_Y=probabilities_Y)\n", "\n", @@ -129,15 +133,13 @@ " df = df.assign(Z=npr.normal(size=N_total))\n", " df = df.assign(W=npr.normal(size=N_total))\n", "\n", - " # Calculate P(Y=0|X=x) = 1 / (1 + exp(-X)) = sigmoid(X)\n", - " probabilities_Y = sigmoid(beta_X * df.X + beta_Z * df.Z + beta_W * df.W)\n", + " # Calculate P(Y=0|X=x) = 1 / (1 + exp(-X)) = inv_logit(X)\n", + " probabilities_Y = inv_logit(beta_X * df.X + beta_Z * df.Z + beta_W * df.W)\n", "\n", " df = df.assign(probabilities_Y=probabilities_Y)\n", "\n", " # Draw Y from Bernoulli distribution\n", - " results = npr.binomial(n=1,\n", - " p=1 - df.probabilities_Y,\n", - " size=N_total)\n", + " results = npr.binomial(n=1, p=1 - df.probabilities_Y, size=N_total)\n", "\n", " df = df.assign(result_Y=results)\n", "\n", @@ -153,17 +155,18 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ "def humanDeciderLakkaraju(df,\n", " result_Y,\n", " featureX_col,\n", - " featureZ_col,\n", + " featureZ_col=None,\n", " nJudges_M=100,\n", " beta_X=1,\n", " beta_Z=1,\n", + " add_epsilon=True,\n", " hide_unobserved=True):\n", "\n", " # Assert that every judge will have the same number of subjects.\n", @@ -182,8 +185,14 @@ " # Replicate the rates so they can be attached to the corresponding judge ID.\n", " df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))\n", "\n", - " probabilities_T = sigmoid(beta_X * df[featureX_col] + beta_Z * df[featureZ_col])\n", - " probabilities_T += np.sqrt(0.1) * npr.normal(size=nJudges_M * nSubjects_N)\n", + " if featureZ_col is None:\n", + " probabilities_T = inv_logit(beta_X * 
df[featureX_col])\n", + " else:\n", + " probabilities_T = inv_logit(\n", + " beta_X * df[featureX_col] + beta_Z * df[featureZ_col])\n", + "\n", + " if add_epsilon:\n", + " probabilities_T += np.sqrt(0.1) * npr.normal(size=df.shape[0])\n", "\n", " df = df.assign(probabilities_T=probabilities_T)\n", "\n", @@ -203,7 +212,7 @@ " df['decision_T'] = np.where((df.index.values % nSubjects_N) <\n", " ((1 - df['acceptanceRate_R']) * nSubjects_N),\n", " 0, 1)\n", - " \n", + "\n", " if hide_unobserved:\n", " df.loc[df.decision_T == 0, result_Y] = np.nan\n", "\n", @@ -212,10 +221,11 @@ "\n", "def coinFlipDecider(df,\n", " featureX_col,\n", - " featureZ_col,\n", + " featureZ_col=None,\n", " nJudges_M=100,\n", " beta_X=1,\n", " beta_Z=1,\n", + " add_epsilon=False,\n", " hide_unobserved=True):\n", "\n", " # Assert that every judge will have the same number of subjects.\n", @@ -227,18 +237,20 @@ " # Assign judge IDs as running numbering from 0 to nJudges_M - 1\n", " df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))\n", "\n", - " # Sample acceptance rates uniformly from a closed interval\n", - " # from 0.1 to 0.9 and round to tenth decimal place.\n", - " #acceptance_rates = np.round(npr.uniform(.1, .9, nJudges_M), 10)\n", - " \n", - " # No real leniency here???\n", - " acceptance_rates = np.ones(nJudges_M)*0.5\n", - " \n", + " # No real leniency here -> set to 0.5.\n", + " acceptance_rates = np.ones(nJudges_M) * 0.5\n", + "\n", " # Replicate the rates so they can be attached to the corresponding judge ID.\n", " df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))\n", "\n", - " probabilities_T = sigmoid(beta_X * df[featureX_col] + beta_Z * df[featureZ_col])\n", - " #probabilities_T += np.sqrt(0.1) * npr.normal(size=nJudges_M * nSubjects_N)\n", + " if featureZ_col is None:\n", + " probabilities_T = inv_logit(beta_X * df[featureX_col])\n", + " else:\n", + " probabilities_T = inv_logit(\n", + " beta_X * df[featureX_col] + beta_Z * 
df[featureZ_col])\n", + "\n", + " if add_epsilon:\n", + " probabilities_T += np.sqrt(0.1) * npr.normal(size=df.shape[0])\n", "\n", " df = df.assign(probabilities_T=probabilities_T)\n", "\n", @@ -246,10 +258,64 @@ " decisions = npr.binomial(n=1, p=1 - df.probabilities_T, size=df.shape[0])\n", "\n", " df = df.assign(decision_T=decisions)\n", - " \n", + "\n", " if hide_unobserved:\n", " df.loc[df.decision_T == 0, 'result_Y'] = np.nan\n", + "\n", + " return df\n", + "\n", + "\n", + "def quantileDecider(df,\n", + " featureX_col,\n", + " featureZ_col=None,\n", + " nJudges_M=100,\n", + " beta_X=1,\n", + " beta_Z=1,\n", + " add_epsilon=False,\n", + " hide_unobserved=True,\n", + " N_sim = int(1e7)):\n", + "\n", + " # Assert that every judge will have the same number of subjects.\n", + " assert df.shape[0] % nJudges_M == 0, \"Can't assign subjets evenly!\"\n", + "\n", + " # Compute the number of subjects allocated for each judge.\n", + " nSubjects_N = int(df.shape[0] / nJudges_M)\n", + "\n", + " # Assign judge IDs as running numbering from 0 to nJudges_M - 1\n", + " df = df.assign(judgeID_J=np.repeat(range(0, nJudges_M), nSubjects_N))\n", + "\n", + " # Sample acceptance rates uniformly from a closed interval\n", + " # from 0.1 to 0.9 and round to tenth decimal place.\n", + " acceptance_rates = np.round(npr.uniform(.1, .9, nJudges_M), 10)\n", + "\n", + " # Replicate the rates so they can be attached to the corresponding judge ID.\n", + " df = df.assign(acceptanceRate_R=np.repeat(acceptance_rates, nSubjects_N))\n", + "\n", + " if featureZ_col is None:\n", + " probabilities_T = inv_logit(beta_X * df[featureX_col])\n", + " else:\n", + " probabilities_T = inv_logit(\n", + " beta_X * df[featureX_col] + beta_Z * df[featureZ_col])\n", + "\n", + " if add_epsilon:\n", + " probabilities_T += np.sqrt(0.1) * npr.normal(size=df.shape[0])\n", + "\n", + " df = df.assign(probabilities_T=probabilities_T)\n", + "\n", + " # Construct the quantile function by sampling from two Gaussians and 
using\n", + " # their sum through the inverse of logit.\n", + " probs = inv_logit(npr.normal(size=N_sim) + npr.normal(size=N_sim))\n", + "\n", + " # Now if P(Y=0|X, Z) (named 'probabilities_T') is greater than Q(r), a.k.a\n", + " # the rth quantile of 'probs', T will be 0 and 1 otherwise.\n", + " df = df.assign(quantile_bounds = np.quantile(probs, df.acceptanceRate_R))\n", " \n", + " df = df.assign(decision_T=np.where(\n", + " df.probabilities_T >= df.quantile_bounds, 0, 1))\n", + "\n", + " if hide_unobserved:\n", + " df.loc[df.decision_T == 0, 'result_Y'] = np.nan\n", + "\n", " return df" ] }, @@ -264,7 +330,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -385,7 +451,7 @@ " y_values = scs.norm.pdf(x_values)\n", "\n", " results = np.zeros(x_0.shape[0])\n", - " print(\"en loop\")\n", + "\n", " for i in range(x_0.shape[0]):\n", "\n", " y_copy = y_values.copy()\n", @@ -393,7 +459,7 @@ " y_copy[x_preds > prediction_x_0[i]] = 0\n", " \n", " results[i] = si.simps(y_copy, x=x_values)\n", - " print(\"jlk loop\")\n", + "\n", " return results\n", "\n", "\n", @@ -457,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -519,7 +585,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 218, "metadata": {}, "outputs": [], "source": [ @@ -559,7 +625,7 @@ "\n", " test.sort_values(by='B_prob_0_model', inplace=True, ascending=True)\n", "\n", - " to_release = int(round(test.shape[0] * r / 10))\n", + " to_release = int(round(test.shape[0] * r))\n", "\n", " return np.sum(test[resultY_col][0:to_release] == 0) / test.shape[0]\n", "\n", @@ -581,7 +647,7 @@ " inplace=False,\n", " ascending=True)\n", "\n", - " to_release = int(round(test_observed.shape[0] * r / 10))\n", + " to_release = int(round(test_observed.shape[0] * r))\n", "\n", " return np.sum(\n", " test_observed[resultY_col][0:to_release] == 0) / 
test.shape[0]\n", @@ -591,7 +657,7 @@ " accRateR_col, r):\n", "\n", " # Get judges with correct leniency as list\n", - " is_correct_leniency = df[accRateR_col].round(1) == r / 10\n", + " is_correct_leniency = df[accRateR_col].round(1) == r\n", "\n", " correct_leniency_list = df.loc[is_correct_leniency, judgeIDJ_col]\n", "\n", @@ -614,9 +680,58 @@ "\n", " test = test.assign(B_prob_0_model=predictions)\n", "\n", - " released = cdf(test[featureX_col], B_model, 0) < r / 10\n", + " released = cdf(test[featureX_col], B_model, 0) < r\n", "\n", - " return np.mean(test.B_prob_0_model * released)" + " return np.mean(test.B_prob_0_model * released)\n", + "\n", + "\n", + "def monteCarloEvaluator(df,\n", + " featureX_col,\n", + " decisionT_col,\n", + " resultY_col,\n", + " accRateR_col,\n", + " r,\n", + " N_sim=int(1e6)):\n", + "\n", + " train, test = train_test_split(df, test_size=0.5)\n", + "\n", + " B_model, predictions = fitPredictiveModel(\n", + " train.loc[train[decisionT_col] == 1, featureX_col],\n", + " train.loc[train[decisionT_col] == 1, resultY_col], test[featureX_col],\n", + " 0)\n", + "\n", + " test = test.assign(B_prob_0_model=predictions)\n", + "\n", + " quants = inv_logit(npr.normal(size=N_sim) + npr.normal(size=N_sim))\n", + "\n", + " Z = npr.normal(size=N_sim)\n", + " e = np.sqrt(0.1) * npr.normal(size=N_sim)\n", + " test = test.assign(predicted_Y=np.zeros_like(test[featureX_col]))\n", + "\n", + " q_r = np.quantile(quants, test[accRateR_col])\n", + "\n", + " test = test.assign(bounds=logit(q_r) - test[featureX_col])\n", + "\n", + " for i in range(test.shape[0]):\n", + " Zp = []\n", + " if test[decisionT_col].iloc[i] == 0:\n", + " Zp = Z[Z + e > test.bounds.iloc[i]]\n", + " else:\n", + " Zp = Z[Z + e < test.bounds.iloc[i]]\n", + "\n", + " test.iloc[i, test.columns == 'predicted_Y'] = inv_logit(\n", + " test[featureX_col].iloc[i] + np.mean(Zp))\n", + "\n", + " predictions = npr.binomial(n=1, p=1-test.predicted_Y, size=test.shape[0])\n", + "\n", + " 
test[resultY_col] = np.where(\n", + " test[decisionT_col] == 0, predictions, test[resultY_col])\n", + "\n", + " test.sort_values(by='B_prob_0_model', inplace=True, ascending=True)\n", + "\n", + " to_release = int(round(test.shape[0] * r))\n", + "\n", + " return np.sum(test[resultY_col][0:to_release] == 0) / test.shape[0]" ] }, { @@ -630,186 +745,182 @@ }, { "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, + "metadata": {}, "source": [ - "### Without unobservables in the data\n", - "\n", - "The underlying figure is attached to the preliminary paper. When conducting finalization, last analysis should be conducted with a preset random seed." + "### Without unobservables in the data" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 217, "metadata": { - "hidden": true, "scrolled": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1] 0 1 2 3 4 5 6 7 8 9 [2] 0 1 2 3 4 5 6 7 8 9 [3] 0 1 2 3 4 5 6 7 8 9 [4] 0 1 2 3 4 5 6 7 8 9 [5] 0 1 2 3 4 5 6 7 8 9 [6] 0 1 2 3 4 5 6 7 8 9 [7] 0 1 2 3 4 5 6 7 8 9 [8] 0 1 2 3 4 5 6 7 8 9 " + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 720x432 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.015424 0.005856 0.00700873 0.01609152 0.015964 ]\n", + " [0.042124 0.015944 0.02125723 0.04435839 0.043916 ]\n", + " [0.075464 0.02872 0.04810652 0.07724867 0.08046 ]\n", + " [0.1153 0.043196 0.10059008 0.10665967 0.124604 ]\n", + " [0.163496 0.0588 0.16363783 0.15052531 0.177884 ]\n", + " [0.216236 0.079128 0.24141272 0.21342228 0.236504 ]\n", + " [0.275828 0.101944 0.31497867 0.28211075 0.299696 ]\n", + " [0.341464 0.1261 0.40679724 0.33790021 0.37086 ]]\n", + "\n", + "Mean absolute errors:\n", + "0.09820600000000002\n", + "0.025143988486187003\n", + "0.004869733260249362\n", + 
"0.013068999999999989\n" + ] + } + ], "source": [ - "# f_rates = np.zeros((8, 5))\n", - "# f_sems = np.zeros((8, 5))\n", + "failure_rates = np.zeros((8, 5))\n", + "failure_sems = np.zeros((8, 5))\n", "\n", - "# nIter = 15\n", + "nIter = 10\n", "\n", - "# #npr.seed(0)\n", + "for r in np.arange(1, 9):\n", "\n", - "# for r in np.arange(1, 9):\n", + " print(\"[\", r, \"]\", sep='', end=\" \")\n", "\n", - "# print(\"[\", r, \"]\", sep='', end=\" \")\n", + " f_rate_true = np.zeros(nIter)\n", + " f_rate_label = np.zeros(nIter)\n", + " f_rate_human = np.zeros(nIter)\n", + " f_rate_cont = np.zeros(nIter)\n", + " f_rate_caus = np.zeros(nIter)\n", "\n", - "# s_f_rate_true = np.zeros(nIter)\n", - "# s_f_rate_labeled = np.zeros(nIter)\n", - "# s_f_rate_human = np.zeros(nIter)\n", - "# s_f_rate_cont = np.zeros(nIter)\n", - "# s_f_rate_caus = np.zeros(nIter)\n", + " for i in range(nIter):\n", "\n", - "# for i in range(nIter):\n", + " print(i, end=\" \")\n", + "\n", + " # Create data\n", + " df = coinFlipDGWithoutUnobservables()\n", + "\n", + " # Decider\n", + " df_labeled = quantileDecider(df,\n", + " featureX_col=\"X\",\n", + " featureZ_col=None,\n", + " nJudges_M=100,\n", + " beta_X=1,\n", + " beta_Z=1,\n", + " hide_unobserved=True)\n", + "\n", + " df_unlabeled = quantileDecider(df,\n", + " featureX_col=\"X\",\n", + " featureZ_col=None,\n", + " nJudges_M=100,\n", + " beta_X=1,\n", + " beta_Z=1,\n", + " hide_unobserved=False)\n", + "\n", + " # True evaluation\n", "\n", - "# print(i, end=\" \")\n", + " f_rate_true[i] = trueEvaluationEvaluator(df_unlabeled, 'X',\n", + " 'decision_T', 'result_Y',\n", + " r / 10)\n", "\n", - "# s_train_labeled, s_train, s_test_labeled, s_test, s_df = dataWithoutUnobservables(sigma=2)\n", + " # Labeled outcomes only\n", "\n", - "# s_logreg, predictions = fitPredictiveModel(\n", - "# s_train_labeled.dropna().X,\n", - "# s_train_labeled.dropna().result_Y, s_test.X, 0)\n", - "# s_test = s_test.assign(B_prob_0_model=predictions)\n", + " f_rate_label[i] = 
labeledOutcomesEvaluator(df_labeled, 'X',\n", + " 'decision_T', 'result_Y',\n", + " r / 10)\n", "\n", - "# s_logreg, predictions_labeled = fitPredictiveModel(\n", - "# s_train_labeled.dropna().X,\n", - "# s_train_labeled.dropna().result_Y, s_test_labeled.X, 0)\n", - "# s_test_labeled = s_test_labeled.assign(\n", - "# B_prob_0_model=predictions_labeled)\n", + " # Human evaluation\n", "\n", - "# #### True evaluation\n", - "# # Sort by actual failure probabilities, subjects with the smallest risk are first.\n", - "# s_sorted = s_test.sort_values(by='B_prob_0_model',\n", - "# inplace=False,\n", - "# ascending=True)\n", - "\n", - "# to_release = int(round(s_sorted.shape[0] * r / 10))\n", - "\n", - "# # Calculate failure rate as the ratio of failures to successes among those\n", - "# # who were given a positive decision, i.e. those whose probability of negative\n", - "# # outcome was low enough.\n", - "# s_f_rate_true[i] = np.sum(\n", - "# s_sorted.result_Y[0:to_release] == 0) / s_sorted.shape[0]\n", - "\n", - "# #### Labeled outcomes\n", - "# # Sort by estimated failure probabilities, subjects with the smallest risk are first.\n", - "# s_sorted = s_test_labeled.sort_values(by='B_prob_0_model',\n", - "# inplace=False,\n", - "# ascending=True)\n", - "\n", - "# to_release = int(round(s_test_labeled.dropna().shape[0] * r / 10))\n", - "\n", - "# # Calculate failure rate as the ratio of failures to successes among those\n", - "# # who were given a positive decision, i.e. 
those whose probability of negative\n", - "# # outcome was low enough.\n", - "# s_f_rate_labeled[i] = np.sum(\n", - "# s_sorted.result_Y[0:to_release] == 0) / s_sorted.shape[0]\n", - "\n", - "# #### Human error rate\n", - "# # Get judges with correct leniency as list\n", - "# correct_leniency_list = s_test_labeled.judgeID_J[\n", - "# s_test_labeled['acceptanceRate_R'].round(1) == r / 10].values\n", - "\n", - "# # Released are the people they judged and released, T = 1\n", - "# released = s_test_labeled[\n", - "# s_test_labeled.judgeID_J.isin(correct_leniency_list)\n", - "# & (s_test_labeled.decision_T == 1)]\n", - "\n", - "# # Get their failure rate, aka ratio of reoffenders to number of people judged in total\n", - "# s_f_rate_human[i] = np.sum(\n", - "# released.result_Y == 0) / correct_leniency_list.shape[0]\n", - "\n", - "# #### Contraction\n", - "# s_f_rate_cont[i] = contraction(s_test_labeled, 'judgeID_J',\n", - "# 'decision_T', 'result_Y',\n", - "# 'B_prob_0_model', 'acceptanceRate_R',\n", - "# r / 10)\n", - "# #### Causal model\n", - "\n", - "# #released = bailIndicator(r * 10, s_logreg, s_train.X, s_test.X)\n", - "# released=0\n", - "# #released = cdf(s_test.X, s_logreg, 0) < r / 10\n", - "\n", - "# s_f_rate_caus[i] = np.mean(s_test.B_prob_0_model * released)\n", - "\n", - "# ########################\n", - "# #percentiles = estimatePercentiles(s_train_labeled.X, s_logreg)\n", - "\n", - "# #def releaseProbability(x):\n", - "# # return calcReleaseProbabilities(r * 10,\n", - "# # s_train_labeled.X,\n", - "# # x,\n", - "# # s_logreg,\n", - "# # percentileMatrix=percentiles)\n", - "\n", - "# #def integrand(x):\n", - "# # p_y0 = s_logreg.predict_proba(x.reshape(-1, 1))[:, 0]\n", - "\n", - "# # p_t1 = releaseProbability(x)\n", - "\n", - "# # p_x = scs.norm.pdf(x)\n", - "\n", - "# # return p_y0 * p_t1 * p_x\n", - "\n", - "# #s_f_rate_caus[i] = si.quad(lambda x: integrand(np.ones((1, 1)) * x),\n", - "# # -10, 10)[0]\n", - "\n", - "# f_rates[r - 1, 0] = 
np.mean(s_f_rate_true)\n", - "# f_rates[r - 1, 1] = np.mean(s_f_rate_labeled)\n", - "# f_rates[r - 1, 2] = np.mean(s_f_rate_human)\n", - "# f_rates[r - 1, 3] = np.mean(s_f_rate_cont)\n", - "# f_rates[r - 1, 4] = np.mean(s_f_rate_caus)\n", - "\n", - "# f_sems[r - 1, 0] = scs.sem(s_f_rate_true)\n", - "# f_sems[r - 1, 1] = scs.sem(s_f_rate_labeled)\n", - "# f_sems[r - 1, 2] = scs.sem(s_f_rate_human)\n", - "# f_sems[r - 1, 3] = scs.sem(s_f_rate_cont)\n", - "# f_sems[r - 1, 4] = scs.sem(s_f_rate_caus)\n", - "\n", - "# x_ax = np.arange(0.1, 0.9, 0.1)\n", - "\n", - "# plt.errorbar(x_ax,\n", - "# f_rates[:, 0],\n", - "# label='True Evaluation',\n", - "# c='green',\n", - "# yerr=f_sems[:, 0])\n", - "# plt.errorbar(x_ax,\n", - "# f_rates[:, 1],\n", - "# label='Labeled outcomes',\n", - "# c='magenta',\n", - "# yerr=f_sems[:, 1])\n", - "# plt.errorbar(x_ax,\n", - "# f_rates[:, 2],\n", - "# label='Human evaluation',\n", - "# c='red',\n", - "# yerr=f_sems[:, 2])\n", - "# plt.errorbar(x_ax,\n", - "# f_rates[:, 3],\n", - "# label='Contraction, log.',\n", - "# c='blue',\n", - "# yerr=f_sems[:, 3])\n", - "# # plt.errorbar(x_ax,\n", - "# # f_rates[:, 4],\n", - "# # label='Causal model, ep',\n", - "# # c='black',\n", - "# # yerr=f_sems[:, 4])\n", - "\n", - "# plt.title('Failure rate vs. 
Acceptance rate without unobservables')\n", - "# plt.xlabel('Acceptance rate')\n", - "# plt.ylabel('Failure rate')\n", - "# plt.legend()\n", - "# plt.grid()\n", - "# plt.show()\n", - "\n", - "# print(f_rates)\n", - "# print(\"\\nMean absolute errors:\")\n", - "# for i in range(1, f_rates.shape[1]):\n", - "# print(np.mean(np.abs(f_rates[:, 0] - f_rates[:, i])))" + " f_rate_human[i] = humanEvaluationEvaluator(df_labeled, 'judgeID_J',\n", + " 'decision_T', 'result_Y',\n", + " 'acceptanceRate_R', r / 10)\n", + "\n", + " # Contraction\n", + "\n", + " f_rate_cont[i] = contractionEvaluator(df_labeled, 'X', 'judgeID_J',\n", + " 'decision_T', 'result_Y',\n", + " 'acceptanceRate_R', r / 10)\n", + "\n", + " # Causal model - empirical performance\n", + "\n", + " #f_rate_caus[i] = causalEvaluator(df_labeled, 'X', 'decision_T',\n", + " # 'result_Y', r / 10)\n", + "\n", + " f_rate_caus[i] = monteCarloEvaluator(df_labeled, 'X', 'decision_T',\n", + " 'result_Y', 'acceptanceRate_R',\n", + " r / 10, N_sim=int(1e5))\n", + "\n", + " failure_rates[r - 1, 0] = np.mean(f_rate_true)\n", + " failure_rates[r - 1, 1] = np.mean(f_rate_label)\n", + " failure_rates[r - 1, 2] = np.mean(f_rate_human)\n", + " failure_rates[r - 1, 3] = np.mean(f_rate_cont)\n", + " failure_rates[r - 1, 4] = np.mean(f_rate_caus)\n", + "\n", + " failure_sems[r - 1, 0] = scs.sem(f_rate_true)\n", + " failure_sems[r - 1, 1] = scs.sem(f_rate_label)\n", + " failure_sems[r - 1, 2] = scs.sem(f_rate_human)\n", + " failure_sems[r - 1, 3] = scs.sem(f_rate_cont)\n", + " failure_sems[r - 1, 4] = scs.sem(f_rate_caus)\n", + "\n", + "x_ax = np.arange(0.1, 0.9, 0.1)\n", + "\n", + "plt.errorbar(x_ax,\n", + " failure_rates[:, 0],\n", + " label='True Evaluation',\n", + " c='green',\n", + " yerr=failure_sems[:, 0])\n", + "plt.errorbar(x_ax,\n", + " failure_rates[:, 1],\n", + " label='Labeled outcomes',\n", + " c='magenta',\n", + " yerr=failure_sems[:, 1])\n", + "plt.errorbar(x_ax,\n", + " failure_rates[:, 2],\n", + " label='Human 
evaluation',\n", + " c='red',\n", + " yerr=failure_sems[:, 2])\n", + "plt.errorbar(x_ax,\n", + " failure_rates[:, 3],\n", + " label='Contraction',\n", + " c='blue',\n", + " yerr=failure_sems[:, 3])\n", + "plt.errorbar(x_ax,\n", + " failure_rates[:, 4],\n", + " label='Monte Carlo',\n", + " c='black',\n", + " yerr=failure_sems[:, 4])\n", + "\n", + "plt.title('Failure rate vs. Acceptance rate without Z (modular)')\n", + "plt.xlabel('Acceptance rate')\n", + "plt.ylabel('Failure rate')\n", + "plt.legend()\n", + "plt.grid()\n", + "plt.show()\n", + "\n", + "print(failure_rates)\n", + "print(\"\\nMean absolute errors:\")\n", + "for i in range(1, failure_rates.shape[1]):\n", + " print(np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])))" ] }, { @@ -823,7 +934,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 216, "metadata": { "scrolled": false }, @@ -832,44 +943,39 @@ "name": "stdout", "output_type": "stream", "text": [ - "[1] 0 en loop\n" + "[1] 0 1 2 3 4 5 6 7 8 9 [2] 0 1 2 3 4 5 6 7 8 9 [3] 0 1 2 3 4 5 6 7 8 9 [4] 0 1 2 3 4 5 6 7 8 9 [5] 0 1 2 3 4 5 6 7 8 9 [6] 0 1 2 3 4 5 6 7 8 9 [7] 0 1 2 3 4 5 6 7 8 9 [8] 0 1 2 3 4 5 6 7 8 9 " ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/rikulain/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:78: RuntimeWarning: invalid value encountered in long_scalars\n" - ] + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 720x432 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ - "jlk loop\n", - "1 en loop\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/rikulain/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:78: RuntimeWarning: invalid value encountered in long_scalars\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-97-03cd8a3c6103>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m f_rate_caus[i] = causalEvaluator(df_labeled, 'X', 'decision_T',\n\u001b[0;32m---> 67\u001b[0;31m 'result_Y', r / 10)\n\u001b[0m\u001b[1;32m 68\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0mfailure_rates\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mr\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf_rate_true\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m<ipython-input-96-4ef6c6b281a2>\u001b[0m in \u001b[0;36mcausalEvaluator\u001b[0;34m(df, featureX_col, decisionT_col, resultY_col, r)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0mtest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massign\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mB_prob_0_model\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m \u001b[0mreleased\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcdf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfeatureX_col\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mB_model\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mr\u001b[0m \u001b[0;34m/\u001b[0m 
\u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 93\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mB_prob_0_model\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mreleased\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m<ipython-input-94-f0303c92af6c>\u001b[0m in \u001b[0;36mcdf\u001b[0;34m(x_0, model, class_value)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0my_copy\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx_preds\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mprediction_x_0\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 125\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_copy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mx_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 126\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"jlk loop\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/scipy/integrate/quadrature.py\u001b[0m in \u001b[0;36msimps\u001b[0;34m(y, x, dx, axis, even)\u001b[0m\n\u001b[1;32m 477\u001b[0m \u001b[0mfirst_dx\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mslice2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mslice1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 478\u001b[0m \u001b[0mval\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mfirst_dx\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mslice2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mslice1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 479\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0m_basic_simps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mN\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 480\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0meven\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'avg'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 481\u001b[0m \u001b[0mval\u001b[0m \u001b[0;34m/=\u001b[0m \u001b[0;36m2.0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/scipy/integrate/quadrature.py\u001b[0m in \u001b[0;36m_basic_simps\u001b[0;34m(y, start, stop, x, dx, axis)\u001b[0m\n\u001b[1;32m 358\u001b[0m \u001b[0mh0divh1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh0\u001b[0m \u001b[0;34m/\u001b[0m 
\u001b[0mh1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 359\u001b[0m tmp = hsum/6.0 * (y[slice0]*(2-1.0/h0divh1) +\n\u001b[0;32m--> 360\u001b[0;31m \u001b[0my\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mslice1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mhsum\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mhsum\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mhprod\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 361\u001b[0m y[slice2]*(2-h0divh1))\n\u001b[1;32m 362\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtmp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "[[0.019332 0.007024 0.01164503 0.02060147 0.0186 ]\n", + " [0.049196 0.01716 0.02757158 0.04587469 0.048304 ]\n", + " [0.0849 0.029032 0.05524133 0.0797729 0.083684 ]\n", + " [0.12758 0.04208 0.09012099 0.1207086 0.127128 ]\n", + " [0.175592 0.055572 0.14012835 0.17227871 0.173824 ]\n", + " [0.22782 0.076412 0.19436587 0.22492832 0.227844 ]\n", + " [0.285572 0.09602 0.25768682 0.28237595 0.286128 ]\n", + " [0.349356 0.107448 0.32420604 0.34169562 0.354052 ]]\n", + "\n", + "Mean absolute errors:\n", + "0.111075\n", + "0.02729774880878927\n", + "0.004206334614321109\n", + "0.0012920000000000015\n" ] } ], @@ -877,7 +983,7 @@ "failure_rates = np.zeros((8, 5))\n", "failure_sems = np.zeros((8, 5))\n", "\n", - "nIter = 8\n", + "nIter = 10\n", "\n", "for r in np.arange(1, 9):\n", "\n", @@ -897,7 +1003,7 @@ " df = coinFlipDGWithUnobservables()\n", "\n", " # Decider\n", - " df_labeled = coinFlipDecider(df,\n", + " df_labeled = quantileDecider(df,\n", " featureX_col=\"X\",\n", " featureZ_col=\"Z\",\n", " nJudges_M=100,\n", @@ -905,7 +1011,7 @@ " beta_Z=1,\n", " 
hide_unobserved=True)\n", "\n", - " df_unlabeled = coinFlipDecider(df,\n", + " df_unlabeled = quantileDecider(df,\n", " featureX_col=\"X\",\n", " featureZ_col=\"Z\",\n", " nJudges_M=100,\n", @@ -939,8 +1045,12 @@ "\n", " # Causal model - empirical performance\n", "\n", - " f_rate_caus[i] = causalEvaluator(df_labeled, 'X', 'decision_T',\n", - " 'result_Y', r / 10)\n", + " #f_rate_caus[i] = causalEvaluator(df_labeled, 'X', 'decision_T',\n", + " # 'result_Y', r / 10)\n", + "\n", + " f_rate_caus[i] = monteCarloEvaluator(df_labeled, 'X', 'decision_T',\n", + " 'result_Y', 'acceptanceRate_R',\n", + " r / 10, N_sim=int(1e5))\n", "\n", " failure_rates[r - 1, 0] = np.mean(f_rate_true)\n", " failure_rates[r - 1, 1] = np.mean(f_rate_label)\n", @@ -973,16 +1083,16 @@ " yerr=failure_sems[:, 2])\n", "plt.errorbar(x_ax,\n", " failure_rates[:, 3],\n", - " label='Contraction, log.',\n", + " label='Contraction',\n", " c='blue',\n", " yerr=failure_sems[:, 3])\n", "plt.errorbar(x_ax,\n", " failure_rates[:, 4],\n", - " label='Causal model, ep',\n", + " label='Monte Carlo',\n", " c='black',\n", " yerr=failure_sems[:, 4])\n", "\n", - "plt.title('Failure rate vs. Acceptance rate with unobservables')\n", + "plt.title('Failure rate vs. 
Acceptance rate with Z (modular)')\n", "plt.xlabel('Acceptance rate')\n", "plt.ylabel('Failure rate')\n", "plt.legend()\n", @@ -995,75 +1105,101 @@ " print(np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bayesian sampling" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { - "scrolled": true + "scrolled": false }, "outputs": [], "source": [ - "import pystan\n", - "\n", - "code = \"\"\"\n", - "functions{\n", - " // below taken from https://discourse.mc-stan.org/t/quantile-function-in-stan/3642/13\n", - " // as Stan doesn't have a quantile function nor supports real-to-int conversion.\n", - " int ub(real x) {\n", - " int ub = 1;\n", - " while (ub < x) ub *= 2;\n", - " return ub;\n", - " }\n", - "\n", - " int closest(real x, int a, int b) {\n", - " return fabs(x - a) < fabs(x - b) ? a : b;\n", - " }\n", - "\n", - " // L <= x <= U\n", - " int to_int_bsearch(real x, int L, int U);\n", - "\n", - " int to_int_bsearch(real x, int L, int U) {\n", - " int mid = (L + U) / 2;\n", - " if (L == U) return L;\n", - " if (L + 1 == U) return closest(x, L, U);\n", - " return x <= mid? 
to_int_bsearch(x, L, mid) : to_int_bsearch(x, mid, U);\n", - " }\n", - "\n", - " int to_int(real x);\n", - "\n", - " int to_int(real x) {\n", - " if (fabs(x) >= 2^31) reject(\"to_int arugment must be < 2^31, found x = \", x);\n", - " if (x < 0) return -to_int(-x);\n", - " return to_int_bsearch(x, 0, ub(x));\n", - " }\n", - "}\n", - "\n", - "data {\n", - " int<lower=0> N;\n", - " int<lower=0> N_quantiles;\n", - " real<lower=0, upper=1> r;\n", - " int<lower=0, upper=1> decision[N];\n", - " real X[N];\n", - " real<lower=0, upper=1> quantiles[N_quantiles];\n", - "}\n", - "\n", - "parameters {\n", - " real Z[N];\n", - "}\n", - "\n", - "model {\n", - " Z ~ normal(0, 1);\n", + "# import pystan\n", + "\n", + "# code = \"\"\"\n", + "# functions{\n", + "# // below taken from https://discourse.mc-stan.org/t/quantile-function-in-stan/3642/13\n", + "# // as Stan doesn't have a quantile function nor supports real-to-int conversion.\n", + "# int ub(real x) {\n", + "# int ub = 1;\n", + "# while (ub < x) ub *= 2;\n", + "# return ub;\n", + "# }\n", + "\n", + "# int closest(real x, int a, int b) {\n", + "# return fabs(x - a) < fabs(x - b) ? a : b;\n", + "# }\n", + "\n", + "# // L <= x <= U\n", + "# int to_int_bsearch(real x, int L, int U);\n", + "\n", + "# int to_int_bsearch(real x, int L, int U) {\n", + "# int mid = (L + U) / 2;\n", + "# if (L == U) return L;\n", + "# if (L + 1 == U) return closest(x, L, U);\n", + "# return x <= mid? 
to_int_bsearch(x, L, mid) : to_int_bsearch(x, mid, U);\n", + "# }\n", + "\n", + "# int to_int(real x);\n", + "\n", + "# int to_int(real x) {\n", + "# if (fabs(x) >= 2^31) reject(\"to_int arugment must be < 2^31, found x = \", x);\n", + "# if (x < 0) return -to_int(-x);\n", + "# return to_int_bsearch(x, 0, ub(x));\n", + "# }\n", + "# }\n", + "\n", + "# data {\n", + "# int<lower=0> N;\n", + "# int<lower=0> N_quantiles;\n", + "# real<lower=0, upper=1> r[N];\n", + "# int<lower=0, upper=1> decision[N];\n", + "# real X[N];\n", + "# real<lower=0, upper=1> quantiles[N_quantiles];\n", + "# }\n", + "\n", + "# parameters {\n", + "# real Z[N];\n", + "# real<lower=0, upper=1> p;\n", + "# }\n", + "\n", + "# model {\n", + "# Z ~ normal(0, 1);\n", " \n", - " for(i in 1:N){\n", - " decision ~ bernoulli(inv_logit(X[i] + Z[i]) >= quantiles[to_int(r*N_quantiles)] ? 1 : 0);\n", - " }\n", - "}\n", - "\"\"\"\n", - "\n", - "dat = dict()\n", - "\n", - "sm = pystan.StanModel(model_code=code)\n", - "fit = sm.sampling(data=dat, iter=4000, chains=4)" + "# for(i in 1:N){\n", + "# if (inv_logit(X[i] + Z[i]) <= quantiles[to_int(r[i]*N_quantiles)])\n", + "# decision ~ bernoulli(p);\n", + "# else\n", + "# decision ~ bernoulli(1-p);\n", + "# }\n", + "# }\n", + "# \"\"\"\n", + "# # Create data\n", + "# df = coinFlipDGWithUnobservables(N_total=500)\n", + "\n", + "# # Decider\n", + "# df = quantileDecider(df, featureX_col=\"X\", featureZ_col=\"Z\", nJudges_M=10,\n", + "# beta_X=1, beta_Z=1, hide_unobserved=True)\n", + "\n", + "# N_sim = int(1e8)\n", + "\n", + "# quants = inv_logit(npr.normal(size=N_sim) + npr.normal(size=N_sim))\n", + "\n", + "# dat = dict(N = df.shape[0],\n", + "# N_quantiles = 100001,\n", + "# r = df.acceptanceRate_R,\n", + "# decision = df.decision_T,\n", + "# X = df.X,\n", + "# quantiles = np.quantile(quants, np.linspace(0, 1, 100001)))\n", + "\n", + "# sm = pystan.StanModel(model_code=code)\n", + "# fit = sm.sampling(data=dat, iter=4000, chains=4)" ] } ], diff --git 
a/analysis_and_scripts/notes.tex b/analysis_and_scripts/notes.tex index 3721511329f047e294ac8c4a992f45a34e8972b5..96667c968e2dd65a9805c2d682c8123362bd6413 100644 --- a/analysis_and_scripts/notes.tex +++ b/analysis_and_scripts/notes.tex @@ -1,8 +1,8 @@ \documentclass[11pt,a4paper]{amsart} -\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. +\usepackage[margin=1in]{geometry} % See geometry.pdf to learn the layout options. There are lots. %\geometry{a4paper} % ... or letterpaper or a5paper or ... %\geometry{landscape} % Activate for for rotated page geometry -\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent +%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent \usepackage{graphicx} \usepackage{amssymb} \usepackage{epstopdf} @@ -249,29 +249,28 @@ Given the above framework, the goal is to create an evaluation algorithm that ca \end{wrapfigure} \emph{Below is the framework as was written on the whiteboard, then RL presents his own remarks on how he understood this.} - +~ \\ \begin{description} -\item[Data generation:] ~ \\ +\item[Data generation:] ~ \\ - \hskip 3em \textbf{Input:} [none] \\ ~ \\ - \textbf{Output:} $X, Z, W, Y$ as specified by $\M$ + \hskip 3em \textbf{Input:} [none] \\ + \textbf{Output:} $X, Z, W, Y$ as specified by $\M$~ \\ -\item[Decider:] single vs. batch \\ +\item[Decider:] single vs. 
batch ~ \\ \hskip 3em \textbf{Input:} \begin{itemize} \item one defendant \item $\M$ \end{itemize} - \textbf{Output:} \begin{itemize} \item argmax likelihood $y$ \item $\pr(Y=0~|~input)$ - \item order + \item order \\ \end{itemize} -\item[Evaluator:] ~ \\ +\item[Evaluator:] ~ \\ \hskip 3em \textbf{Input:} \begin{itemize} @@ -638,17 +637,59 @@ Given our framework defined in section \ref{sec:framework}, the results presente \label{fig:random_predictions} \end{figure} +\subsection{Modular framework -- Monte Carlo evaluator} \label{sec:modules_mc} + +For these results, data was generated with the module in algorithm \ref{alg:dg:coinflip_with_z} ("coin-flip results") and decisions were assigned using the module in algorithm \ref{alg:decider:quantile}. Curves computed with algorithms \ref{alg:eval:true_eval}, \ref{alg:eval:labeled_outcomes}, \ref{alg:eval:human_eval}, \ref{alg:eval:contraction} and \ref{alg:eval:mc} are presented in figure \ref{fig:modules_mc}. The corresponding MAEs are presented in table \ref{tab:modules_mc}. + +\begin{table}[H] +\centering +\caption{Mean absolute error (MAE) w.r.t.\ the true evaluation. See modules used in section \ref{sec:modules_mc}} +\begin{tabular}{l | c} +Method & MAE with Z \\ \hline +Labeled outcomes & 0.111075\\ +Human evaluation & 0.027298\\ +Contraction & 0.004206\\ +Monte Carlo & 0.001292\\ +\end{tabular} +\label{tab:modules_mc} +\end{table} + +\begin{figure}[H] + \centering + \includegraphics[width=0.75\textwidth]{sl_with_Z_10iter_coinflip_quantile_defaults_mc} + \caption{Failure rate vs. acceptance rate with varying levels of leniency. Data was generated with unobservables. See modules used in section \ref{sec:modules_mc}} + \label{fig:modules_mc} +\end{figure} + +%\begin{figure}[H] +% \centering +% \begin{subfigure}[b]{0.475\textwidth} +% \includegraphics[width=\textwidth]{sl_without_Z_10iter_coinflip_quantile_defaults_mc} +% \caption{Data without unobservables. 
PLACEHOLDER} +% \label{fig:modules_mc_without_Z} +% \end{subfigure} +% \quad %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc. +% %(or a blank line to force the subfigure onto a new line) +% \begin{subfigure}[b]{0.475\textwidth} +% \includegraphics[width=\textwidth]{sl_with_Z_10iter_coinflip_quantile_defaults_mc} +% \caption{Data with unobservables.} +% \label{fig:modules_mc_with_Z} +% \end{subfigure} +% \caption{Failure rate vs. acceptance rate with varying levels of leniency. See modules used in section \ref{sec:modules_mc}} +% \label{fig:modules_mc} +%\end{figure} + \section{Modules} Different types of modules are presented in this section. Summary table is presented last. -\subsection{Data generation modules} - -Data generation modules usually take only some generative parameters as input. +\begin{itemize} +\item Data generation modules usually take only some generative parameters as input. +\end{itemize} -\begin{algorithm}[H] % enter the algorithm environment +\begin{algorithm}[] % enter the algorithm environment \caption{Data generation module: "coin-flip results" without unobservables} % give the algorithm a caption -%\label{alg:} % and a label for \ref{} commands later in the document +\label{alg:dg:coinflip_without_z} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Parameters: Total number of subjects $N_{total}$ \ENSURE @@ -662,9 +703,9 @@ Data generation modules usually take only some generative parameters as input. 
\end{algorithm} -\begin{algorithm}[H] % enter the algorithm environment +\begin{algorithm}[] % enter the algorithm environment \caption{Data generation module: "results by threshold" with unobservables} % give the algorithm a caption -%\label{alg:} % and a label for \ref{} commands later in the document +\label{alg:dg:threshold_with_Z} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Parameters: Total number of subjects $N_{total},~\beta_X=1,~\beta_Z=1$ and $\beta_W=0.2$. \ENSURE @@ -677,9 +718,9 @@ Data generation modules usually take only some generative parameters as input. \end{algorithmic} \end{algorithm} -\begin{algorithm}[H] % enter the algorithm environment +\begin{algorithm}[] % enter the algorithm environment \caption{Data generation module: "coin-flip results" with unobservables} % give the algorithm a caption -%\label{alg:} % and a label for \ref{} commands later in the document +\label{alg:dg:coinflip_with_z} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Parameters: Total number of subjects $N_{total},~\beta_X=1,~\beta_Z=1$ and $\beta_W=0.2$. \ENSURE @@ -692,17 +733,15 @@ Data generation modules usually take only some generative parameters as input. \end{algorithmic} \end{algorithm} -\subsection{Decider modules} - %For decider modules, input as terms of knowledge and parameters should be as explicitly specified as possible. -\begin{algorithm}[H] % enter the algorithm environment -\caption{Decider module: human judge as specified by Lakkaraju et al.} % give the algorithm a caption -%\label{alg:} % and a label for \ref{} commands later in the document +\begin{algorithm}[] % enter the algorithm environment +\caption{Decider module: human judge as specified by Lakkaraju et al. 
\cite{lakkaraju17}} % give the algorithm a caption +\label{alg:decider:human} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $M=100, \beta_X=1, \beta_Z=1$. \ENSURE -\STATE Sample acceptance rates for each M judges from $U(0.1; 0.9)$ and round to tenth decimal place. +\STATE Sample acceptance rates for each M judges from Uniform$(0.1; 0.9)$ and round to tenth decimal place. \STATE Assign each observation to a judge at random. \STATE Calculate $P(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ) + \epsilon$ for each observation and attach to data. \STATE Sort the data by (1) the judges' and (2) by probabilities $P(T=0|X, Z)$ in descending order. @@ -713,9 +752,9 @@ Data generation modules usually take only some generative parameters as input. \end{algorithmic} \end{algorithm} -\begin{algorithm}[H] % enter the algorithm environment -\caption{Decider module: "coin-flip decisions"} % give the algorithm a caption -%\label{alg:} % and a label for \ref{} commands later in the document +\begin{algorithm}[] % enter the algorithm environment +\caption{Decider module: "coin-flip decisions" (pseudo-leniencies set at 0.5)} % give the algorithm a caption +\label{alg:decider:coinflip} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$. \ENSURE @@ -728,11 +767,33 @@ Data generation modules usually take only some generative parameters as input. 
\end{algorithmic} \end{algorithm} -\subsection{Evaluator modules} +\begin{algorithm}[] % enter the algorithm environment +\caption{Decider module: "quantile decisions"} % give the algorithm a caption +\label{alg:decider:quantile} % and a label for \ref{} commands later in the document +\begin{algorithmic}[1] % enter the algorithmic environment +\REQUIRE Data with features $X, Z$ of size $N_{total}$, knowledge that both of them affect the outcome Y and that they are independent / Parameters: $\beta_X=1, \beta_Z=1$. +\ENSURE +\STATE Sample acceptance rates for each M judges from Uniform$(0.1; 0.9)$ and round to tenth decimal place. +\STATE Assign each observation to a judge at random. +\STATE Calculate $\pr(T=0|X, Z) = \sigma(\beta_XX+\beta_ZZ)$ for each observation and attach to data. +\FORALL{$i$ in $1, \ldots, N_{total}$} + \IF{$\sigma(\beta_XX+\beta_ZZ) \geq F^{-1}_{\pr(T=0|X, Z)}(r)$ \footnotemark} % Footnote text below algorithm + \STATE {set $t_i=0$} + \ELSE + \STATE{set $t_i=1$} + \ENDIF + \STATE Attach to data. +\ENDFOR +\STATE Set $Y=$ NA if decision is negative ($T=0$). \emph{Might not be performed.} +\RETURN data with decisions. +\end{algorithmic} +\end{algorithm} + +\footnotetext{The inverse cumulative distribution function (or quantile function) $F^{-1}$ was constructed by first sampling $10^7$ observations from $N(0, 2)$ (sum of two Gaussians) and applying the inverse of logit function $\sigma(x)$. 
The value of $F^{-1}(r)$ was computed utilizing the previously computed array and numpy's \texttt{quantile} function.} -\begin{algorithm}[H] % enter the algorithm environment +\begin{algorithm}[] % enter the algorithm environment \caption{Evaluator module: Contraction algorithm \cite{lakkaraju17}} % give the algorithm a caption -%\label{alg:} % and a label for \ref{} commands later in the document +\label{alg:eval:contraction} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$, acceptance rate r, knowledge that X affects Y \ENSURE @@ -742,16 +803,12 @@ Data generation modules usually take only some generative parameters as input. \STATE Let $q$ be the decision-maker with highest acceptance rate in $\D$. \STATE $\D_q = \{(x, j, t, y) \in \D|j=q\}$ \STATE \hskip3.0em $\rhd$ $\D_q$ is the set of all observations judged by $q$ -\STATE \STATE $\RR_q = \{(x, j, t, y) \in \D_q|t=1\}$ \STATE \hskip3.0em $\rhd$ $\RR_q$ is the set of observations in $\D_q$ with observed outcome labels -\STATE \STATE Sort observations in $\RR_q$ in descending order of confidence scores $\s$ and assign to $\RR_q^{sort}$. \STATE \hskip3.0em $\rhd$ Observations deemed as high risk by the black-box model $\mathcal{B}$ are at the top of this list -\STATE \STATE Remove the top $[(1.0-r)|\D_q |]-[|\D_q |-|\RR_q |]$ observations of $\RR_q^{sort}$ and call this list $\mathcal{R_B}$ \STATE \hskip3.0em $\rhd$ $\mathcal{R_B}$ is the list of observations assigned to $t = 1$ by $\mathcal{B}$ -\STATE \STATE Compute $\mathbf{u}=\sum_{i=1}^{|\mathcal{R_B}|} \dfrac{\delta\{y_i=0\}}{| \D_q |}$. \RETURN $\mathbf{u}$ \end{algorithmic} @@ -759,7 +816,7 @@ Data generation modules usually take only some generative parameters as input. 
\begin{algorithm}[] % enter the algorithm environment \caption{Evaluator module: True evaluation} % give the algorithm a caption -%\label{alg:true_eval} % and a label for \ref{} commands later in the document +\label{alg:eval:true_eval} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Data $\D$ with properties $\{x_i, t_i, y_i\}$ and \emph{all outcome labels}, acceptance rate r, knowledge that X affects Y \ENSURE @@ -775,7 +832,7 @@ Data generation modules usually take only some generative parameters as input. \begin{algorithm}[] % enter the algorithm environment \caption{Evaluator module: Labeled outcomes} % give the algorithm a caption -%\label{alg:labeled_outcomes} % and a label for \ref{} commands later in the document +\label{alg:eval:labeled_outcomes} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Data $\D$ with properties $\{x_i, t_i, y_i\}$, acceptance rate r, knowledge that X affects Y \ENSURE @@ -792,7 +849,7 @@ Data generation modules usually take only some generative parameters as input. \begin{algorithm}[] % enter the algorithm environment \caption{Evaluator module: Human evaluation} % give the algorithm a caption -%\label{alg:human_eval} % and a label for \ref{} commands later in the document +\label{alg:eval:human_eval} % and a label for \ref{} commands later in the document \begin{algorithmic}[1] % enter the algorithmic environment \REQUIRE Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$, acceptance rate r \ENSURE @@ -804,25 +861,72 @@ Data generation modules usually take only some generative parameters as input. 
\end{algorithmic} \end{algorithm} -\subsection{Summary} +\begin{algorithm}[] % enter the algorithm environment +\caption{Evaluator module: Causal evaluator (?)} % give the algorithm a caption +\label{alg:eval:causal_eval} % and a label for \ref{} commands later in the document +\begin{algorithmic}[1] % enter the algorithmic environment +\REQUIRE Data $\D$ with properties $\{x_i, t_i, y_i\}$, acceptance rate r +\ENSURE +\STATE Split data to test set and training set. +\STATE Train a predictive model $\B$ on training data. +\STATE Estimate probability scores $\s$ using $\B$ for all observations in test data and attach to test data. +\FORALL{$i$ in $1, \ldots, N_{total}$} + \STATE Evaluate $F(x_i) = \int_{x\in\mathcal{X}} P_X(x)\delta(f(x)<f(x_i)) ~dx$ and assign to $\mathcal{F}_{predictions}$ +\ENDFOR +\STATE Create boolean array $T_{causal} = \mathcal{F}_{predictions} < r$. +\RETURN $\frac{1}{|\D_{test}|}\sum_{i=1}^{|\D_{test}|} \s_i \cdot T_{i, causal}$ which is equal to $\frac{1}{|\D|}\sum_{x\in\D} f(x)\delta(F(x) < r)$ +\end{algorithmic} +\end{algorithm} -\begin{table}[H] +\begin{algorithm}[] % enter the algorithm environment +\caption{Evaluator module: Monte Carlo evaluator, imputation} % give the algorithm a caption +\label{alg:eval:mc} % and a label for \ref{} commands later in the document +\begin{algorithmic}[1] % enter the algorithmic environment +\REQUIRE Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$, acceptance rate r +\ENSURE +\STATE Split data to test set and training set. +\STATE Train a predictive model $\B$ on training data. +\STATE Estimate probability scores $\s$ using $\B$ for all observations in test data and attach to test data. +\STATE Sample $N_{sim}$ observations from a standard Gaussian and assign to Z. +\STATE Sample $N_{sim}$ observations from sum of two standard Gaussians (N(0, 2)) and assign to \texttt{quants}. +\STATE Transform the values of the samples in \texttt{quants} using the inverse of logit function. 
+\STATE Compute the values of the inverse cdf of the observations in \texttt{quants} for the acceptance rates r of each judge and assign to $Q_r$. +\FORALL{$i$ in $1, \ldots, N_{test}$} + \IF{$t_i = 0$} + \STATE {Take all $Z > logit(Q_{r,i})-x_i$ \footnotemark} + \ELSE + \STATE{Take all $Z < logit(Q_{r,i})-x_i$} + \ENDIF + \STATE Draw predictions $\hat{p}_{i,y}$ from Bernoulli($1-logit^{-1}(x_i+\bar{Z})$). +\ENDFOR +\STATE Impute missing observations using $\hat{p}_y$. +\STATE Sort the data by the probabilities $\s$ to ascending order. +\STATE \hskip3.0em $\rhd$ Now the most dangerous subjects are last. +\STATE Calculate the number to release $N_{free} = |\D_{test}| \cdot r$. +\RETURN Compute $\frac{1}{|\D_{test}|}\sum_{i=1}^{N_{free}}\delta\{y_i=0\}$ using the observed and imputed observations. +\end{algorithmic} +\end{algorithm} + +\footnotetext{$logit^{-1}(x+z)>a \Leftrightarrow x+z > logit(a) \Leftrightarrow z > logit(a)-x$} + +\begin{table}[h!] \centering + \caption{Summary of modules (under construction)} \begin{tabular}{lll} \toprule \multicolumn{3}{c}{Module type} \\[.5\normalbaselineskip] \textbf{Data generator} & \textbf{Decider} & \textbf{Evaluator} \\ \midrule - Without unobservables & Independent decisions & {\ul Labeled outcomes} \\ - & \tabitem $P(T=0|X, Z)$ & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\ - With unobservables & \tabitem "threshold rule" & \tabitem acceptance rate r \\ - \tabitem $P(Y=0|X, Z, W)$ & & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip] + {\ul Without unobservables} & Independent decisions & {\ul Labeled outcomes} \\ + & 1. flip a coin by & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\ + {\ul With unobservables} & $P(T=0|X, Z)$ & \tabitem acceptance rate r \\ + \tabitem $P(Y=0|X, Z, W)$ & 2. 
determine with $F^{-1}(r)$ & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip] - With unobservables & & {\ul True evaluation} \\ - \tabitem "threshold rule" & & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\ - & & and \emph{all outcome labels} \\ - & & \tabitem acceptance rate r \\ - & & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip] + {\ul With unobservables} & Non-independent decisions & {\ul True evaluation} \\ + \tabitem assign Y by & 3. sort by $P(T=0|X, Z)$ & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\ + "threshold rule" & and assign $t$ by $r$ & and \emph{all outcome labels} \\ + & & \tabitem acceptance rate r \\ + & & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip] & & {\ul Human evaluation} \\ & & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\ @@ -837,10 +941,14 @@ Data generation modules usually take only some generative parameters as input. & & \tabitem Data $\D$ with properties $\{x_i, t_i, y_i\}$ \\ & & \tabitem acceptance rate r \\ & & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip] + + & & {\ul Monte Carlo evaluator} \\ + & & \tabitem Data $\D$ with properties $\{x_i, j_i, t_i, y_i\}$ \\ + & & \tabitem acceptance rate r \\ + & & \tabitem knowledge that X affects Y \\[.5\normalbaselineskip] \bottomrule \end{tabular} - \caption{Summary table of modules (under construction)} - \label{tab:jotain} + \label{tab:modules} \end{table} \begin{thebibliography}{9} diff --git a/figures/sl_with_Z_10iter_coinflip_quantile_defaults_mc.png b/figures/sl_with_Z_10iter_coinflip_quantile_defaults_mc.png new file mode 100644 index 0000000000000000000000000000000000000000..33df95cc24abfecaa60fb0e241033f236d3dfa58 Binary files /dev/null and b/figures/sl_with_Z_10iter_coinflip_quantile_defaults_mc.png differ