Analysis_07MAY2019_old.ipynb

    "#                                              residuals_2=tmp[:, 1])\n",
    "\n",
    "#         # Regress Y on X and residuals from step 2.\n",
    "#         lr_y, __ = fitPredictiveModel(\n",
    "#             train_labeled.dropna()[['X', 'residuals_1', 'residuals_2']],\n",
    "#             train_labeled.dropna().result_Y, np.ones((1, 3)), 0)\n",
    "#         # With the test data, predict Y by\n",
    "#         # repeating steps 1 and 2\n",
    "#         # (Regress T on X)\n",
    "#         lr_t, __ = fitPredictiveModel(test.X,\n",
    "#                                          test.decision_T, np.ones(1),\n",
    "#                                          1)\n",
    "\n",
    "#         # (Calculate the residuals from previous regression)\n",
    "#         residuals_T = test.decision_T - \\\n",
    "#             lr_t.predict(test.X.values.reshape(-1, 1))\n",
    "#         test = test.assign(residuals_T=residuals_T)\n",
    "\n",
    "#         # (Convert residuals from -1, 0 and 1 values to one-hot-encoded.\n",
    "#         # this way there will be separate betas for each type of residual.)\n",
    "#         enc = OneHotEncoder(categories='auto')\n",
    "#         resid_tf = test.residuals_T.values.reshape(-1, 1)\n",
    "#         tmp = enc.fit_transform(resid_tf).toarray()\n",
    "#         test = test.assign(residuals_1=tmp[:, 0], residuals_2=tmp[:, 1])\n",
    "\n",
    "#         # by using the model from step 3 with X and the residuals from 4.a. as input\n",
    "\n",
    "#         preds = getProbabilityForClass(\n",
    "#             test[['X', 'residuals_1', 'residuals_2']], lr_y, 0)\n",
    "\n",
    "#         test = test.assign(preds=preds)\n",
    "\n",
    "        # True evaluation\n",
    "        #\n",
    "        # Sort by failure probabilities, subjects with the smallest risk are first.\n",
    "        test.sort_values(by='B_prob_0_model', inplace=True, ascending=True)\n",
    "\n",
    "        to_release = int(round(test.shape[0] * r / 10))\n",
    "\n",
    "        # Calculate failure rate as the ratio of failures to those who were given a\n",
    "        # positive decision, i.e. those whose probability of negative outcome was\n",
    "        # low enough.\n",
    "        f_rate_true[i] = np.sum(\n",
    "            test.result_Y[0:to_release] == 0) / test.shape[0]\n",
    "\n",
    "        # Labeled outcomes only\n",
    "        #\n",
    "        # Sort by failure probabilities, subjects with the smallest risk are first.\n",
    "        test_labeled.sort_values(by='B_prob_0_model',\n",
    "                                 inplace=True,\n",
    "                                 ascending=True)\n",
    "\n",
    "        to_release = int(round(test_labeled.shape[0] * r / 10))\n",
    "\n",
    "        f_rate_label[i] = np.sum(\n",
    "            test_labeled.result_Y[0:to_release] == 0) / test_labeled.shape[0]\n",
    "\n",
    "        # Human evaluation\n",
    "        #\n",
    "        # Get judges with correct leniency as list\n",
    "        correct_leniency_list = test_labeled.judgeID_J[\n",
    "            test_labeled['acceptanceRate_R'].round(1) == r / 10].values\n",
    "\n",
    "        # Released are the people they judged and released, T = 1\n",
    "        released = test_labeled[\n",
    "            test_labeled.judgeID_J.isin(correct_leniency_list)\n",
    "            & (test_labeled.decision_T == 1)]\n",
    "\n",
    "        # Get their failure rate, aka ratio of reoffenders to number of people judged in total\n",
    "        f_rate_human[i] = np.sum(\n",
    "            released.result_Y == 0) / correct_leniency_list.shape[0]\n",
    "\n",
    "        # Contraction, logistic regression\n",
    "        #\n",
    "        f_rate_cont[i] = contraction(test_labeled, 'judgeID_J', 'decision_T',\n",
    "                                     'result_Y', 'B_prob_0_model',\n",
    "                                     'acceptanceRate_R', r / 10)\n",
    "\n",
    "        # Causal model - empirical performance\n",
    "\n",
    "#         released = bailIndicator(\n",
    "#             r * 10, lr_y, train_labeled[['X', 'residuals_1', 'residuals_2']],\n",
    "#             test[['X', 'residuals_1', 'residuals_2']])\n",
    "        \n",
    "        released = bailIndicator(r * 10, logreg, train_labeled.X, test.X)\n",
    "        \n",
    "        #released = cdf(test.X, logreg, 0) < r / 10\n",
    "\n",
    "#         released = npr.choice([True, False], size = test.X.shape, p=[r/10, 1-r/10])\n",
    "        f_rate_caus[i] = np.mean(test.B_prob_0_model * released)\n",
    "\n",
    "        #percentiles = estimatePercentiles(train_labeled.X, logreg, N_sample=train_labeled.shape[0])\n",
    "\n",
    "        # def releaseProbability(x):\n",
    "        #    return calcReleaseProbabilities(r*10, train_labeled.X, x, logreg, percentileMatrix=percentiles)\n",
    "\n",
    "        # def integraali(x):\n",
    "        #    p_y0 = logreg.predict_proba(x.reshape(-1, 1))[:, 0]\n",
    "\n",
    "        #    p_t1 = releaseProbability(x)\n",
    "\n",
    "        #    p_x = scs.norm.pdf(x)\n",
    "\n",
    "        #    return p_y0 * p_t1 * p_x\n",
    "\n",
    "        #f_rate_caus[i] = si.quad(lambda x: integraali(np.ones((1, 1))*x), -10, 10)[0]\n",
    "\n",
    "    failure_rates[r - 1, 0] = np.mean(f_rate_true)\n",
    "    failure_rates[r - 1, 1] = np.mean(f_rate_label)\n",
    "    failure_rates[r - 1, 2] = np.mean(f_rate_human)\n",
    "    failure_rates[r - 1, 3] = np.mean(f_rate_cont)\n",
    "    failure_rates[r - 1, 4] = np.mean(f_rate_caus)\n",
    "\n",
    "    failure_sems[r - 1, 0] = scs.sem(f_rate_true)\n",
    "    failure_sems[r - 1, 1] = scs.sem(f_rate_label)\n",
    "    failure_sems[r - 1, 2] = scs.sem(f_rate_human)\n",
    "    failure_sems[r - 1, 3] = scs.sem(f_rate_cont)\n",
    "    failure_sems[r - 1, 4] = scs.sem(f_rate_caus)\n",
    "\n",
    "x_ax = np.arange(0.1, 0.9, 0.1)\n",
    "\n",
    "plt.errorbar(x_ax,\n",
    "             failure_rates[:, 0],\n",
    "             label='True Evaluation',\n",
    "             c='green',\n",
    "             yerr=failure_sems[:, 0])\n",
    "plt.errorbar(x_ax,\n",
    "             failure_rates[:, 1],\n",
    "             label='Labeled outcomes',\n",
    "             c='magenta',\n",
    "             yerr=failure_sems[:, 1])\n",
    "plt.errorbar(x_ax,\n",
    "             failure_rates[:, 2],\n",
    "             label='Human evaluation',\n",
    "             c='red',\n",
    "             yerr=failure_sems[:, 2])\n",
    "plt.errorbar(x_ax,\n",
    "             failure_rates[:, 3],\n",
    "             label='Contraction, log.',\n",
    "             c='blue',\n",
    "             yerr=failure_sems[:, 3])\n",
    "plt.errorbar(x_ax,\n",
    "             failure_rates[:, 4],\n",
    "             label='Causal model, ep',\n",
    "             c='black',\n",
    "             yerr=failure_sems[:, 4])\n",
    "\n",
    "plt.title('Failure rate vs. Acceptance rate with unobservables')\n",
    "plt.xlabel('Acceptance rate')\n",
    "plt.ylabel('Failure rate')\n",
    "plt.legend()\n",
    "plt.grid()\n",
    "plt.show()\n",
    "\n",
    "print(failure_rates)\n",
    "print(\"\\nMean absolute errors:\")\n",
    "for i in range(1, failure_rates.shape[1]):\n",
    "    print(np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": true,
   "toc_position": {
    "height": "1084px",
    "left": "228px",
    "top": "111.133px",
    "width": "300.7px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "position": {
    "height": "352.85px",
    "left": "1070px",
    "right": "20px",
    "top": "120px",
    "width": "350px"
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}