Skip to content
Snippets Groups Projects
Analysis_07MAY2019_old.ipynb 182 KiB
Newer Older
  • Learn to ignore specific revisions
  •     "#                                              residuals_2=tmp[:, 1])\n",
        "\n",
        "#         # Regress Y on X and residuals from step 2.\n",
        "#         lr_y, __ = fitPredictiveModel(\n",
        "#             train_labeled.dropna()[['X', 'residuals_1', 'residuals_2']],\n",
        "#             train_labeled.dropna().result_Y, np.ones((1, 3)), 0)\n",
        "#         # With the test data, predict Y by\n",
        "#         # repeating steps 1 and 2\n",
        "#         # (Regress T on X)\n",
        "#         lr_t, __ = fitPredictiveModel(test.X,\n",
        "#                                          test.decision_T, np.ones(1),\n",
        "#                                          1)\n",
        "\n",
        "#         # (Calculate the residuals from previous regression)\n",
        "#         residuals_T = test.decision_T - \\\n",
        "#             lr_t.predict(test.X.values.reshape(-1, 1))\n",
        "#         test = test.assign(residuals_T=residuals_T)\n",
        "\n",
        "#         # (Convert residuals from -1, 0 and 1 values to a one-hot encoding;\n",
        "#         # this way there will be separate betas for each type of residual.)\n",
        "#         enc = OneHotEncoder(categories='auto')\n",
        "#         resid_tf = test.residuals_T.values.reshape(-1, 1)\n",
        "#         tmp = enc.fit_transform(resid_tf).toarray()\n",
        "#         test = test.assign(residuals_1=tmp[:, 0], residuals_2=tmp[:, 1])\n",
        "\n",
        "#         # by using the model from step 3 with X and the residuals from 4.a. as input\n",
        "\n",
        "#         preds = getProbabilityForClass(\n",
        "#             test[['X', 'residuals_1', 'residuals_2']], lr_y, 0)\n",
        "\n",
        "#         test = test.assign(preds=preds)\n",
        "\n",
        "        # True evaluation\n",
        "        #\n",
        "        # Sort by failure probabilities, subjects with the smallest risk are first.\n",
        "        test.sort_values(by='B_prob_0_model', inplace=True, ascending=True)\n",
        "\n",
        "        to_release = int(round(test.shape[0] * r / 10))\n",
        "\n",
        "        # Calculate failure rate as the number of failures among those who were\n",
        "        # given a positive decision (i.e. those whose probability of a negative\n",
        "        # outcome was low enough) divided by the total number of rows in test.\n",
        "        f_rate_true[i] = np.sum(\n",
        "            test.result_Y[0:to_release] == 0) / test.shape[0]\n",
        "\n",
        "        # Labeled outcomes only\n",
        "        #\n",
        "        # Sort by failure probabilities, subjects with the smallest risk are first.\n",
        "        test_labeled.sort_values(by='B_prob_0_model',\n",
        "                                 inplace=True,\n",
        "                                 ascending=True)\n",
        "\n",
        "        to_release = int(round(test_labeled.shape[0] * r / 10))\n",
        "\n",
        "        f_rate_label[i] = np.sum(\n",
        "            test_labeled.result_Y[0:to_release] == 0) / test_labeled.shape[0]\n",
        "\n",
        "        # Human evaluation\n",
        "        #\n",
        "        # Get judges with correct leniency as list\n",
        "        correct_leniency_list = test_labeled.judgeID_J[\n",
        "            test_labeled['acceptanceRate_R'].round(1) == r / 10].values\n",
        "\n",
        "        # Released are the people they judged and released, T = 1\n",
        "        released = test_labeled[\n",
        "            test_labeled.judgeID_J.isin(correct_leniency_list)\n",
        "            & (test_labeled.decision_T == 1)]\n",
        "\n",
        "        # Get their failure rate, i.e. the ratio of reoffenders to the number of people judged in total\n",
        "        f_rate_human[i] = np.sum(\n",
        "            released.result_Y == 0) / correct_leniency_list.shape[0]\n",
        "\n",
        "        # Contraction, logistic regression\n",
        "        #\n",
        "        f_rate_cont[i] = contraction(test_labeled, 'judgeID_J', 'decision_T',\n",
        "                                     'result_Y', 'B_prob_0_model',\n",
        "                                     'acceptanceRate_R', r / 10)\n",
        "\n",
        "        # Causal model - empirical performance\n",
        "\n",
        "#         released = bailIndicator(\n",
        "#             r * 10, lr_y, train_labeled[['X', 'residuals_1', 'residuals_2']],\n",
        "#             test[['X', 'residuals_1', 'residuals_2']])\n",
        "        \n",
        "        released = bailIndicator(r * 10, logreg, train_labeled.X, test.X)\n",
        "        \n",
        "        #released = cdf(test.X, logreg, 0) < r / 10\n",
        "\n",
        "#         released = npr.choice([True, False], size = test.X.shape, p=[r/10, 1-r/10])\n",
        "        f_rate_caus[i] = np.mean(test.B_prob_0_model * released)\n",
        "\n",
        "        #percentiles = estimatePercentiles(train_labeled.X, logreg, N_sample=train_labeled.shape[0])\n",
        "\n",
        "        # def releaseProbability(x):\n",
        "        #    return calcReleaseProbabilities(r*10, train_labeled.X, x, logreg, percentileMatrix=percentiles)\n",
        "\n",
        "        # def integraali(x):\n",
        "        #    p_y0 = logreg.predict_proba(x.reshape(-1, 1))[:, 0]\n",
        "\n",
        "        #    p_t1 = releaseProbability(x)\n",
        "\n",
        "        #    p_x = scs.norm.pdf(x)\n",
        "\n",
        "        #    return p_y0 * p_t1 * p_x\n",
        "\n",
        "        #f_rate_caus[i] = si.quad(lambda x: integraali(np.ones((1, 1))*x), -10, 10)[0]\n",
        "\n",
        "    failure_rates[r - 1, 0] = np.mean(f_rate_true)\n",
        "    failure_rates[r - 1, 1] = np.mean(f_rate_label)\n",
        "    failure_rates[r - 1, 2] = np.mean(f_rate_human)\n",
        "    failure_rates[r - 1, 3] = np.mean(f_rate_cont)\n",
        "    failure_rates[r - 1, 4] = np.mean(f_rate_caus)\n",
        "\n",
        "    failure_sems[r - 1, 0] = scs.sem(f_rate_true)\n",
        "    failure_sems[r - 1, 1] = scs.sem(f_rate_label)\n",
        "    failure_sems[r - 1, 2] = scs.sem(f_rate_human)\n",
        "    failure_sems[r - 1, 3] = scs.sem(f_rate_cont)\n",
        "    failure_sems[r - 1, 4] = scs.sem(f_rate_caus)\n",
        "\n",
        "x_ax = np.arange(0.1, 0.9, 0.1)\n",
        "\n",
        "plt.errorbar(x_ax,\n",
        "             failure_rates[:, 0],\n",
        "             label='True Evaluation',\n",
        "             c='green',\n",
        "             yerr=failure_sems[:, 0])\n",
        "plt.errorbar(x_ax,\n",
        "             failure_rates[:, 1],\n",
        "             label='Labeled outcomes',\n",
        "             c='magenta',\n",
        "             yerr=failure_sems[:, 1])\n",
        "plt.errorbar(x_ax,\n",
        "             failure_rates[:, 2],\n",
        "             label='Human evaluation',\n",
        "             c='red',\n",
        "             yerr=failure_sems[:, 2])\n",
        "plt.errorbar(x_ax,\n",
        "             failure_rates[:, 3],\n",
        "             label='Contraction, log.',\n",
        "             c='blue',\n",
        "             yerr=failure_sems[:, 3])\n",
        "plt.errorbar(x_ax,\n",
        "             failure_rates[:, 4],\n",
        "             label='Causal model, ep',\n",
        "             c='black',\n",
        "             yerr=failure_sems[:, 4])\n",
        "\n",
        "plt.title('Failure rate vs. Acceptance rate with unobservables')\n",
        "plt.xlabel('Acceptance rate')\n",
        "plt.ylabel('Failure rate')\n",
        "plt.legend()\n",
        "plt.grid()\n",
        "plt.show()\n",
        "\n",
        "print(failure_rates)\n",
        "print(\"\\nMean absolute errors:\")\n",
        "for i in range(1, failure_rates.shape[1]):\n",
        "    print(np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])))"
       ]
      }
     ],
     "metadata": {
      "kernelspec": {
       "display_name": "Python 3",
       "language": "python",
       "name": "python3"
      },
      "language_info": {
       "codemirror_mode": {
        "name": "ipython",
        "version": 3
       },
       "file_extension": ".py",
       "mimetype": "text/x-python",
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
       "version": "3.7.3"
      },
      "toc": {
       "base_numbering": 1,
       "nav_menu": {},
       "number_sections": true,
       "sideBar": true,
       "skip_h1_title": true,
       "title_cell": "Table of Contents",
       "title_sidebar": "Contents",
       "toc_cell": true,
       "toc_position": {
        "height": "1084px",
        "left": "228px",
        "top": "111.133px",
        "width": "300.7px"
       },
       "toc_section_display": true,
       "toc_window_display": true
      },
      "varInspector": {
       "cols": {
        "lenName": 16,
        "lenType": 16,
        "lenVar": 40
       },
       "kernels_config": {
        "python": {
         "delete_cmd_postfix": "",
         "delete_cmd_prefix": "del ",
         "library": "var_list.py",
         "varRefreshCmd": "print(var_dic_list())"
        },
        "r": {
         "delete_cmd_postfix": ") ",
         "delete_cmd_prefix": "rm(",
         "library": "var_list.r",
         "varRefreshCmd": "cat(var_dic_list()) "
        }
       },
       "position": {
        "height": "352.85px",
        "left": "1070px",
        "right": "20px",
        "top": "120px",
        "width": "350px"
       },
       "types_to_exclude": [
        "module",
        "function",
        "builtin_function_or_method",
        "instance",
        "_Feature"
       ],
       "window_display": false
      }
     },
     "nbformat": 4,
     "nbformat_minor": 2
    }