Add data with unobservables to derivation validation

2ecbbb9f · Riku-Laine · e357e897 · 2ecbbb9f · 2ecbbb9f
Commit 2ecbbb9f authored 5 years ago by Riku-Laine
--- a/analysis_and_scripts/Analysis_07MAY2019_new.ipynb
+++ b/analysis_and_scripts/Analysis_07MAY2019_new.ipynb
--- a/analysis_and_scripts/derivation_validation.ipynb
+++ b/analysis_and_scripts/derivation_validation.ipynb
@@ -71,22 +71,22 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Bias: (0.07558160888120039, 8.391244241812189e-16)\n",
-      "Bias: (0.03647337206409673, 4.0493577451280163e-16)\n",
+      "Bias: (0.07131297477417739, 7.917330654880471e-16)\n",
+      "Bias: (0.035957051207116175, 3.9920346147766154e-16)\n",
      "Analytical: 0.03709585053394618\n",
-      "Estimated:  0.03910840191792689\n",
-      "Difference: -0.0020125513839807097\n",
+      "Estimated:  0.035378945320598335\n",
+      "Difference: 0.001716905213347844\n",
      "\n",
      "Values for P(y=0|do(r=1)) and P(y=0|do(r=0))\n",
      "\n",
      "Analytical: 0.14973849934787756 0.11264264881393138\n",
-      "Estimated:  0.15185984231839383 0.11275144040046695\n"
+      "Estimated:  0.1432745402973986 0.10789559497680028\n"
     ]
    }
   ],
   "source": [
    "###\n",
-    "# Synthetic data\n",
+    "# Synthetic data 1\n",
    "\n",
    "# R ~ U(0,1)\n",
    "# X ~ N(0,1)\n",
@@ -179,166 +179,130 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.0\n",
-      "0.0006666666666666666\n",
-      "0.00125\n",
-      "0.0015833333333333333\n",
-      "0.0025833333333333333\n",
-      "0.0035\n",
-      "0.004583333333333333\n",
-      "0.006333333333333333\n",
-      "0.007416666666666667\n",
-      "0.007833333333333333\n",
-      "0.008916666666666666\n",
-      "0.00925\n",
-      "0.009916666666666667\n",
-      "0.010833333333333334\n",
-      "0.012333333333333333\n",
-      "0.0135\n",
-      "0.01425\n",
-      "0.015166666666666667\n",
-      "0.016\n",
-      "0.017\n",
-      "0.017666666666666667\n",
-      "0.01875\n",
-      "0.02\n",
-      "0.021083333333333332\n",
-      "0.022416666666666668\n",
-      "0.0235\n",
-      "0.024083333333333335\n",
-      "0.025166666666666667\n",
-      "0.026166666666666668\n",
-      "0.02775\n",
-      "0.029\n",
-      "0.029916666666666668\n",
-      "0.03158333333333333\n",
-      "0.03283333333333333\n",
-      "0.03383333333333333\n",
-      "0.034833333333333334\n",
-      "0.036\n",
-      "0.037\n",
-      "0.03808333333333333\n",
-      "0.03941666666666667\n",
-      "0.04058333333333333\n",
-      "0.042083333333333334\n",
-      "0.043666666666666666\n",
-      "0.044583333333333336\n",
-      "0.046\n",
-      "0.04716666666666667\n",
-      "0.04841666666666666\n",
-      "0.050166666666666665\n",
-      "0.0515\n",
-      "0.053\n",
-      "0.05491666666666667\n",
-      "0.05575\n",
-      "0.05708333333333333\n",
-      "0.058166666666666665\n",
-      "0.059416666666666666\n",
-      "0.06091666666666667\n",
-      "0.06233333333333333\n",
-      "0.06383333333333334\n",
-      "0.06491666666666666\n",
-      "0.06575\n",
-      "0.06725\n",
-      "0.06891666666666667\n",
-      "0.07\n",
-      "0.07141666666666667\n",
-      "0.07283333333333333\n",
-      "0.07425\n",
-      "0.07691666666666666\n",
-      "0.07791666666666666\n",
-      "0.07933333333333334\n",
-      "0.0805\n",
-      "0.08225\n",
-      "0.08425\n",
-      "0.08541666666666667\n",
-      "0.08666666666666667\n",
-      "0.08841666666666667\n",
-      "0.08975\n",
-      "0.09141666666666666\n",
-      "0.09266666666666666\n",
-      "0.09391666666666666\n",
-      "0.095\n",
-      "0.09633333333333334\n",
-      "0.09791666666666667\n",
-      "0.09916666666666667\n",
-      "0.10066666666666667\n",
-      "0.10158333333333333\n",
-      "0.10358333333333333\n",
-      "0.10466666666666667\n",
-      "0.10608333333333334\n",
-      "0.1075\n",
-      "0.10908333333333334\n",
-      "0.11033333333333334\n",
-      "0.11216666666666666\n",
-      "0.1135\n",
-      "0.11525\n",
-      "0.1175\n",
-      "0.12008333333333333\n",
-      "0.12133333333333333\n",
-      "0.12308333333333334\n",
-      "0.12475\n",
-      "0.12741666666666668\n",
-      "0.12941666666666668\n"
+      "Bias: (0.048298659980495415, 5.362228436888762e-16)\n",
+      "Bias: (0.02685101277580984, 2.981061261820832e-16)\n",
+      "Analytical: 0.030762705619782782\n",
+      "Estimated:  0.021455460896022127\n",
+      "Difference: 0.009307244723760655\n",
+      "\n",
+      "Values for P(y=0|do(r=1)) and P(y=0|do(r=0))\n",
+      "\n",
+      "Analytical: 0.16344054312353337 0.1326778375037506\n",
+      "Estimated:  0.15963912402042202 0.1381836631243999\n"
     ]
    }
   ],
   "source": [
-    "r_, x_, t_, y_ = generateData()\n",
+    "###\n",
+    "# Synthetic data 2 - with unobservable Z\n",
    "\n",
+    "# R ~ U(0,1)\n",
+    "# X ~ N(0,1)\n",
+    "# T ~ Bin(1, sigmoid(a * r + b * x))\n",
+    "# Y ~ Bin(1, sigmoid(c * t + d * x))\n",
    "\n",
-    "def bailIndicator(r, y_model, x_train, x_test):\n",
-    "    '''\n",
-    "    Indicator function for whether a judge will bail or jail a suspect.\n",
-    "    \n",
-    "    Algorithm:\n",
-    "    ----------\n",
-    "    \n",
-    "    (1) Calculate recidivism probabilities from training set with a trained \n",
-    "        model and assign them to predictions_train.\n",
-    "    \n",
-    "    (2) Calculate recidivism probabilities from test set with the trained \n",
-    "        model and assign them to predictions_test.\n",
-    "    \n",
-    "    (3) Construct a cumulative distribution function of the probabilities in\n",
-    "        in predictions_train.\n",
+    "# Weights:\n",
+    "# R -> T: a\n",
+    "# X -> T: b\n",
+    "# T -> Y: c\n",
+    "# X -> Y: d\n",
+    "# Z -> T: e\n",
+    "# Z -> Y: f\n",
+    "\n",
+    "N = 12000\n",
+    "a = 1\n",
+    "b = 1\n",
+    "c = .5\n",
+    "d = 1\n",
+    "e = 1\n",
+    "f = 1\n",
+    "\n",
+    "def generateDataWithZ(N=N, a=a, b=b, c=c, d=d, e=e, f=f):\n",
+    "\n",
+    "    r = npr.uniform(size=N)\n",
+    "    x = npr.uniform(size=N)\n",
    "    \n",
-    "    (4)\n",
-    "    For pred in predictions_test:\n",
+    "    z = npr.uniform(size=N)\n",
+    "\n",
+    "    t = npr.binomial(n=1, p=sigmoid(a * r + b * x + e * z), size=N)\n",
+    "\n",
+    "    y = npr.binomial(n=1, p=sigmoid(c * t + d * x + f * z), size=N)\n",
+    "\n",
+    "    y[t == 0] = 1\n",
+    "\n",
+    "    return r, x, z, t, y\n",
+    "\n",
+    "\n",
+    "def analytic_R_on_Y(r, a, b, c, d, e, f):\n",
+    "    '''Return probability of Y=0 with given parameters.'''\n",
+    "\n",
+    "    # P(y=0|t=1, x) * p(t=1|r, x) * p(x)\n",
+    "    def integrable(t, x, z):\n",
+    "        p_y0 = 1 - sigmoid(c * t + d * x + f * z)\n",
    "        \n",
-    "        if pred belongs to a percentile (computed from step (3)) lower than r\n",
-    "            return True\n",
-    "        else\n",
-    "            return False\n",
+    "        if t == 1:\n",
+    "            p_t = sigmoid(a * r + b * x + e * z)\n",
+    "        elif t == 0:\n",
+    "            p_t = 1 - sigmoid(a * r + b * x + e * z)\n",
+    "        \n",
+    "        p_x = 1\n",
+    "        \n",
+    "        return p_y0 * p_t * p_x\n",
    "    \n",
-    "    Returns:\n",
-    "    --------\n",
-    "    (1) Boolean list indicating a bail decision (bail = True).\n",
-    "    '''\n",
+    "    t_1 = si.dblquad(lambda x, z: integrable(1, x, z), 0, 1, lambda x: 0, lambda x: 1)\n",
+    "\n",
+    "    # P(y=0|t=0, x) * p(t=0|r, x) * p(x)\n",
+    "    # Now equal to 0, hence commented out. See data generation.\n",
+    "    t_0 = si.dblquad(lambda x, z: integrable(0, x, z), 0, 1, lambda x: 0, lambda x: 1)\n",
+    "\n",
+    "    return t_1[0]  #+ t_0[0]\n",
+    "\n",
+    "\n",
+    "r, x, z, t, y = generateDataWithZ()\n",
    "\n",
-    "    predictions_train = y_model.predict_proba(x_train)[:, 0]\n",
+    "# Fit predictive models.\n",
+    "lr_t = LogisticRegression(solver='lbfgs')\n",
+    "lr_y = LogisticRegression(solver='lbfgs')\n",
+    "\n",
+    "lr_t = lr_t.fit(np.array([r, x]).T, t)\n",
+    "lr_y = lr_y.fit(np.array([t[t == 1], x[t == 1]]).T, y[t == 1])\n",
+    "\n",
+    "\n",
+    "def causal_effect_of_R_on_Y(r):\n",
    "\n",
-    "    predictions_test = y_model.predict_proba(x_test)[:, 0]\n",
+    "    # Equal to integration of P(Y=0|T=0, X=x) * P(T=0|R=r, X=x) * f(x)\n",
+    "    # from 0 to 1\n",
+    "    print(\"Bias:\",\n",
+    "        si.quad(\n",
+    "            lambda x: lr_y.predict_proba(np.array([[0, x]]))[0, 0] * lr_t.\n",
+    "            predict_proba(np.array([[r, x]]))[0, 0], 0, 1))\n",
    "\n",
-    "    return [\n",
-    "        scs.percentileofscore(predictions_train, pred, kind='weak') < r\n",
-    "        for pred in predictions_test\n",
-    "    ]\n",
+    "    # Integrate P(Y=0|T=1, X=x) * P(T=1|R=r, X=x) * f(x) from 0 to 1\n",
+    "    return (si.quad(\n",
+    "        lambda x: lr_y.predict_proba(np.array([[1, x]]))[0, 0] * lr_t.\n",
+    "        predict_proba(np.array([[r, x]]))[0, 1], 0, 1))\n",
    "\n",
    "\n",
-    "recidivated = y_ == 0\n",
+    "r0 = causal_effect_of_R_on_Y(0)\n",
+    "r1 = causal_effect_of_R_on_Y(1)\n",
    "\n",
-    "for r__ in range(101):\n",
-    "    released_for_bail = bailIndicator(r__, lr_y, np.array([t, x]).T, np.array([t_, x_]).T)\n",
+    "analytical = analytic_R_on_Y(1, a, b, c, d, e, f) - analytic_R_on_Y(0, a, b, c, d, e, f)\n",
    "\n",
-    "    print(np.sum(recidivated & released_for_bail) / y_.shape[0])"
+    "print(\"Analytical:\", analytical)\n",
+    "print(\"Estimated: \", r1[0] - r0[0])\n",
+    "print(\"Difference:\", analytical - r1[0] + r0[0])\n",
+    "print()\n",
+    "print(\"Values for P(y=0|do(r=1)) and P(y=0|do(r=0))\\n\")\n",
+    "print(\"Analytical:\", analytic_R_on_Y(1, a, b, c, d, e, f),\n",
+    "      analytic_R_on_Y(0, a, b, c, d, e, f))\n",
+    "print(\"Estimated: \", r1[0], r0[0])"
   ]
  }
 ],

 %% Cell type:code id: tags:

 ``` python
 # Imports

 import numpy as np
 import pandas as pd
 from datetime import datetime
 import matplotlib.pyplot as plt
 import scipy.stats as scs
 import scipy.integrate as si
 import seaborn as sns
 import numpy.random as npr
 from sklearn.preprocessing import OneHotEncoder
 from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import PolynomialFeatures


 # Settings

 %matplotlib inline

 plt.rcParams.update({'font.size': 16})
 plt.rcParams.update({'figure.figsize': (14, 7)})

 # Suppress deprecation warnings.

 import warnings


 def fxn():
    warnings.warn("deprecated", DeprecationWarning)


 with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    fxn()

 def sigmoid(x):
    '''Return value of sigmoid function (inverse of logit) at x.'''

    return 1 / (1 + np.exp(-x))
 ```

 %% Cell type:markdown id: tags:

 In the code below

 $$P(Y=0|do(R=r))=\int_x P(Y=0|T=1, X=x)P(T=1|R=r, X=x)f_x(x)~dx$$

 where $f_x(x)$ is the pdf of x. (Now $X \sim U(0, 1)$, so $f_x(x)=1$.)

 %% Cell type:code id: tags:

 ``` python
 ###
-# Synthetic data
+# Synthetic data 1

 # R ~ U(0,1)
 # X ~ N(0,1)
 # T ~ Bin(1, sigmoid(a * r + b * x))
 # Y ~ Bin(1, sigmoid(c * t + d * x))

 # Weights:
 # R -> T: a
 # X -> T: b
 # T -> Y: c
 # X -> Y: d

 N = 12000
 a = 1
 b = 1
 c = 1
 d = 1


 def generateData(N=N, a=a, b=b, c=c, d=d):

    r = npr.uniform(size=N)
    x = npr.uniform(size=N)

    t = npr.binomial(n=1, p=sigmoid(a * r + b * x), size=N)

    y = npr.binomial(n=1, p=sigmoid(c * t + d * x), size=N)

    y[t == 0] = 1

    return r, x, t, y


 def analytic_R_on_Y(r, a, b, c, d):
    '''Return probability of Y=0 with given parameters.'''

    # P(y=0|t=1, x) * p(t=1|r, x) * p(x)
    t_1 = si.quad(
        lambda x: (1 - sigmoid(c * 1 + d * x)) * sigmoid(a * r + b * x) * 1, 0,
        1)

    # P(y=0|t=0, x) * p(t=0|r, x) * p(x)
    # Now equal to 0, hence commented out. See data generation.
    t_0 = si.quad(
        lambda x: (1 - sigmoid(c * 0 + d * x)) * (1 - sigmoid(a * r + b * x)) *
        1, 0, 1)

    return t_1[0]  #+ t_0[0]


 r, x, t, y = generateData()

 # Fit predictive models.
 lr_t = LogisticRegression(solver='lbfgs')
 lr_y = LogisticRegression(solver='lbfgs')

 lr_t = lr_t.fit(np.array([r, x]).T, t)
 lr_y = lr_y.fit(np.array([t[t == 1], x[t == 1]]).T, y[t == 1])


 def causal_effect_of_R_on_Y(r):

    # Equal to integration of P(Y=0|T=0, X=x) * P(T=0|R=r, X=x) * f(x)
    # from 0 to 1
    print("Bias:",
        si.quad(
            lambda x: lr_y.predict_proba(np.array([[0, x]]))[0, 0] * lr_t.
            predict_proba(np.array([[r, x]]))[0, 0], 0, 1))

    # Integrate P(Y=0|T=1, X=x) * P(T=1|R=r, X=x) * f(x) from 0 to 1
    return (si.quad(
        lambda x: lr_y.predict_proba(np.array([[1, x]]))[0, 0] * lr_t.
        predict_proba(np.array([[r, x]]))[0, 1], 0, 1))


 r0 = causal_effect_of_R_on_Y(0)
 r1 = causal_effect_of_R_on_Y(1)

 analytical = analytic_R_on_Y(1, a, b, c, d) - analytic_R_on_Y(0, a, b, c, d)

 print("Analytical:", analytical)
 print("Estimated: ", r1[0] - r0[0])
 print("Difference:", analytical - r1[0] + r0[0])
 print()
 print("Values for P(y=0|do(r=1)) and P(y=0|do(r=0))\n")
 print("Analytical:", analytic_R_on_Y(1, a, b, c, d),
      analytic_R_on_Y(0, a, b, c, d))
 print("Estimated: ", r1[0], r0[0])
 ```

 %% Output

-    Bias: (0.07558160888120039, 8.391244241812189e-16)
-    Bias: (0.03647337206409673, 4.0493577451280163e-16)
+    Bias: (0.07131297477417739, 7.917330654880471e-16)
+    Bias: (0.035957051207116175, 3.9920346147766154e-16)
    Analytical: 0.03709585053394618
-    Estimated:  0.03910840191792689
-    Difference: -0.0020125513839807097
+    Estimated:  0.035378945320598335
+    Difference: 0.001716905213347844
    
    Values for P(y=0|do(r=1)) and P(y=0|do(r=0))
    
    Analytical: 0.14973849934787756 0.11264264881393138
-    Estimated:  0.15185984231839383 0.11275144040046695
+    Estimated:  0.1432745402973986 0.10789559497680028

 %% Cell type:code id: tags:

 ``` python
-r_, x_, t_, y_ = generateData()
+###
+# Synthetic data 2 - with unobservable Z
+
+# R ~ U(0,1)
+# X ~ N(0,1)
+# T ~ Bin(1, sigmoid(a * r + b * x))
+# Y ~ Bin(1, sigmoid(c * t + d * x))
+
+# Weights:
+# R -> T: a
+# X -> T: b
+# T -> Y: c
+# X -> Y: d
+# Z -> T: e
+# Z -> Y: f
+
+N = 12000
+a = 1
+b = 1
+c = .5
+d = 1
+e = 1
+f = 1
+
+def generateDataWithZ(N=N, a=a, b=b, c=c, d=d, e=e, f=f):
+
+    r = npr.uniform(size=N)
+    x = npr.uniform(size=N)
+
+    z = npr.uniform(size=N)
+
+    t = npr.binomial(n=1, p=sigmoid(a * r + b * x + e * z), size=N)
+
+    y = npr.binomial(n=1, p=sigmoid(c * t + d * x + f * z), size=N)
+
+    y[t == 0] = 1
+
+    return r, x, z, t, y
+
+
+def analytic_R_on_Y(r, a, b, c, d, e, f):
+    '''Return probability of Y=0 with given parameters.'''
+
+    # P(y=0|t=1, x) * p(t=1|r, x) * p(x)
+    def integrable(t, x, z):
+        p_y0 = 1 - sigmoid(c * t + d * x + f * z)

+        if t == 1:
+            p_t = sigmoid(a * r + b * x + e * z)
+        elif t == 0:
+            p_t = 1 - sigmoid(a * r + b * x + e * z)

-def bailIndicator(r, y_model, x_train, x_test):
-    '''
-    Indicator function for whether a judge will bail or jail a suspect.
+        p_x = 1

-    Algorithm:
-    ----------
+        return p_y0 * p_t * p_x

-    (1) Calculate recidivism probabilities from training set with a trained
-        model and assign them to predictions_train.
+    t_1 = si.dblquad(lambda x, z: integrable(1, x, z), 0, 1, lambda x: 0, lambda x: 1)

-    (2) Calculate recidivism probabilities from test set with the trained
-        model and assign them to predictions_test.
+    # P(y=0|t=0, x) * p(t=0|r, x) * p(x)
+    # Now equal to 0, hence commented out. See data generation.
+    t_0 = si.dblquad(lambda x, z: integrable(0, x, z), 0, 1, lambda x: 0, lambda x: 1)
+
+    return t_1[0]  #+ t_0[0]

-    (3) Construct a cumulative distribution function of the probabilities in
-        in predictions_train.

-    (4)
-    For pred in predictions_test:
+r, x, z, t, y = generateDataWithZ()
+
+# Fit predictive models.
+lr_t = LogisticRegression(solver='lbfgs')
+lr_y = LogisticRegression(solver='lbfgs')

-        if pred belongs to a percentile (computed from step (3)) lower than r
-            return True
-        else
-            return False
+lr_t = lr_t.fit(np.array([r, x]).T, t)
+lr_y = lr_y.fit(np.array([t[t == 1], x[t == 1]]).T, y[t == 1])

-    Returns:
-    --------
-    (1) Boolean list indicating a bail decision (bail = True).
-    '''

-    predictions_train = y_model.predict_proba(x_train)[:, 0]
+def causal_effect_of_R_on_Y(r):

-    predictions_test = y_model.predict_proba(x_test)[:, 0]
+    # Equal to integration of P(Y=0|T=0, X=x) * P(T=0|R=r, X=x) * f(x)
+    # from 0 to 1
+    print("Bias:",
+        si.quad(
+            lambda x: lr_y.predict_proba(np.array([[0, x]]))[0, 0] * lr_t.
+            predict_proba(np.array([[r, x]]))[0, 0], 0, 1))

-    return [
-        scs.percentileofscore(predictions_train, pred, kind='weak') < r
-        for pred in predictions_test
-    ]
+    # Integrate P(Y=0|T=1, X=x) * P(T=1|R=r, X=x) * f(x) from 0 to 1
+    return (si.quad(
+        lambda x: lr_y.predict_proba(np.array([[1, x]]))[0, 0] * lr_t.
+        predict_proba(np.array([[r, x]]))[0, 1], 0, 1))


-recidivated = y_ == 0
+r0 = causal_effect_of_R_on_Y(0)
+r1 = causal_effect_of_R_on_Y(1)

-for r__ in range(101):
-    released_for_bail = bailIndicator(r__, lr_y, np.array([t, x]).T, np.array([t_, x_]).T)
+analytical = analytic_R_on_Y(1, a, b, c, d, e, f) - analytic_R_on_Y(0, a, b, c, d, e, f)

-    print(np.sum(recidivated & released_for_bail) / y_.shape[0])
+print("Analytical:", analytical)
+print("Estimated: ", r1[0] - r0[0])
+print("Difference:", analytical - r1[0] + r0[0])
+print()
+print("Values for P(y=0|do(r=1)) and P(y=0|do(r=0))\n")
+print("Analytical:", analytic_R_on_Y(1, a, b, c, d, e, f),
+      analytic_R_on_Y(0, a, b, c, d, e, f))
+print("Estimated: ", r1[0], r0[0])
 ```

 %% Output

-    0.0
-    0.0006666666666666666
-    0.00125
-    0.0015833333333333333
-    0.0025833333333333333
-    0.0035
-    0.004583333333333333
-    0.006333333333333333
-    0.007416666666666667
-    0.007833333333333333
-    0.008916666666666666
-    0.00925
-    0.009916666666666667
-    0.010833333333333334
-    0.012333333333333333
-    0.0135
-    0.01425
-    0.015166666666666667
-    0.016
-    0.017
-    0.017666666666666667
-    0.01875
-    0.02
-    0.021083333333333332
-    0.022416666666666668
-    0.0235
-    0.024083333333333335
-    0.025166666666666667
-    0.026166666666666668
-    0.02775
-    0.029
-    0.029916666666666668
-    0.03158333333333333
-    0.03283333333333333
-    0.03383333333333333
-    0.034833333333333334
-    0.036
-    0.037
-    0.03808333333333333
-    0.03941666666666667
-    0.04058333333333333
-    0.042083333333333334
-    0.043666666666666666
-    0.044583333333333336
-    0.046
-    0.04716666666666667
-    0.04841666666666666
-    0.050166666666666665
-    0.0515
-    0.053
-    0.05491666666666667
-    0.05575
-    0.05708333333333333
-    0.058166666666666665
-    0.059416666666666666
-    0.06091666666666667
-    0.06233333333333333
-    0.06383333333333334
-    0.06491666666666666
-    0.06575
-    0.06725
-    0.06891666666666667
-    0.07
-    0.07141666666666667
-    0.07283333333333333
-    0.07425
-    0.07691666666666666
-    0.07791666666666666
-    0.07933333333333334
-    0.0805
-    0.08225
-    0.08425
-    0.08541666666666667
-    0.08666666666666667
-    0.08841666666666667
-    0.08975
-    0.09141666666666666
-    0.09266666666666666
-    0.09391666666666666
-    0.095
-    0.09633333333333334
-    0.09791666666666667
-    0.09916666666666667
-    0.10066666666666667
-    0.10158333333333333
-    0.10358333333333333
-    0.10466666666666667
-    0.10608333333333334
-    0.1075
-    0.10908333333333334
-    0.11033333333333334
-    0.11216666666666666
-    0.1135
-    0.11525
-    0.1175
-    0.12008333333333333
-    0.12133333333333333
-    0.12308333333333334
-    0.12475
-    0.12741666666666668
-    0.12941666666666668
+    Bias: (0.048298659980495415, 5.362228436888762e-16)
+    Bias: (0.02685101277580984, 2.981061261820832e-16)
+    Analytical: 0.030762705619782782
+    Estimated:  0.021455460896022127
+    Difference: 0.009307244723760655
+    
+    Values for P(y=0|do(r=1)) and P(y=0|do(r=0))
+    
+    Analytical: 0.16344054312353337 0.1326778375037506
+    Estimated:  0.15963912402042202 0.1381836631243999