diff --git a/.gitignore b/.gitignore index cde006900de80075ac87a6047ad3d26dfd695f1c..7bf42561d7a277cd5e24d9a6a9839fc01e917321 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ .DS_Store .RData .Rhistory -.ipynb_checkpoints +analysis_and_scripts/.ipynb_checkpoints sources_ignored \ No newline at end of file diff --git a/Kandi.pdf b/Kandi.pdf index 9eda4ee7669d08ec9b9b8b9f64c52a787a3e7d58..1facc56b54cca32436a80631a974e6dc84ad3841 100644 Binary files a/Kandi.pdf and b/Kandi.pdf differ diff --git a/Kandi.synctex.gz b/Kandi.synctex.gz index f312ea141e71cb34ffc0370ee740a942515eff00..dc722e5833bb2d2381319c53779cb24561b2a1e9 100644 Binary files a/Kandi.synctex.gz and b/Kandi.synctex.gz differ diff --git a/Kandi.tex b/Kandi.tex index 4b597c1c3a5eb59745775b37747ac76119441434..c95be60260ea7dabab50d5be22a06b74e5b304e1 100644 --- a/Kandi.tex +++ b/Kandi.tex @@ -44,6 +44,7 @@ \newcommand{\pr}{\mathbb{P}} % tn merkki \newcommand{\D}{\mathcal{D}} % aineisto \newcommand{\s}{\mathcal{S}} % tn merkki +\newcommand{\M}{\mathcal{M}} % tn merkki \newcommand{\R}{\mathbb{R}} \newcommand{\C}{\mathbb{C}} @@ -74,8 +75,8 @@ \addtolength{\voffset}{0.45cm} \addtolength{\textheight}{-0.9cm} -\title{Kandidaatin tutkielma\\ {\Large Rikoksenuusinnan ennustaminen kausaalipäättelyllä}} % Parempi otsikko -\author{Riku Laine\\ Valtiotieteellinen tiedekunta, Helsingin yliopisto} +\title{Kandidaatintutkielma\\ {\Large Kausaalipäättely ja valikoitumisharha}} % Parempi otsikko +\author{Riku Laine\\ Valtiotieteellinen tiedekunta \\ Helsingin yliopisto} \date{\today} %%%%%%%%%%%%%% @@ -103,7 +104,7 @@ Tämän tutkielman on tarkastanut XYZ. Haluan kiittää kaikkia edellä mainittu \bigskip -\rightline{Helsingissä \today} +\rightline{Helsingissä \today,} \rightline{Riku Laine} \bigskip @@ -114,56 +115,35 @@ Tämän tutkielman on tarkastanut XYZ. 
Haluan kiittää kaikkia edellä mainittu \noindent I would like to wholeheartedly thank assistant professor Michael Mathioudakis from University of Helsinki's Department of Computer Science for numerous things. He provided me this extremely interesting thesis topic and provided insightful and encouraging comments throughout the process. Antti Hyttinen from the same department also gave important insight in the causal modelling and commented on the content. - -%%%%%%%%% -%%%%%%%%% -%%%%%%%%% - -\chapter{Tiivistelmä -- Kypsyysnäyte?}\label{tiiv} - -% refillä pelkät numerot - -\emph{\nameref{johd}}-luvussa esittelen ongelman asettelun ja tilanteen yleisen viitekehyksen. Keskustelemme rikoksenuusinnan ennustamisesta yhdysvaltalaisessa oikeusjärjestelmässä. Esitän kappaleessa yleisen kuvauksen takuukäsittelyn etenemisestä oikeusprosessina, jonka jälkeen pohdin hieman takuukäsittelyn yhteiskunnallista merkitystä ja motivaatiota hyvään ennusteeseen. Kappaleen lopussa kirjoitan hieman kausaalipäättelystä uutena tilastotieteellisenä paradigmana \cite{pearl10}. - -Kappaleessa \emph{\ref{aineisto}} esittelen käyttämäni aineistolähteet ja niiden ominaispiirteet. Esitän COMPAS-tietojen ominaispiirteet ja \emph{jotain muuta}. Esitän myös kuinka olen luonut analyyseissä myöhemmin käytettävän aineistosetin mukaillen Lakkarajun vuoden 2017 konferenssijulkaisua \cite{lakkaraju17}. - -\emph{\nameref{metodit}}-kappaleessa esitän käyttämäni mallit ja menetelmät. Teen lyhyen katsauksen aikaisempaan kirjallisuuteen ja tutkimuksiin tällä sovellusalalla. Käyn lisäksi läpi tässä tutkielmassa myöhemmin käytettäviä matemaattisia sekä verkkoteoreettisia merkintöjä ja määritelmiä. Teen joitakin osoituksia ja osoitan kuinka mallimme ei riipu havaitsemattomista (unobservables) muuttujista. % Mallin robustius? - -Luvussa \emph{\ref{tulokset}} esitän algoritmillani saavuttamani tulokset ja vertailen niitä Lakkarajun \cite{lakkaraju17} saavuttamiin. 
Olen eritellyt erillisiin alalukuihin synteettisellä ja COMPAS-aineistoseteillä saavutetut tulokset. - -Viimeisessä kappaleessa \emph{\nameref{diskussio}} esitän mallien ja tutkielmani virhelähteet ja muut ongelmat sekä keskustelen tulosten mahdollisesta vaikutuksesta, sikäli niitä sovellettaisiin sikäläisen oikeuslaitoksen toimintaan. - -%%%%%%%%% -%%%%%%%%% -%%%%%%%%% - \chapter{Johdanto}\label{johd} -Tässä kappaleessa esittelen tutkielman taustaa ja yhdysvaltalaisen oikeuslaitoksen takuukäsittelyprosessin yleisellä tasolla. Sen jälkeen paneudun hieman vangitsemispäätöksen yhteiskunnalliseen merkitykseen: minkä takia ihmisiä vangitaan ja mitä perusteita on vangitsemattajättämispäätökselle. Pyrin luvun aikana myös hieman selvittämään takuujärjestelmän käyttöä Suomessa ja kappaleen lopussa pohdin hieman kausaalipäättelyä paradigman muutoksena tilastotieteen kentällä. Jätän kuitenkin tarvittavien merkintöjen esittämisen kappaleeseen \emph{\nameref{kausaalimerk}} ja mallin esittelyn \emph{\nameref{kausaalimalli}}-lukuun. +Tämän tutkielman tavoitteena on luoda kausaalipäättelyn avulla algoritmi, jolla voimme arvioida ennustavien mallien tarkkuutta, kun käytettävissä on ainoastaan valikoitumisharhasta kärsivää aineistoa. Samankaltaista asetelmaa ovat julkaisuissaan käsitelleet muun muassa Lakkaraju ja Madras \cite{lakkaraju17, madras18}. Pyrin tutkielmassani luomaan joustavamman ja tarkemman vaihtoehdon Lakkarajun luomalle supistusalgoritmille, mutta esitän ensin yleistä taustaa kausaalipäättelystä ja valikoitumisharhasta. -% https://julkaisut.valtioneuvosto.fi/bitstream/handle/10024/76171/omkm_2009_2.pdf - -%%%%%%%%% - -\section{Takuukäsittely prosessina}\label{pros} +%Tässä kappaleessa esittelen tutkielman taustaa ja yhdysvaltalaisen oikeuslaitoksen takuukäsittelyprosessin yleisellä tasolla. Sen jälkeen paneudun hieman vangitsemispäätöksen yhteiskunnalliseen merkitykseen: minkä takia ihmisiä vangitaan ja mitä perusteita on vangitsemattajättämispäätökselle. 
Pyrin luvun aikana myös hieman selvittämään takuujärjestelmän käyttöä Suomessa ja kappaleen lopussa pohdin hieman kausaalipäättelyä paradigman muutoksena tilastotieteen kentällä. Jätän kuitenkin tarvittavien merkintöjen esittämisen kappaleeseen \emph{\nameref{kausaalimerk}} ja mallin esittelyn \emph{\nameref{kausaalimalli}}-lukuun. -% Johdanto, yhdysvallat, Suomi, kritiikki - -Yhdysvalloissa, kuten monissa muissa anglosaksisissa maissa, on käytössä järjestelmä, jota nimitetään takuu- tai vakuusjärjestelmäksi. Takuujärjestelmä on epäillyn vaihtoehto tutkintavankeudelle hänen odottaessaan oikeudenkäyntiä ja Yhdysvalloissa oikeus takuuseen periytyy maan perustamisen ajalta \cite{okm, zaniewski14}. Suomen oikeus- ja sisäasiainministeriön alaisen esitutkinta- ja pakkokeinotoimikunnan mukaan takuujärjestelmiä on kolmenlaisia: kahdessa niistä epäilty maksaa itse käteisellä vakuuden tai asettaa omaisuuttaan vakuudeksi ja kolmannessa jokin ulkopuolinen taho ''menee takuuseen epäillyn velvollisuuksien täyttämisestä'' \cite{okm}. - -Yhdysvalloissa epäillyn pidätyksen jälkeen hänet viedään paikallisen oikeusviranomaisen järjestämään takuukuulemiseen (bail hearing) \cite{zaniewski14}. Kuulemisessa päätetään takuun myöntämisestä, eli voidaanko epäilty vapauttaa, vai halutaanko hänet asettaa vankeuteen ennen oikeudenkäyntiä. Kuulemisessa päätetään myös mahdollisen takuun määrästä sekä vapauttamisen ehdoista \cite{zaniewski14}. Takuu voidaan suorittaa taattuna tai takaamattomana maksusitoumuksena tai maksaa suoraan -- erityistapauksissa epäilty voidaan vapauttaa myös pelkällä kirjallisella sitoumuksella (release on personal recognizance (ROR)) \cite{zaniewski14}. - -% Tilastoja? 
+% https://julkaisut.valtioneuvosto.fi/bitstream/handle/10024/76171/omkm_2009_2.pdf %%%%%%%%% -\section{Yhteiskunnallinen merkitys ja kritiikki}\label{ykmerk} - -Zaniewski toteaa lyhyessä kirjallisuuskatsauksessaan, että takuujärjestelmän vuoden 1982 uudistus ei onnistunut laskemaan tarpeettomia vangitsemisia -- päinvastoin niiden suhteellinen määrä kaksinkertaistui 22\%:sta 49\%:iin vuodesta 1984 vuoteen 2007. Nykyisellään sikäläinen oikeusjärjestelmä suosii suoraan rahalla maksettavia tai taatuilla maksusitoumuksilla hoidettuja takuita, mikä asettaa huonossa taloustilanteessa olevat epäillyt eri tilanteeseen. \cite{zaniewski14} - -Suomessa vakuusjärjestelmää ei ole käytetty, vaikka aiemmin mainittu toimikunta toteaakin sen sisältyvän tullilain 44 §:ään. Kyseisessä pykälässä ''- - säädetään mahdollisuudesta asettaa pidätetyn tai vangitun vapaaksi päästämi[s]en ehdoksi, että hän asettaa vakuuden, jonka harkitaan takaavan hänen saapumisensa oikeudenkäyntiin ja ehkä tuomittavien seuraamusten suorittamisen''. Kuten he tarkentavat, lisäksi usein edellytetään, että epäilty ei asu Suomessa, ja epäillään hänen pakenevan maasta ennen oikeudenkäyntiä tai rangaistusta \cite{okm}. Sekä yhdysvaltalaiselle että suomalaiselle järjestelmälle on yhteistä, että takuu tuomitaan menetettäväksi valtiolle, jos vapauden ehtoja rikotaan. - -Kritiikkiä on esitetty molemmissa maissa osaltaan samoihin asioihin. Suomessa pykälää ei ole sovellettu, koska luultavasti sen tulkintaohjeet ovat niin niukat, kuten myös sääntely \cite{okm}. Yhdistävänä kritiikkinä sekä Zaniewski että esitutkinta- ja pakkokeinotoimikunta mainitsevat muun muassa sen, kuinka takuumaksujen toimeenpano vaikuttaa tai Suomen tapauksessa vaikuttaisi pienituloisten taloustilanteeseen \cite{zaniewski14, okm}. 
Suomalainen toimikunta esittää lisäksi monia muitakin ongelmakohtia, sikäli takuujärjestelmä haluttaisiin ottaa Suomessa käyttöön, esimerkkinä he toteavat, että vakuusmaksujen maksamiseen tulisi todennäköisesti liittymään ''epätoivottavia lieveilmiöitä'' \cite{okm}. Tähän ongelmaan on Yhdysvalloissa jo osittain reagoitukin, sillä esimerkiksi Californian osavaltio päätti viime vuonna poistaa takuumaksut käytöstä \cite{cnn}. +%\section{Takuukäsittely prosessina}\label{pros} +% +%% Johdanto, yhdysvallat, Suomi, kritiikki +% +%Yhdysvalloissa, kuten monissa muissa anglosaksisissa maissa, on käytössä järjestelmä, jota nimitetään takuu- tai vakuusjärjestelmäksi. Takuujärjestelmä on epäillyn vaihtoehto tutkintavankeudelle hänen odottaessaan oikeudenkäyntiä ja Yhdysvalloissa oikeus takuuseen periytyy maan perustamisen ajalta \cite{okm, zaniewski14}. Suomen oikeus- ja sisäasiainministeriön alaisen esitutkinta- ja pakkokeinotoimikunnan mukaan takuujärjestelmiä on kolmenlaisia: kahdessa niistä epäilty maksaa itse käteisellä vakuuden tai asettaa omaisuuttaan vakuudeksi ja kolmannessa jokin ulkopuolinen taho ''menee takuuseen epäillyn velvollisuuksien täyttämisestä'' \cite{okm}. +% +%Yhdysvalloissa epäillyn pidätyksen jälkeen hänet viedään paikallisen oikeusviranomaisen järjestämään takuukuulemiseen (bail hearing) \cite{zaniewski14}. Kuulemisessa päätetään takuun myöntämisestä, eli voidaanko epäilty vapauttaa, vai halutaanko hänet asettaa vankeuteen ennen oikeudenkäyntiä. Kuulemisessa päätetään myös mahdollisen takuun määrästä sekä vapauttamisen ehdoista \cite{zaniewski14}. Takuu voidaan suorittaa taattuna tai takaamattomana maksusitoumuksena tai maksaa suoraan -- erityistapauksissa epäilty voidaan vapauttaa myös pelkällä kirjallisella sitoumuksella (release on personal recognizance (ROR)) \cite{zaniewski14}. +% +%% Tilastoja? 
+% +%%%%%%%%%% +% +%\section{Yhteiskunnallinen merkitys ja kritiikki}\label{ykmerk} +% +%Zaniewski toteaa lyhyessä kirjallisuuskatsauksessaan, että takuujärjestelmän vuoden 1982 uudistus ei onnistunut laskemaan tarpeettomia vangitsemisia -- päinvastoin niiden suhteellinen määrä kaksinkertaistui 22\%:sta 49\%:iin vuodesta 1984 vuoteen 2007. Nykyisellään sikäläinen oikeusjärjestelmä suosii suoraan rahalla maksettavia tai taatuilla maksusitoumuksilla hoidettuja takuita, mikä asettaa huonossa taloustilanteessa olevat epäillyt eri tilanteeseen. \cite{zaniewski14} +% +%Suomessa vakuusjärjestelmää ei ole käytetty, vaikka aiemmin mainittu toimikunta toteaakin sen sisältyvän tullilain 44 §:ään. Kyseisessä pykälässä ''- - säädetään mahdollisuudesta asettaa pidätetyn tai vangitun vapaaksi päästämi[s]en ehdoksi, että hän asettaa vakuuden, jonka harkitaan takaavan hänen saapumisensa oikeudenkäyntiin ja ehkä tuomittavien seuraamusten suorittamisen''. Kuten he tarkentavat, lisäksi usein edellytetään, että epäilty ei asu Suomessa, ja epäillään hänen pakenevan maasta ennen oikeudenkäyntiä tai rangaistusta \cite{okm}. Sekä yhdysvaltalaiselle että suomalaiselle järjestelmälle on yhteistä, että takuu tuomitaan menetettäväksi valtiolle, jos vapauden ehtoja rikotaan. +% +%Kritiikkiä on esitetty molemmissa maissa osaltaan samoihin asioihin. Suomessa pykälää ei ole sovellettu, koska luultavasti sen tulkintaohjeet ovat niin niukat, kuten myös sääntely \cite{okm}. Yhdistävänä kritiikkinä sekä Zaniewski että esitutkinta- ja pakkokeinotoimikunta mainitsevat muun muassa sen, kuinka takuumaksujen toimeenpano vaikuttaa tai Suomen tapauksessa vaikuttaisi pienituloisten taloustilanteeseen \cite{zaniewski14, okm}. Suomalainen toimikunta esittää lisäksi monia muitakin ongelmakohtia, sikäli takuujärjestelmä haluttaisiin ottaa Suomessa käyttöön, esimerkkinä he toteavat, että vakuusmaksujen maksamiseen tulisi todennäköisesti liittymään ''epätoivottavia lieveilmiöitä'' \cite{okm}. 
Tähän ongelmaan on Yhdysvalloissa jo osittain reagoitukin, sillä esimerkiksi Californian osavaltio päätti viime vuonna poistaa takuumaksut käytöstä \cite{cnn}. %Kritiikkiä on esitetty niin itse takuun rahallisesta määrästä (lähde?) kuin perusteista (propublica). @@ -174,69 +154,87 @@ Kritiikkiä on esitetty molemmissa maissa osaltaan samoihin asioihin. Suomessa p \section{''Kausaalipäättely uutena paradigmana''}\label{para} % miksi halutaan siirtyä (frekventistisen/bayes-ppäättelyn ongelmat), edut, esiintyminen, erot, käyttö -Kuten Pearl ja Mackenzie esittävät kirjassaan Miksi, ihmisillä on luontainen kausaalisen päättelyn taito \cite{miksi}. Tavalliset tilastollisen päättelyn menetelmät eivät tarjoa tapaa määritellä kausaalista yhteyttä: aineistosta voidaan päätellä erilaisia \emph{korrelaatioita}, mutta päättely \emph{A johtuu B:stä} vaatii uudenlaista lähestymistapaa. Käytännön tutkimuksessa kausaaliset yhteydet kiinnostavat erityisesti lääketieteen alalla. Kuten Kalisch toteaa, aiemmin päättely on perustunut jonkin biomarkkerin ja taudin samanaikaiseen ilmaantumiseen. Jos markkeri ja tauti ilmaantuvat samanaikaisesti, voidaanko markkerin arvoa muuttamalla hoitaa tautia? \cite{kalisch14} - -Syy-seuraussuhteen vahvuuden matemaattinen määrittely vaatii uutta lähestymistä myös todennäköisyyslaskennan merkintöihin. Pearl käyttää alkuperäisessä, englanninkielisessä kirjallisuudessa merkintää 'do' ilmaisemaan interventiota. Merkinnällä halutaan erottaa tavanomainen ehdollinen todennäköisyys $\pr(Y|X=x)$ interventiosta, jossa pakotamme muuttujan $X$ arvoon $x$: $\pr(Y|\text{do}(X=x))$. Kimmo Pietiläinen käyttää kirjan suomennoksessa do-operaattorista käännöstä \emph{tee}, mutta seuraan tässä tutkielmassa Pearlin merkintöjä, ellen erikseen muuta mainitse \cite{miksi}. Esittelen käyttämäni merkinnät tarkemmin kappaleessa \ref{kausaalimerk}. - - -* Esimerkkejä Miksi-kirjasta väärin määritellyistä malleista? 
Esimerkkejä aloista, joila jo käytetty, oleellisimmat pointit historiasta -%%%%%%%%% - -\section{Valikoitumisharha}\label{sl} +Kuten Pearl ja Mackenzie esittävät kirjassaan Miksi, ihmisillä on luontainen kausaalisen päättelyn taito \cite{miksi}. Tavalliset tilastollisen päättelyn menetelmät eivät tarjoa tapaa määritellä kausaalista yhteyttä: aineistosta voidaan päätellä erilaisia \emph{korrelaatioita}, mutta päättely \emph{A johtuu B:stä} vaatii uudenlaista lähestymistapaa. Käytännön tutkimuksessa kausaaliset yhteydet kiinnostavat erityisesti lääketieteen alalla. Kuten Kalisch toteaa, aiemmin kausaalisuuden päättely on perustunut korrelaatioiden havaitsemiseen. On hypotetisoitu, että biomarkkerin ja taudin samanaikainen ilmaantuminen viittaisi siihen, että markkeri aiheuttaa taudin. Voimmeko siis markkeria käsittelemällä vaikuttaa tautiin tai jopa parantaa se? \cite{kalisch14} -% aiempaa tutkimusta, miten voidaan muissa tutkimuksissa tassoittaa -> Tässä tutkimkssa +Syy-seuraussuhteen matemaattinen määrittely vaatii uutta lähestymistä myös todennäköisyyslaskennan merkintöihin. Pearl käyttää alkuperäisessä, englanninkielisessä kirjallisuudessa merkintää 'do' ilmaisemaan interventiota. Merkinnällä halutaan erottaa tavanomainen ehdollinen todennäköisyys $\pr(Y|X=x)$ interventiosta, jossa asetamme muuttujan $X$ arvoon $x$: $\pr(Y|\text{do}(X=x))$. Kimmo Pietiläinen käyttää kirjan suomennoksessa do-operaattorista käännöstä \emph{tee}, mutta seuraan tässä tutkielmassa Pearlin merkintöjä, ellen erikseen muuta mainitse \cite{miksi}. Alalla käytetään myös muita, alaindekseillä rikastettuja merkintätapoja \cite{pearl10}. Esittelen käyttämäni merkinnät tarkemmin kappaleessa \ref{kausaalimerk}. -Tässä tutkielmassa yritän määrittää rakenteen, jonka avulla voidaan tehdä ennusteita aineston harhaisuudesta huolimatta. Meidän tapauksessamme harha syntyy tuomarien päätöksistä -- jos tuomari päättää evätä epäillyltä takuut, emme voi tehdä havaintoja epäillyn rikoksen uusinnastaan. 
Tällöin voidaan puhua ei-satunnaisesta puuttuneisuudesta, koska on selvää että tulosten puute ei ole minkäänlaisen satunnaisprosessin tulos: vaarallisimmat rikolliset halutaan ottaa talteen ja vaarattomimmat päästää pois \cite{laaksonen13}. +Kausaalipäättelyssä mallit voidaan esittää graafeina, eli verkkoina. Verkoista voidaan suoraan lukea eri muuttujien relaatiot kausaalisuuden suuntien ja riippuvuuksien suhteen. -Lakkaraju käyttää termiä harhasta \emph{''selective labels''} \cite{lakkaraju17}. - -%%%%%%%%% %%%%%%%%% -%%%%%%%%% - -\chapter{Aineistot}\label{aineisto} -Tässä luvussa kuvaillaan käytetyt aineistot ja niiden ominaispiirteet. +\section{Valikoitumisharha -- seulotun aineiston ongelma}\label{sl} -%%%%%%%%% +Aineiston luova mekanismi on esitetty kuvassa \ref{valikoitumisharha} ja toimii siten, että aluksi jokin henkilö tai muu entiteetti saapuu päätöksentekijän eteen seulottavaksi. Päätöksentekijän tavoitteena on estää haitallinen tulos ($y=0$) pitäen samalla myönteisten päätösten ($t=1$) määrä mahdollisimman pienenä. Seuloja pyrkii siis antamaan kielteisen päätöksen kaikille niille, joilla epätoivottava tulos on todennäköisin. Päätöksen jälkeen henkilö siirtyy vaiheeseen, jossa Kohtalo määrittää hänelle tuloksen $y\in\{0, 1\}$. Kielteisen päätöksen saaneille tulos voidaan merkitä puuttuvaksi tai onnistuneeksi, koska haitallista tapahtumaa ei havaita. -\section{COMPAS}\label{compas} +Aineiston generoivaa mekanismia voidaan havainnollistaa lääke- ja oikeustieteen alan esimerkeillä. Henkilöllä viitataan ensin mainitussa potilaaseen ja jälkimmäisessä epäiltyyn. Seuloja voi olla esimerkiksi lääkäri, joka päättää annetaanko potilaalle vahvempaa ja samalla kalliimpaa lääkettä, jolloin relapsia ei havaita. Oikeudellisessa asetelmassa seulojalla voidaan tarkoittaa tuomaria, joka päättää epäillyn vapauttamisesta takuita vastaan ilman pelkoa rikoksen uusimisesta. 
Molemmilla päättäjillä on selkeä kannustin estää haitalliset tulokset -- sairauskohtaukset tai rikokset -- pitäen samalla päätöksistä aiheutuvat rasitteet yhteiskunnalle ja yksilöiden elämille mahdollisimman pienenä. -COMPAS-aineisto (Correctional Offender Management Profiling for Alternative Sanctions) on alun perin ProPublica-julkaisun koostama aineisto yhteensä 18 610 amerikkalaisesta. Aineistossa on muun muassa heidän demografiset tiedot, kuten ikä, sukupuoli ja rotu, ja rikoshistoriaan liittyvät tiedot. Oikeammin COMPAS viittaa Northpointe-yhtiön työkaluun, joka antaa arvion epäillyn rikoksenuusintariskistä. Arvio perustuu epäillyn vastauksiin kyselyyn, jossa tiedustellaan hänen taustoistaan, kuten lähipiirin huumeidenkäytöstä ja epäillyn taipumuksesta väkivaltaisuuteen. ProPublica kokosi aineiston alun perin paljastaakseen arvion tuottavan algoritmin mustia syrjivän luonteen. ProPublican analyysi osoitti, että mustat saivat järjestelmällisesti korkeamman riskiarvion kuin valkoihoiset. \cite{propublica16} +Havaintoja voi puuttua erilaisissa tutkimuksissa useista eri syistä. Kyselytutkimuksissa vastauskatoa voi syntyä esimerkiksi vastaajan haluttomuudesta vastata kysymykseen tai yksinkertaisesti siitä syystä, että vastaajaa ei tavoiteta. Jos aineiston puuttuneisuusmekanismi on luonteeltaan täysin satunnainen, eli vastauksen puuttuneisuus ei liity millään tavalla mitattuihin muuttujiin, voidaan sanoa aineistoa puuttuvan \emph{täysin satunnaisesti}. Käänteisessä tapauksessa voidaan puhua \emph{ei-satunnaisesta puuttuvuudesta}. \cite{laaksonen13} -ProPublica esittää artikkelinsa metodologiaosiossa, kuinka he ovat päätyneet lopulliseen aineistoon, joka käsittää tiedot 6172 henkilöstä. Pääpiirteissään he ovat siistineet aineistoa siten, että se yhdistää oikeat henkilöt oikeisiin pisteytyksiin ja oikeisiin uusintatuomioihin. Joitakin johdettuja mutujia luotiin, kuten tekstuaalinen kuvaus desiilipisteytyksestä scoretext joka ryhmittää etc etc. 
+Tässä tutkielmassa tarkasteltavassa asetelmassa havaintojen puuttuminen liittyy sekä havaittuihin että havaitsemattomiin muuttujiin. Puuttuneisuuden voidaan sanoa olevan \emph{satunnaista ehdollisesti}, koska aineistoa puuttuu vain yksilöiltä, joilla on korkea todennäköisyys haitalliseen tulokseen. (Erilaisia aineiston puuttuneisuusmekanismeja esitellään laajemmin esimerkiksi Laaksosen kirjassa \emph{Surveymetodiikka}.) Puuttuneisuutta voidaan korvata imputoinnilla, jolla yritetään tehdä mahdollisimman hyvä arvaus puuttuvasta arvosta. Todistan tutkielmassani myöhemmin, että kausaalipäättelyä hyödyntämällä voimme estimoida havaitusta, valikoitumisharhaisesta aineistosta haluttuja tunnuslukuja ilman imputointia harhattomasti. \cite{laaksonen13} Englanninkielisessä kirjallisuudessa seulotun aineiston ongelmasta on alettu käyttää Lakkarajun esittämää termiä \emph{selective labels} \cite{lakkaraju17}. % se lähde, missä näin väitettiin -\begin{table}[h!] +\begin{figure}%[H] \centering -\begin{tabular}{lrrrrrrrrrr} -\hline \hline - Muuttujan nimi & $\bar{x}$ & Keskihajonta & Min & 25\% & 50\% & 75\% & Max \\ -\hline \hline - age & 34,5 & 11,7 & 18 & 25 & 31 & 42 & 96 \\ - priors\_count & 3,25 & 4,74 & 0 & 0 & 1 & 4 & 38 \\ \hline - days\_b\_screening\_arrest & -1,74 & 5,08 & -30 & -1 & -1 & -1 & 30 \\ - decile\_score & 4,42 & 2,84 & 1 & 2 & 4 & 7 & 10 \\ - is\_recid & 0,484 & 0,500 & 0 & 0 & 0 & 1 & 1 \\ \hline - two\_year\_recid & 0,455 & 0,498 & 0 & 0 & 0 & 1 & 1 \\ - length\_of\_stay & 14,6 & 46,7 & -1 & 0 & 1 & 5 & 799 \\ -\hline \hline -\end{tabular} -\caption{COMPAS-aineiston numeeristen muuttujien hajontalukuja} -\label{table:1} -\end{table} +\includegraphics[scale = 0.4]{valikoitumis_iso} +\caption{Valikoitumisharha aineiston generoivana mekanismina \cite{lakkaraju17}} +\label{valikoitumisharha} +\end{figure} +%%%%%%%%% +%%%%%%%%% %%%%%%%%% -\section{Synteettinen}\label{synteettinen} - -Synteettinen aineisto luotiin Lakkarajun artikkelissaan selostamalla 
tavalla \cite{lakkaraju17}. aineistoan simuloitiin kolme muuttujaa $X$, $Z$, ja $W$. Näistä muuttujista $X$ vastaa informaatiota, joka on sekä mallin että tuomarin havaittavissa, eli informaatiota, joka on kirjattu oikeuden pöytäkirjoihin tai on kerättävissä muista rekistereistä, kuten vastaajan sukupuoli. Muuttujalla $Z$ kuvataan tietoa, jonka vain tuomari voi havaita: kuten Lakkaraju havainnollistaa, tällaista voi olla esimerkiksi tieto siitä, onko vastaajalla perhettä mukana oikeussalissa \cite{lakkaraju17}. $W$ on mallissa havainnollistamassa reaalimaailmaa. Muuttujalla esitämme aineistossa informaatiota, joka ei ole saatavilla päätöksentekijöille eikä mallille mutta vaikuttaa silti rikoksenuusimisriskiin. aineistossa nämä ovat kaikki riippumattomia standardinormaalijakautuneita satunnaismuuttujia, eli $X, W, Z \sim N(0, 1) \independent$. - -Yhdistämme henkilöt satunnaisesti kuhunkin $M = 500$ tuomariin, joista jokaiselle määritellään hyväksymisprosentti $r \in [0,1]$. Tuomarin hyväksymisprosentti määritetään ottamalla arvoja tasajakaumasta suljetulta väliltä [0,1; 0,9] ja sitten pyöristämällä ne 10 desimaalin tarkkuuteen. Tulosmuuttuja Y simuloidaan määrittämällä sen ehdollinen todennäköisyys seuraavasti: $\pr(Y=0|X, Z, W)=\frac{1}{1+\text{exp}\{-(\beta_XX+\beta_ZZ+\beta_WW)\}}$, missä kertoimet $\beta_X$, $\beta_Z$ ja $\beta_W$ on asetettu arvoihin 1, 1 ja 0,2 vastaavassa järjestyksessä. \cite{lakkaraju17} +\chapter{Aineiston generointi}\label{aineisto} -Päätösmuuttujan $T$ ehdolinen todennäköisyys $\pr(T=0|X, Z)=\frac{1}{1+\text{exp}\{-(\beta_XX+\beta_ZZ)\}} + \epsilon$ missä $\epsilon \sim N(0, 0,1)$ vastaa pientä määrää kohinaa. Henkilöltä $i$ kielletään takuut, eli $T_i=0$ jos muuttujan $T$ ehdollinen todennäköisyys on tuomarin $j$ suurimman $(1-r)\cdot 100\%$ joukossa. Lopuksi koulutusaineisto suodatettiin siten, että saatavissa oli vain yksilöt, jotka päästettiin vapaaksi $(T=1)$. \cite{lakkaraju17} -\begin{table}[h!] 
+%%%%%%%%%% +% +%\section{COMPAS}\label{compas} +% +%COMPAS-aineisto (Correctional Offender Management Profiling for Alternative Sanctions) on alun perin ProPublica-julkaisun koostama aineisto yhteensä 18 610 amerikkalaisesta. Aineistossa on muun muassa heidän demografiset tiedot, kuten ikä, sukupuoli ja rotu, ja rikoshistoriaan liittyvät tiedot. Oikeammin COMPAS viittaa Northpointe-yhtiön työkaluun, joka antaa arvion epäillyn rikoksenuusintariskistä. Arvio perustuu epäillyn vastauksiin kyselyyn, jossa tiedustellaan hänen taustoistaan, kuten lähipiirin huumeidenkäytöstä ja epäillyn taipumuksesta väkivaltaisuuteen. ProPublica kokosi aineiston alun perin paljastaakseen arvion tuottavan algoritmin mustia syrjivän luonteen. ProPublican analyysi osoitti, että mustat saivat järjestelmällisesti korkeamman riskiarvion kuin valkoihoiset. \cite{propublica16} +% +%ProPublica esittää artikkelinsa metodologiaosiossa, kuinka he ovat päätyneet lopulliseen aineistoon, joka käsittää tiedot 6172 henkilöstä. Pääpiirteissään he ovat siistineet aineistoa siten, että se yhdistää oikeat henkilöt oikeisiin pisteytyksiin ja oikeisiin uusintatuomioihin. Joitakin johdettuja mutujia luotiin, kuten tekstuaalinen kuvaus desiilipisteytyksestä scoretext joka ryhmittää etc etc. 
+% +%\begin{table}[H] +%\centering +%\begin{tabular}{lrrrrrrrrrr} +%\hline \hline +% Muuttujan nimi & $\bar{x}$ & Keskihajonta & Min & 25\% & 50\% & 75\% & Max \\ +%\hline \hline +% age & 34,5 & 11,7 & 18 & 25 & 31 & 42 & 96 \\ +% priors\_count & 3,25 & 4,74 & 0 & 0 & 1 & 4 & 38 \\ \hline +% days\_b\_screening\_arrest & -1,74 & 5,08 & -30 & -1 & -1 & -1 & 30 \\ +% decile\_score & 4,42 & 2,84 & 1 & 2 & 4 & 7 & 10 \\ +% is\_recid & 0,484 & 0,500 & 0 & 0 & 0 & 1 & 1 \\ \hline +% two\_year\_recid & 0,455 & 0,498 & 0 & 0 & 0 & 1 & 1 \\ +% length\_of\_stay & 14,6 & 46,7 & -1 & 0 & 1 & 5 & 799 \\ +%\hline \hline +%\end{tabular} +%\caption{COMPAS-aineiston numeeristen muuttujien hajontalukuja} +%\label{table:1} +%\end{table} + +%%%%%%%%% + +%\section{Synteettinen}\label{synteettinen} + +Synteettinen aineisto luotiin Lakkarajun selostamalla tavalla. Aineistoon simuloitiin kolme muuttujaa $X$, $Z$, ja $W$. Näistä muuttujista $X$ vastaa informaatiota, joka on sekä mallin että päätöksentekijän havaittavissa. Käytännössä muuttuja $X$ vastaa kirjallista informaatiota, joka on kirjattu erilaisiin pöytäkirjoihin tai rekistereihin. Muuttujalla $Z$ kuvataan tietoa, jonka vain päätöksentekijä voi havaita: kuten Lakkaraju havainnollistaa, tällaista voi olla oikeudessa esimerkiksi tieto siitä, onko vastaajalla perhettä mukana oikeussalissa. $W$ tuo malliin kohinaa. Muuttujalla esitämme aineistossa informaatiota, joka ei ole saatavilla päätöksentekijöille eikä mallille, mutta vaikuttaa silti epätoivottavan tuloksen riskiin. Aineistossa nämä ovat kaikki riippumattomia standardinormaalijakautuneita satunnaismuuttujia, eli $X, W, Z \sim N(0, 1) \independent$. \cite{lakkaraju17} + +Aineistossa jyvitämme jokaiselle $M=100$ päätöksentekijälle 500 arvioitavaa. Kaikille päättäjille arvotaan hyväksymisprosentti ottamalla arvoja tasajakaumasta suljetulta väliltä [0,1; 0,9] ja sitten pyöristämällä saadut arvot 10 desimaalin tarkkuuteen. 
Tulosmuuttuja Y määritetään ehdollisen todennäköisyyden +\begin{equation} \label{y_ehd} +\pr(Y=0|X, Z, W)=\dfrac{1}{1+\text{exp}\{-(\beta_XX+\beta_ZZ+\beta_WW)\}} +\end{equation} +mukaisesti. Jos $\pr(Y=0|X, Z, W) \geq 0,5$, tulosmuuttujan arvoksi asetetaan 0 ja vastaavasti jos $\pr(Y=0|X, Z, W) < 0,5$ muuttujan arvoksi asetetaan 1. Lausekkeissa \ref{y_ehd} ja \ref{t_ehd} olevat kertoimet $\beta_X$, $\beta_Z$ ja $\beta_W$ on asetettu arvoihin 1, 1 ja 0,2 vastaavassa järjestyksessä. \cite{lakkaraju17} + +Päätösmuuttuja $T$ määritetään kaksivaiheisesti: ensin määritetään todennäköisyys kielteiselle päätökselle ja sitten muuttujan arvo asetetaan näiden todennäköisyyksien keskinäisen suuruuden mukaisesti. Muuttujan $T$ ehdollinen todennäköisyys +\begin{equation} \label{t_ehd} +\pr(T=0|X, Z)=\frac{1}{1+\text{exp}\{-(\beta_XX+\beta_ZZ)\}} + \epsilon, +\end{equation} +missä $\epsilon \sim N(0, 0,1)$ vastaa pientä määrää kohinaa. Henkilölle $i$ annetaan kielteinen päätös, eli $T_i=0$, jos muuttujan $T$ ehdollinen todennäköisyys on päättäjän $j$ suurimman $(1-r)\cdot 100\%$ joukossa. Toisin sanoen tuomari $j$ antaa myönteisen päätöksen $r$ prosentille hänen arvioitavakseen annetuista henkilöistä, joilla on alin todennäköisyys kielteiseen päätökseen. \cite{lakkaraju17} + +Kun aineisto saatiin simuloitua, se jaettiin koneoppimisen käytäntöjen mukaisesti kahteen yhtä suureen osaan, niin sanottuihin koulutus- ja testiaineistoihin. Lopuksi koulutusaineistoa muokattiin siten, että tulosmuuttujan arvo oli saatavissa vain yksilöille, joille oli annettu positiivinen päätös $(T=1)$. Kielteisen päätöksen saaneille tulosmuuttujan arvo asetettiin arvoon NA, kuten kuvassa \ref{valikoitumisharha}. Syntetisoidun aineiston keskeisimmät hajontaluvut on esitetty taulukossa \ref{synt_hl}. 
\cite{lakkaraju17} + +\begin{table}[H] \centering \begin{tabular}{lrrrrrrrrrr} \hline \hline @@ -247,12 +245,12 @@ Muuttuja & Keskiarvo & Keskihajonta & Minimi & 25\% & 50\% & Z & 0.01 & 1.00 & -4.85 & -0.67 & 0.00 & 0.68 & 4.24 \\ W & 0.01 & 1.00 & -4.03 & -0.67 & 0.01 & 0.68 & 4.29 \\ result\_Y & 0.50 & 0.50 & 0.00 & 0.00 & 0.00 & 1.00 & 1.00 \\ - probabilities\_T & 0.50 & 0.28 & -0.34 & 0.28 & 0.50 & 0.72 & 1.30 \\ +% probabilities\_T & 0.50 & 0.28 & -0.34 & 0.28 & 0.50 & 0.72 & 1.30 \\ decision\_T & 0.48 & 0.50 & 0.00 & 0.00 & 0.00 & 1.00 & 1.00 \\ \hline \end{tabular} \caption{Synteettisen aineiston muuttujien hajontalukuja} -\label{table:2} +\label{synt_hl} \end{table} %%%%%%%%% @@ -261,63 +259,60 @@ Muuttuja & Keskiarvo & Keskihajonta & Minimi & 25\% & 50\% & \chapter{Menetelmät}\label{metodit} -Tässä kappaleessa selostan analyyseissa, mallinnuksessa ja validoinnissa käyttämäni menetelmät. - -%%%%%%%%% - -\section{Aiemmat tutkimukset?}\label{aiemmat} +Tässä kappaleessa selostan mallin laatimisessa ja arvioinnissa käyttämäni teoreettisen taustan. Koska kausaalinen malli esitetään verkkona, käyn aluksi läpi vaadittavat verkkoteoreettiset määritelmät. Esitän sen jälkeen mallini graafina ja osoitan kausaalisen vaikutuksen olevan identifioituva. -Aiemmat tutkimukset ovat lähestyneet monesta näkökulmasta, mutta ilman kausaatiota. - -%%%%%%%%% - -\section{Validointimetodit}\label{validointi} - -Tulosten arvioinnissa käytetään visuaalista tarkastelua ja XZY. Laskemme arvioista vapaaksi päässeiden uusijoiden suhteen kaikkiin tuomittuihin, eli niin sanotun virhesuhteen (failure rate). - -%%%%%%%%% +%%%%%%%%%% +% +%\section{Aiemmat tutkimukset?}\label{aiemmat} +% +%Aiemmat tutkimukset ovat lähestyneet monesta näkökulmasta, mutta ilman kausaatiota. +% +%%%%%%%%%% +% +%\section{Validointimetodit}\label{validointi} +% +%Tulosten arvioinnissa käytetään +% +%Tulosten arvioinnissa käytetään visuaalista tarkastelua ja XZY. 
Laskemme arvioista vapaaksi päässeiden uusijoiden suhteen kaikkiin tuomittuihin, eli niin sanotun virhesuhteen (failure rate). +% +%%%%%%%%%% \section{Verkkoteoria}\label{verkot} -Kausaalipäättelyn mallit määritellään verkkoina. Esitän tässä kappaleessa lyhyesti kaikki tarvittavat verkkoteoreettiset määritelmät, joita tulen hyödyntämään. Noudatan määritelmissä Oinosta \cite{oinonen16}. - +Verkot koostuvat \emph{solmuista} ja \emph{kaarista}, joita voidaan havainnollistaa pisteinä ja viivoina tai nuolina näiden pisteiden välillä. Kaaret ovat järjestettyjä pareja, kuten verkot itsekin, mutta oletan tavallisimmat joukko-opin merkinnät ja käsitteet tunnetuiksi. Noudatan määritelmissä Oinosta \cite{oinonen16} ja erikseen merkityissä kohdissa Kivistä \cite{tira}. % TiRan materiaalit?? % Ota esimerkki verkko ja kirjoita siitä lyhyet havainnollistavat kommentit -\begin{figure}[H]\label{esverkko} +\begin{figure}[H] \centering \includegraphics[scale = 0.5]{full_model} -\caption{Esimerkkiverkko $H = (V, E)$, missä $V = \{R, X, Z, T, Y\}$.} +\caption{Eräs verkko $H = (V, E)$, missä $V = \{R, X, Z, T, Y\}$.} +\label{esverkko} \end{figure} \begin{maar}[Suunnattu verkko] \label{suun_verkko} -\emph{Suunnattu verkko G} on pari $(V, E)$, missä $V \neq \emptyset$ on solmujen joukko ja $$E = \{(a, b) \in V \times V | \text{ solmusta } a \text{ on nuoli solmuun } b \} $$ on \emph{kaarien} joukko. +\emph{Suunnattu verkko G} on pari $(V, E)$, missä $V \neq \emptyset$ on \emph{solmujen} joukko ja $$E = \{(a, b) \in V \times V | \text{ solmusta } a \text{ on nuoli solmuun } b \} $$ on \emph{kaarien} joukko. -\end{maar} +\end{maar} -\noindent Kuvassa \ref{esverkko} näkyvässä verkossa esimerkiksi $(X, R) \in E$, mutta $(T, Z) \notin E$, koska solmusta $T$ ei ole nuolta solmuun $Z$. Lisäksi voidaan todeta, että kaarien joukkoon kuuluu yhdeksän järjestettyä paria ja solmujen joukko $V$ käsittää viisi alkiota, jotka on lueteltu kuvatekstissä. 
+\smallskip + +\noindent Kuvassa \ref{esverkko} näkyvässä verkossa esimerkiksi $(X, R) \in E$, mutta $(T, Z) \notin E$, koska solmusta $T$ ei ole nuolta solmuun $Z$. Lisäksi voidaan todeta, että kaarien joukkoon kuuluu yhdeksän järjestettyä paria ja solmujen joukko $V$ käsittää viisi alkiota. + +\smallskip \begin{maar} % Lähtösolmu, maalisolmu, vierussolmu Oletetaan, että $G=(V, E)$ on suunnattu verkko ja $a, b \in V$. \\ -\noindent Merkintä $a \rightarrow b$ tarkoittaa, että $(a, b) \in E$. Tällöin sanotaan, että $a$ on kaaren $(a, b)$ \emph{lähtösolmu} ja $b$ on kaaren $(a, b)$ \emph{maalisolmu}. Sanotaan myös, että solmu $b$ on solmun $a$ \emph{vierussolmu}. \\ +\noindent Merkintä $a \rightarrow b$ tarkoittaa, että $(a, b) \in E$. Tällöin sanotaan, että $a$ on kaaren $(a, b)$ \emph{lähtösolmu} ja $b$ on kaaren $(a, b)$ \emph{maalisolmu}. Sanotaan myös, että solmu $b$ on solmun $a$ \emph{vierussolmu} tai että solmut $a$ ja $b$ ovat \emph{vierekkäisiä}. \\ \noindent Jos $(a, a) \in E$, sanotaan suunnatussa verkossa olevan \emph{silmukka} solmussa $a$. \end{maar} -\noindent Esimerkkiverkossa $H$ kaaren $(Z, T)$ lähtösolmu on solmu $Z$ ja maalisolmu solmu $T$. Lisäksi huomataan, että verkossa $H$ ei ole yhtään silmukkaa. - -\begin{maar}[Vierekkäisyys] \label{vierekkaisyys} - -Oletetaan, että $G=(V, E)$ on suunnattu verkko ja $a, b \in V$. \\ - -\noindent Jos solmujen $a$ ja $b$ välillä on nuoli, niin solmujen $a$ ja $b$ sanotaan olevan \emph{vierekkäisiä}. -\end{maar} - -\noindent Kuvan \ref{esverkko} verkosta havaitaan, että melkein kaikki solmut ovat toistensa vierussolmuja. Ainoa poikkeus on solmut $R$ ja $Y$, joiden välillä ei ole nuolta ja jotka eivät siten ole vierekkäisiä. +\noindent Esimerkkiverkossa $H$ kaaren $(Z, T)$ lähtösolmu on solmu $Z$ ja maalisolmu solmu $T$. Lisäksi huomataan, että verkossa $H$ ei ole yhtään silmukkaa. Kuvan \ref{esverkko} verkosta havaitaan, että melkein kaikki solmut ovat toistensa vierussolmuja. 
Ainoa poikkeus on solmut $R$ ja $Y$, joiden välillä ei ole nuolta ja jotka eivät siten ole vierekkäisiä. \begin{maar}[Yksinkertainen suunnattu verkko] \label{yk_suun_verkko} @@ -330,17 +325,24 @@ Oletetaan, että $G = (V,E)$ on suunnattu verkko, jossa ei ole yhtään silmukka \begin{maar}[Polku ja suunnattu polku] \label{polku} -Oletetaan, että $G$ on yksinkertainen verkko ja $n \in \N, n \geq 1$. \\ +Oletetaan, että $G$ on yksinkertainen verkko ja $n \in \N, n \geq 1$. -\noindent Verkon $G$ solmujen jono $v_1, \ldots, v_n$ on \emph{polku} solmusta $v_1$ solmuun $v_n$, jos jonon jokaisesta solmusta on kaari jonon seuraavaan solmuun. Polkua voidaan merkitä $v_1 \leadsto v_n$. \\ - -\noindent Jos verkko $G$ on suunnattu verkko, $a, b \in V$ ja kaikki polun $a \leadsto b$ kaaret kulkevat kaarien suuntien mukaisesti, voidaan täsmentää, että polku $a \leadsto b$ on \emph{suunnattu polku}. +\begin{enumerate}[(a)] +\item Verkon $G$ solmujen jono $v_1, \ldots, v_n$ on \emph{polku} solmusta $v_1$ solmuun $v_n$, jos jonon jokaisesta solmusta on kaari jonon seuraavaan solmuun. Polkua voidaan merkitä $v_1 \leadsto v_n$. +\item Jos verkko $G$ on suunnattu verkko, $a, b \in V$ ja kaikki polun $a \leadsto b$ kaaret kulkevat kaarien suuntien mukaisesti, voidaan täsmentää, että polku $a \leadsto b$ on \emph{suunnattu polku}. +\item Polku on \emph{yksinkertainen}, jos kukin solmu esiintyy polussa vain kerran, paitsi että viimeinen ja ensimmäinen saavat olla sama solmu. \cite{tira} +\item Yksinkertainen polku on \emph{sykli} eli \emph{kehä}, jos viimeinen ja ensimmäinen solmu ovat samat. \cite{tira} %Suuntaamattomassa verkossa lisäksi vaaditaan, että syklissä pitää olla vähintään kolme solmua. +\end{enumerate} \end{maar} -\noindent Huomataan, että esimerkkinä käytetyssä verkossa $H$ on useita polkuja solmusta $R$ solmuun $Y$. 
Polku $R \rightarrow T \rightarrow Y$ on suunnattu polku ja $R \leftarrow X \rightarrow Y$ on tavallinen polku, sillä solmujen $R$ ja $X$ välillä kuljetaan nuolen suunnan vastaisesti. +\smallskip + +\noindent Huomataan, että verkossa $H$ on useita polkuja solmusta $R$ solmuun $Y$. Polku $R \rightarrow T \rightarrow Y$ on ainut suunnattu polku ja $R \leftarrow X \rightarrow Y$ on tavallinen polku, sillä solmujen $R$ ja $X$ välillä kuljetaan nuolen suunnan vastaisesti. Verkossa ei ole yhtään sykliä eli se on \emph{syklitön}. Suunnatuista ja syklittömistä verkoista voidaan käyttää englannin kielestä juontuvaa lyhennettä DAG \emph{(directed acyclic graph)} \cite{tira}. -\begin{maar} \label{sukulaisuus} +\smallskip + +\begin{maar}[Jälkeläisyys] \label{sukulaisuus} Oletetaan, että $G=(V, E)$ on suunnattu verkko ja $a, b \in V$. \\ @@ -349,114 +351,168 @@ Oletetaan, että $G=(V, E)$ on suunnattu verkko ja $a, b \in V$. \\ \noindent Esimerkiksi kuvan \ref{esverkko} verkossa solmulla $Y$ ei ole jälkeläisiä ja solmun $Z$ jälkeläiset ovat kaikki muut verkon solmut poislukien se itse, eli solmun $Z$ jälkeläiset on joukko $V \setminus \{Z\}$. +Kausaalipäättelyssä kausaalisten vaikutusten identifioimiseksi tarvitaan usein selvittää niin sanotut \emph{haarukka-} ja \emph{käänteiset haarukkasolmut}. Määritellään ne seuraavaksi. + +\begin{maar}[Haarukkasolmu] \label{haarukka} + +Oletetaan, että suunnatussa verkossa on polku $A \leftarrow B \rightarrow C \leftarrow D$. Tällöin solmua B sanotaan \emph{haarukkasolmuksi} ja solmua C \emph{käänteiseksi haarukkasolmuksi}. + +\end{maar} + %%%%%%%%% \section{Kausaalipäättely}\label{kausaali} -Erityisesti \cite{pearl10}. Esittele merkunnät, määritelmät ja mallli. Käännökset Miksi-kirjaa mukaillen?
+Judea Pearl esittää artikkelissaan \cite{pearl10}, että kaikessa tutkimuksessa, joka hyödyntää kausaalipäättelyä, tulisi edetä järjestelmällisesti neljässä vaiheessa: + +\begin{enumerate} -\subsection{Johdanto?}\label{kausaalijohd} +\item Määrittely: Määritä tavoitesuuruus Q funktiona Q($\M$), joka voidaan laskea kaikille malleille $\M$. +\item Oletuksien esitys: Esitä kausaaliset oletukset luonnollisella kielellä ja ilmaise niiden rakenteellinen osa verkkona. +\item Identifioituvuus: Osoita, onko tavoitesuuruus määritettävissä (ilmaistavissa estimoitavina parametreina). +\item Estimointi: Estimoi tavoitesuuruutta, jos se on identifioituva, tai approksimoi sitä, jos se ei ole. Tarkista mallin mahdolliset (tilastolliset) oletukset ja implikaatiot ja muuta mallia, jos oletukset osoittautuvat paikkaansa pitämättömiksi. -Kausaalipäättelyssä mallit määritellään usein yksinkertaisina suunnattuina verkkoina. Mallin määrittämästä verkosta voidaan suoraan lukea kausaaliset riippuvuussuhteet ja malliin kuuluvat muuttujat. Jos mallissa on solmut $A$ ja $B$ ja jos solmu $B$ on solmun $A$ jälkeläinen, niin muuttujalla $A$ on mallin mukaan jonkinlainen kausaalinen vaikutus muuttujaan $B$. Jos verkossa muuttujien välillä ei ole jälkeläisyyssuhdetta, niin ne ovat toisistaan riipumattomat. Kausalisen vaikutuksen funktionaalista muotoa ei usein määritellä. +\end{enumerate} + +\noindent Tutkielmani tavoitteena on esittää algoritmi, jolla voimme paremmin ennustaa riskiä populaatiotasolla, kun muutamme myönteisten päätösten osuutta ja kun käytössä on valintaharhasta kärsivää aineistoa. Todennäköisyyslausekkein ilmaistuna haluamme siis selvittää vapautusprosentin muutoksen vaikutusta epätoivottavan tapahtuman $Y=0$ todennäköisyyteen, mikä voidaan kirjoittaa muotoon +\begin{equation} \label{q_m} +\pr(Y=0 | \text{do}(R=r)).
+\end{equation} -* Usein funktionaalista muotoa ei määritellä,, lisää tähän ne nuoliversiot yhtälöistä havainnollistamaan, että siirrytään yhtäsuuruudesta määräytymiseen \cite{kalisch14} +\noindent Huomataan, että lauseke \ref{q_m} ei riipu mistään mallista $\M$, joten se täyttää Pearlin tavoitesuuruuden Q määritelmän mukaiset ehdot. -\subsection{Merkinnät}\label{kausaalimerk} +Kausaalipäättelyssä mallit määritellään usein yksinkertaisina suunnattuina verkkoina. Mallin määrittämästä verkosta voidaan suoraan lukea kausaaliset riippuvuussuhteet ja malliin kuuluvat muuttujat. Jos mallissa on solmut $A$ ja $B$ ja jos solmu $B$ on solmun $A$ jälkeläinen, niin muuttujalla $A$ on mallin mukaan jonkinlainen kausaalinen vaikutus muuttujaan $B$. Jos verkossa muuttujien välillä ei ole jälkeläisyyssuhdetta, niin ne ovat toisistaan riippumattomat. Kausaalisen vaikutuksen funktionaalista muotoa ei usein määritellä. -Kausaalipäättelyssä käytettävät merkinnät noudattelevat pitkälle tavallisia todennäköisyyslaskennan merkintöjä. Kun selvitetään muuttujan $X$ vaikutusta muuttujaan $Y$ ja tehdään interventio asettamalla muuttuja $X$ arvoon $x_0$, sitä merkitään $\pr(Y| \text{do} (X=x_0))$. +\subsection{Merkinnät ja keskeiset lauseet}\label{kausaalimerk_laus} -\subsection{Määritelmät}\label{kausaalimäär} +Kausaalipäättelyssä käytettävät merkinnät noudattelevat pitkälle tavallisia todennäköisyyslaskennan merkintöjä. Kun selvitetään muuttujan $X$ vaikutusta muuttujaan $Y$ ja tehdään interventio asettamalla muuttuja $X$ arvoon $x_0$, sitä merkitään $\pr(Y| \text{do} (X=x_0))$. +Käydään seuraavaksi läpi kausaalilaskennan kannalta keskeisimmät lauseet. Lauseiden todistukset sivuutetaan, mutta ne on löydettävissä Pearlin artikkelin lähteistä \cite{pearl10}. Määritelmät \ref{d_sep} ja \ref{takaovi} \textbf{JNE}.
-Joukko $\s$ sulkee / katkaisee (blocks) polun $p$, jos vähintään toinen seuraavista ehdoista on voimassa: +\begin{maar}[d-separoituvuus \cite{pearl10}]\label{d_sep} + +Joukko $\s$ katkaisee (blocks) polun $p$, jos vähintään toinen seuraavista ehdoista on voimassa: \begin{enumerate}[(a)] -\item Polku $p$ sisältää vähintään yhden solmun, joka on jonkin kaaren lähtösolmu ja kuuluu joukkoon $\s$. (arrow-emitting) +\item Polku $p$ sisältää vähintään yhden solmun, joka on jonkin polun kulkusuuntaisen kaaren lähtösolmu ja kuuluu joukkoon $\s$. (arrow-emitting) \item Polku $p$ sisältää vähintään yhden käänteisen haarukkasolmun (collision node), joka ei kuulu joukkoon $\s$ ja jolla ei ole jälkeläisiä joukossa $\s$. \end{enumerate} +\noindent Jos joukko $\s$ katkaisee kaikki polut muuttujasta $X$ muuttujaan $Y$, sanotaan joukon $\s$ d-separoivan muuttujat $X$ ja $Y$. Tällöin $X$ ja $Y$ ovat riippumattomia ehdolla $\s$, eli $X \independent Y | \s$. + \end{maar} -\begin{maar}\label{adjustment} -Oletetaan, että halutaan selvittää (satunnais)muuttujan X kausaalista vaikutusta muuttujaan Y. Joukko $\s$ on \emph{riittävä} tasoitukseen (adjustment), kun seuraavat ehdot ovat voimassa: \textbf{sufficifient to adjusment = identifioituva?} + +\begin{maar}[Takaovikriteeri (\emph{back-door criterion}) \cite{pearl10}] \label{takaovi} + +Oletetaan, että halutaan selvittää muuttujan X kausaalista vaikutusta muuttujaan Y. Joukko $\s$ on \emph{riittävä} vaikutuksen selvittämiseen (sufficient for adjustment), kun seuraavat ehdot ovat voimassa: \begin{enumerate}[(1)] \item Yksikään joukon $\s$ alkioista ei ole solmun X jälkeläinen. -\item Joukon $\s$ alkiot katkaisevat kaikki märitelmän \ref{d_sep} mukaiset polut / ''takaovireitit'' solmusta X solmuun Y. +\item Joukon $\s$ alkiot katkaisevat kaikki määritelmän \ref{d_sep} mukaiset polut solmusta X solmuun Y. \end{enumerate} \end{maar} + + \subsection{Malli}\label{kausaalimalli} -Mallimme esitellään alla. 
Mallissamm +Malli sisältää viisi muuttujaa, jotka on esitelty lyhyesti taulukossa \ref{syntmjat}. Muuttujalla $R$ kuvataan päätöksentekijän hyväksymisprosenttia, eli sitä prosentuaalista osuutta henkilöistä, joilla on pienin vaara epätoivottavaan tulokseen ja joille siten voidaan antaa myönteinen päätös. $X$ ilmentää henkilön henkilökohtaisia ominaisuuksia, jotka ovat sekä päätöksentekijän että mallin havaittavissa. Muuttuja $X$ voi olla esimerkiksi jonkinlainen rekisteritieto, kuten ikä tai sukupuoli. Muuttuja $Z$ on muuttuja, jonka tuomari tai muu asiantuntija voi havaita, mutta joka on mallilta piilotettu. Muuttujan $Z$ voidaan ajatella esimerkiksi oikeuskäsittelyjen tapauksessa kuvaavan epäillyn käytöstä oikeussalissa. Tulosmuuttuja $Y$ ja päätösmuuttuja $T$ ovat kaksiarvoisia ja niiden määrittelyt on esitelty kuvassa \ref{valikoitumisharha}: myönteistä päätöstä merkitään $t=1$, kielteistä $t=0$. Vastaavasti myönteinen tulos määritellään muuttujan $Y$ arvoksi 1, kielteinen arvoksi 0. +Mallin määrittelevä graafi on esitetty kuviossa \ref{final_model} ilman virhemuuttujia. Graafista voidaan suoraan lukea oletukset: oletetaan, että $Z \independent X, R$, mutta laajennetaan Lakkarajun oletuksia sallimalla muuttujan X vaikutus muuttujaan R \cite{lakkaraju17}. Mallin oletetuilla kausaalisilla vaikutuksilla on lisäksi selkeästi ilmaistavat realisaatiot: kuinka osuuden $R$ muuttaminen vaikuttaa päätökseen ja edelleen päätös tulokseen ja niin edelleen. -\begin{table}[h!]
+\begin{table} %[H] \centering \begin{tabular}{rl} \hline \hline Muuttuja & Kuvaus \\ \hline - R & Vapautusprosentti, vapautumiskynnys \\ - X & Henkilökohtaiset muuttujat, kirjalliset \\ - Z & Henkilökohtaiset muuttujat, tuomarin havaiitsemat\\ - W & Henkilökohtaiset muuttujat, havaitsemattomat, \emph{kohtalo}\\ - Y & Uusinta, $Y=0$ uusi, 1 niin ei uusinut\\ - T & 0 on jail, 1 on bail\\ + R & Myönteisten päätösten osuus prosentteina $r \in [0, 1]$ \\ + X & Kirjatut muuttujat, havaittavissa kaikille \\ + Z & Kirjaamattomat muuttujat, vain päättäjän havaitsemat\\ + Y & Tulosmuuttuja, $y \in \{0, 1\}$\\ + T & Päätösmuuttuja, $t \in \{0, 1\}$\\ \hline \hline \end{tabular} -\caption{Mallin muuttjienn selitteet} +\caption{Mallin muuttujien selitteet} \label{syntmjat} \end{table} -\begin{figure}[H] +\begin{figure}% [H] \centering \begin{subfigure}[b]{0.4\textwidth} \includegraphics[width=\textwidth]{final_model} - \caption{lopullinen malli} + \caption{Malli ilman interventiota.} \label{final_model} - \end{subfigure} + \end{subfigure} ~ %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc. %(or a blank line to force the subfigure onto a new line) \begin{subfigure}[b]{0.5\textwidth} \includegraphics[width=\textwidth]{intervention_model} - \caption{interventio} + \caption{Malli, johon interventio on merkitty.} \label{intervention_model} \end{subfigure} ~ %add desired spacing between images, e. g. ~, \quad, \qquad, \hfill etc. %(or a blank line to force the subfigure onto a new line) - \caption{Kasuaalimallit graafina}\label{mallikuvat} + \caption{Kausaalimallit graafeina.}\label{mallikuvat} \end{figure} +Johdetaan muuttujan $R$ kausaalivaikutus muuttujaan $Y$ yli kaikkien ositteiden X. 
Huomataan, että osuuden $R$ kausaalinen vaikutus voidaan ilmaista suoraan lausekkeella \ref{q_m}, sillä $\pr(Y=0|\text{do}(R=0))=0$ ja siten edelleen +\begin{equation*} + \pr(Y=0|\text{do}(R=r))-\pr(Y=0|\text{do}(R=0)) \\ +% =\: \pr(Y=0|\text{do}(R=r))-0 \\ + =\: \pr(Y=0|\text{do}(R=r)). +\end{equation*} + +Osoitetaan seuraavaksi, että X on riittävä vaikutusten korjaamiseen määritelmän \ref{takaovi} mukaisesti, kun selvitetään muuttujan R kausaalista vaikutusta muuttujaan Y. Mallista voidaan suoraan lukea, että takaovikriteerin ensimmäinen ehto on voimassa: X ei ole muuttujan R jälkeläinen. Polut, jotka muuttujan X pitää katkaista ollakseen riittävä vaikutusten korjaamiseen ovat $R \leftarrow X \rightarrow Y$, $R \leftarrow X \rightarrow T \rightarrow Y$ ja $R \leftarrow X \rightarrow T \leftarrow Z \rightarrow Y$. Muuttuja X täyttää kuitenkin määritelmän \ref{d_sep} (a)-kohdan ehdon ja siten d-separoi muuttujat R ja Y. Tällöin X on riittävä vaikutusten korjaamiseen ja voidaan hyödyntää Pearlin kaavaa 25 \cite{pearl10}: +\begin{subequations} \label{derivation} +\begin{align} + \pr&(Y=0|\text{do}(R=r)) = \sum_x \pr(Y=0| R=r, X=x) \pr(X=x) \label{derivation1} \\ + &= \sum_x \left( \sum_t \pr(Y=0, T=t| R=r, X=x) \right) \pr(X=x) \label{derivation2} \\ + &= \sum_x \left( \sum_t \pr(Y=0| T=t, R=r, X=x)\pr(T=t| R=r, X=x) \right) \pr(X=x) \label{derivation3} \\ + &= \sum_x \pr(Y=0| T=1, R=r, X=x) \pr(T=1| R=r, X=x) \pr(X=x) \label{derivation4} \\ + &= \sum_x \pr(Y=0| T=1, X=x) \pr(T=1| R=r, X=x) \pr(X=x) \label{derivation5} +\end{align} +\end{subequations} -\begin{algorithm} % enter the algorithm environment -\caption{Kausaalialgoritmi} % give the algorithm a caption -\label{causal_alg} % and a label for \ref{} commands later in the document -\begin{algorithmic}[1] % enter the algorithmic environment -\REQUIRE aineisto $(\mathbf{x}, t, y) \in \D_t, \D_v$ ja hyväksymisaste $r \in [0, 1]$, missä $\D_t$ on testiaineisto ja $\D_v$ validointiaineisto. 
-\ENSURE $\pr(Y=0|\text{do}(R=r))$ - -\STATE Määritä $f(x) = \pr(X=x)$ testiaineistosta. -\STATE Ennusta vastetta $Y$ selittävillä muuttujilla $X$ käyttäen harjoitusaineiston havaintoja, joilla $T=1$. -\STATE Määritä harjoitusaineiston jokaiselle havainnolle $P(Y=0|X=x)$ käyttäen yllä olevaa mallia. -\STATE Järjestä havainnot nousevaan järjestykeen edellisen kohdan todennäköisyyksien mukaan. -\STATE Alusta muuttuja \texttt{summa} = 0. -\FORALL{Jokaiselle parametriavaruuden pisteelle} - \STATE $p_x \leftarrow P(X=x)$ - \STATE $\mathcal{D_x} \leftarrow \{\mathcal{D} | X = x\}$ - \STATE Assign first $r\cdot 100\%$ observations from $\mathcal{D_x}$ to $\mathcal{D}_{rx}$ - \STATE $p_t \leftarrow \dfrac{|\{\mathcal{D}_{rx}|T=1\}|}{|\mathcal{D}_{rx}|}$ - \STATE $\mathcal{D}_{tx} \leftarrow \{\mathcal{D}_x | T = 1\}$ - \STATE $p_y \leftarrow \dfrac{|\{\mathcal{D}_{tx}|Y=0\}|}{|\mathcal{D}_{tx}|}$ - \STATE Lisää muuttujaan \texttt{summa} tulo $p_y \cdot p_t \cdot p_x$ -\ENDFOR -\RETURN \texttt{summa} -\end{algorithmic} -\end{algorithm} +Yllä oleva lauseke on yhtäpitävä myös jatkuville muuttujan $x$ arvoille, kun korvaamme summaukset integraalilla parametriavaruuden yli: $$\pr(Y=0|\text{do}(R=r)) = \int_x \pr(Y=0| T=1, X=x) \pr(T=1| R=r, X=x) \pr(X=x) \, \mathrm{d}x.$$ + +\subsection{algo} + + +Pearlin mukaan: + +$$\pr(Y=0|\text{do}(R=r), X=x)=\pr(Y=0|R=r, X=x)=\pr(Y=0|R=r, X=x, T=1)\pr(T=1|R=r, X=x)$$ + +Mallin vaikutukset laskettiin Pythonilla versio 3.6. Syötteet sklinear malliin, joka fitattiin testidataan ja sitten integroitiin eri leniencyn tasoilla muuttujan X parametriavaruuden eli reaaliakselin ylitse.
+ +%\begin{algorithm} % enter the algorithm environment +%\caption{Kausaalialgoritmi} % give the algorithm a caption +%\label{causal_alg} % and a label for \ref{} commands later in the document +%\begin{algorithmic}[1] % enter the algorithmic environment +%\REQUIRE aineisto $(\mathbf{x}, t, y) \in \D_t, \D_v$ ja hyväksymisaste $r \in [0, 1]$, missä $\D_t$ on testiaineisto ja $\D_v$ validointiaineisto. +%\ENSURE $\pr(Y=0|\text{do}(R=r))$ +% +%\STATE Määritä $f(x) = \pr(X=x)$ testiaineistosta. +%\STATE Ennusta vastetta $Y$ selittävillä muuttujilla $X$ käyttäen harjoitusaineiston havaintoja, joilla $T=1$. +%\STATE Määritä harjoitusaineiston jokaiselle havainnolle $P(Y=0|X=x)$ käyttäen yllä olevaa mallia. +%\STATE Järjestä havainnot nousevaan järjestykeen edellisen kohdan todennäköisyyksien mukaan. +%\STATE Alusta muuttuja \texttt{summa} = 0. +%\FORALL{Jokaiselle parametriavaruuden pisteelle} +% \STATE $p_x \leftarrow P(X=x)$ +% \STATE $\mathcal{D_x} \leftarrow \{\mathcal{D} | X = x\}$ +% \STATE Assign first $r\cdot 100\%$ observations from $\mathcal{D_x}$ to $\mathcal{D}_{rx}$ +% \STATE $p_t \leftarrow \dfrac{|\{\mathcal{D}_{rx}|T=1\}|}{|\mathcal{D}_{rx}|}$ +% \STATE $\mathcal{D}_{tx} \leftarrow \{\mathcal{D}_x | T = 1\}$ +% \STATE $p_y \leftarrow \dfrac{|\{\mathcal{D}_{tx}|Y=0\}|}{|\mathcal{D}_{tx}|}$ +% \STATE Lisää muuttujaan \texttt{summa} tulo $p_y \cdot p_t \cdot p_x$ +%\ENDFOR +%\RETURN \texttt{summa} +%\end{algorithmic} +%\end{algorithm} %%%%%%%%% %%%%%%%%% @@ -464,15 +520,20 @@ Muuttuja & Kuvaus \\ \chapter{Tulokset}\label{tulokset} -%%%%%%%%% +- se pääkuvaaja vertailuineen -\section{Synteettinen}\label{synttulokset} +- beta ztan vaikutus? -%%%%%%%%% - -\section{Compas}\label{compastulokset} +- erilaiset mallit ja koko käyrä aina 1 asti -> kuinka meillä parempi +- voidaanko antaa estimaateille mitään luottusvälejä tjsp? 
+\begin{figure}[H] +\centering +\includegraphics[width = \textwidth]{tulos_kuva_placeholder_en} +\caption{Tulokset kuvana} +\label{tuloskuva} +\end{figure} %%%%%%%%% %%%%%%%%% @@ -480,20 +541,26 @@ Muuttuja & Kuvaus \\ \chapter{Diskussio}\label{diskussio} +- Jatkosuunnitelmat: tutkitaan beta zetan vaikutusta tuloksiin, kuinka hyvin estimoituu. Sovelletaan oikeaan datasettiin. Mielenkiintoiseksi on osoittautunut ProPublica-julkaisun artikkelissa Machine Bias käyttämä COMPAS-aineisto. + +- Ongelmat / muut huomiot: Tällä aikataululla en ole tehnyt mallin validointeja: onko kausaaliset pathwayt reasonable. Malli itsessään on suhteellisen yksinkertainen joten (KÄSIENHEILUTTELU) on jokseenkin luultavaa, että sinällään mallin spesifioinnissa tuskin on mitään virheitä. Voitaisiin ehkä tietenkin koostaa jokseenkin hienosyisempi malli (erilaiset rikoshistoria yms erikseen) ja jotain. Jvat muuttujat? P-ulotteinen parametriavaruus??? +- Mallin validointi epäeettistä, koska vaatisi huonoja päätöksiä > meillä kyllä synteettinen? -\begin{verbatim} -# R-koodi, tulos sama -library(igraph) -library(causaleffect) -# simplify = FALSE to allow multiple edges -g <- graph.formula(X -+ R, X -+ D, X -+ Y, R -+ D , D -+ Y, D -+ Y, Y -+ D, simplify = FALSE) -# Here the bidirected edge between X and Z is set to be unobserved in graph g -# This is denoted by giving them a description attribute with the value "U"# The edges in question are the fourth and the fifth edge -g <- set.edge.attribute(graph = g, name = "description", index = c(6,7), value = "U") +- Implikaatiot: parempia malleja???
-res <- causal.effect("Y", "R", G = g) -\end{verbatim} +%\begin{verbatim} +%# R-koodi, tulos sama +%library(igraph) +%library(causaleffect) +%# simplify = FALSE to allow multiple edges +%g <- graph.formula(X -+ R, X -+ D, X -+ Y, R -+ D , D -+ Y, D -+ Y, Y -+ D, simplify = FALSE) +%# Here the bidirected edge between X and Z is set to be unobserved in graph g +%# This is denoted by giving them a description attribute with the value "U"# The edges in question are the fourth and the fifth edge +%g <- set.edge.attribute(graph = g, name = "description", index = c(6,7), value = "U") +% +%res <- causal.effect("Y", "R", G = g) +%\end{verbatim} %%%%%%%%% diff --git a/analysis_and_scripts/Analysis_07MAY2019_new.ipynb b/analysis_and_scripts/Analysis_07MAY2019_new.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..aabbd42e4084f328c8bf084ed8a4a4918d52d1b0 --- /dev/null +++ b/analysis_and_scripts/Analysis_07MAY2019_new.ipynb @@ -0,0 +1,645 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "toc": true + }, + "source": [ + "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n", + "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Causal-model\" data-toc-modified-id=\"Causal-model-1\"><span class=\"toc-item-num\">1 </span>Causal model</a></span><ul class=\"toc-item\"><li><span><a href=\"#Notes\" data-toc-modified-id=\"Notes-1.1\"><span class=\"toc-item-num\">1.1 </span>Notes</a></span></li></ul></li><li><span><a href=\"#Synthetic-data\" data-toc-modified-id=\"Synthetic-data-2\"><span class=\"toc-item-num\">2 </span>Synthetic data</a></span></li><li><span><a href=\"#Algorithms\" data-toc-modified-id=\"Algorithms-3\"><span class=\"toc-item-num\">3 </span>Algorithms</a></span><ul class=\"toc-item\"><li><span><a href=\"#Contraction-algorithm\" data-toc-modified-id=\"Contraction-algorithm-3.1\"><span class=\"toc-item-num\">3.1 </span>Contraction algorithm</a></span></li><li><span><a href=\"#Causal-algorithm\" 
data-toc-modified-id=\"Causal-algorithm-3.2\"><span class=\"toc-item-num\">3.2 </span>Causal algorithm</a></span></li></ul></li><li><span><a href=\"#Performance-comparison\" data-toc-modified-id=\"Performance-comparison-4\"><span class=\"toc-item-num\">4 </span>Performance comparison</a></span><ul class=\"toc-item\"><li><span><a href=\"#Predictive-models\" data-toc-modified-id=\"Predictive-models-4.1\"><span class=\"toc-item-num\">4.1 </span>Predictive models</a></span></li><li><span><a href=\"#Visual-comparison\" data-toc-modified-id=\"Visual-comparison-4.2\"><span class=\"toc-item-num\">4.2 </span>Visual comparison</a></span></li></ul></li></ul></div>" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Causal model\n", + "\n", + "Our model is defined by the probabilistic expression \n", + "\n", + "\\begin{equation}\\label{model_disc}\n", + "P(Y=0 | \\text{do}(R=r)) = \\sum_x \\underbrace{P(Y=0|X=x, T=1)}_\\text{1} \n", + "\\overbrace{P(T=1|R=r, X=x)}^\\text{2} \n", + "\\underbrace{P(X=x)}_\\text{3}\n", + "\\end{equation}\n", + "\n", + "which is equal to \n", + "\n", + "\\begin{equation}\\label{model_cont}\n", + "P(Y=0 | \\text{do}(R=r)) = \\int_x P(Y=0|X=x, T=1)P(T=1|R=r, X=x)P(X=x)\n", + "\\end{equation}\n", + "\n", + "for continuous $x$. 
Model as a graph (Z is a latent variable, and can be excluded from the expression with do-calculus by showing that $X$ is admissible for adjustment):\n", + "\n", + "<!---  --->\n", + "\n", + "For predicting the probability of negative outcome the following should hold because by Pearl $P(Y=0 | \\text{do}(R=r), X=x) = P(Y=0 | R=r, X=x)$ when $X$ is an admissible set:\n", + "\n", + "\\begin{equation} \\label{model_pred}\n", + "P(Y=0 | \\text{do}(R=r), X=x) = P(Y=0|X=x, T=1)P(T=1|R=r, X=x).\n", + "\\end{equation}\n", + "\n", + "Still it should be noted that this prediction takes into account the probability of the individual to be given a positive decision ($T=1$), see second term in \\ref{model_pred}.\n", + "\n", + "----\n", + "\n", + "### Notes\n", + "\n", + "* Equations \\ref{model_disc} and \\ref{model_cont} describe the whole causal effect in the population (the causal effect of changing $r$ over all strata $X$).\n", + "* Prediction should be possible with \\ref{model_pred}. Both terms can be learned from the data. NB: the probability $P(Y=0 | \\text{do}(R=r), X=x)$ is lowest when the individual $x$ is the most dangerous or the least dangerous. How could we infer/predict the counterfactual \"what is the probability of $Y=0$ if we were to let this individual go?\" has yet to be calculated.\n", + "* Is the effect of R learned/estimated correctly if it is just plugged in to a predictive model (e.g. logistic regression)?\n", + "* $P(Y=0 | do(R=0)) = 0$ only in this application. My predictive models say that when $r=0$ the probability $P(Y=0) \\approx 0.027$ which would be a natural estimate in another application/scenario (e.g. in medicine the probability of an adverse event when a stronger medicine is distributed to everyone. 
Then the probability will be close to zero but not exactly zero.)" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from datetime import datetime\n", + "import matplotlib.pyplot as plt\n", + "import scipy.stats as scs\n", + "import scipy.integrate as si\n", + "import seaborn as sns\n", + "import numpy.random as npr\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# Settings\n", + "\n", + "%matplotlib inline\n", + "\n", + "plt.rcParams.update({'font.size': 16})\n", + "plt.rcParams.update({'figure.figsize': (14, 7)})\n", + "\n", + "# Suppress deprecation warnings.\n", + "\n", + "import warnings\n", + "\n", + "def fxn():\n", + " warnings.warn(\"deprecated\", DeprecationWarning)\n", + "\n", + "with warnings.catch_warnings():\n", + " warnings.simplefilter(\"ignore\")\n", + " fxn()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Synthetic data\n", + "\n", + "In the chunk below, we generate the synthetic data as described by Lakkaraju et al. 
The default values and definitions of $Y$ and $T$ values follow their description.\n", + "\n", + "**Parameters**\n", + "\n", + "* M = `nJudges_M`, number of judges\n", + "* N = `nSubjects_N`, number of subjects assigned to each judge\n", + "* betas $\\beta_i$ = `beta_i`, where $i \\in \\{X, Z, W\\}$ are coefficients for the respective variables\n", + "\n", + "**Columns of the data:**\n", + "\n", + "* `judgeID_J` = judge IDs as running numbering from 0 to `nJudges_M - 1`\n", + "* R = `acceptanceRate_R`, acceptance rates\n", + "* X = `X`, individual's features observable to all (models and judges)\n", + "* Z = `Z`, information observable for judges only\n", + "* W = `W`, unobservable / inaccessible information\n", + "* T = `decision_T`, bail-or-jail decisions where $T=0$ represents jail decision and $T=1$ bail decision.\n", + "* Y = `result_Y`, result variable, if $Y=0$ person will or would recidivate and if $Y=1$ person will or would not commit a crime." + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [], + "source": [ + "# Set seed for reproducibility\n", + "#npr.seed(0)\n", + "\n", + "def generateData(nJudges_M=100,\n", + " nSubjects_N=500,\n", + " beta_X=1.0,\n", + " beta_Z=1.0,\n", + " beta_W=0.2):\n", + "\n", + " # Assign judge IDs as running numbering from 0 to nJudges_M - 1\n", + " judgeID_J = np.repeat(np.arange(0, nJudges_M, dtype=np.int32), nSubjects_N)\n", + "\n", + " # Sample acceptance rates uniformly from a closed interval\n", + " # from 0.1 to 0.9 and round to tenth decimal place.\n", + " acceptance_rates = np.round(npr.uniform(.1, .9, nJudges_M), 10)\n", + "\n", + " # Replicate the rates so they can be attached to the corresponding judge ID.\n", + " acceptanceRate_R = np.repeat(acceptance_rates, nSubjects_N)\n", + "\n", + " # Sample the variables from standard Gaussian distributions.\n", + " X = npr.normal(size=nJudges_M * nSubjects_N)\n", + " Z = npr.normal(size=nJudges_M * nSubjects_N)\n", + " W =
npr.normal(size=nJudges_M * nSubjects_N)\n", + "\n", + " probabilities_Y = 1 / (1 + np.exp(-(beta_X * X + beta_Z * Z + beta_W * W)))\n", + "\n", + " # 0 if P(Y = 0| X = x; Z = z; W = w) >= 0.5 , 1 otherwise\n", + " result_Y = 1 - probabilities_Y.round()\n", + " \n", + " # For the conditional probabilities of T we add noise ~ N(0, 0.1)\n", + " probabilities_T = 1 / (1 + np.exp(-(beta_X * X + beta_Z * Z)))\n", + " probabilities_T += npr.normal(0, np.sqrt(0.1), nJudges_M * nSubjects_N)\n", + "\n", + " # Initialize decision values as 1\n", + " decision_T = np.ones(nJudges_M * nSubjects_N)\n", + "\n", + " # Initialize the dataframe\n", + " df_init = pd.DataFrame(np.column_stack(\n", + " (judgeID_J, acceptanceRate_R, X, Z, W, result_Y, probabilities_T,\n", + " decision_T)),\n", + " columns=[\n", + " \"judgeID_J\", \"acceptanceRate_R\", \"X\", \"Z\", \"W\",\n", + " \"result_Y\", \"probabilities_T\", \"decision_T\"\n", + " ])\n", + "\n", + " # Sort by judges then probabilities\n", + " data = df_init.sort_values(by=[\"judgeID_J\", \"probabilities_T\"],\n", + " ascending=False)\n", + "\n", + " # Iterate over the data. Subject is in the top (1-r)*100% if\n", + " # his within-judge-index is over acceptance threshold times\n", + " # the number of subjects assigned to each judge. 
If subject\n", + " # is over the limit they are assigned a zero, else one.\n", + " data.reset_index(drop=True, inplace=True)\n", + "\n", + " data['decision_T'] = np.where(\n", + " (data.index.values % nSubjects_N) <\n", + " ((1 - data['acceptanceRate_R']) * nSubjects_N), 0, 1)\n", + "\n", + " return data\n", + "\n", + "\n", + "df = generateData()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(25000, 8)\n", + "(25000, 8)\n", + "(25000, 8)\n", + "(25000, 8)\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th>decision_T</th>\n", + " <th>1</th>\n", + " </tr>\n", + " <tr>\n", + " <th>result_Y</th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0.0</th>\n", + " <td>3911</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1.0</th>\n", + " <td>8759</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "decision_T 1\n", + "result_Y \n", + "0.0 3911\n", + "1.0 8759" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Split the data set to test and train\n", + "from sklearn.model_selection import train_test_split\n", + "train, test = train_test_split(df, test_size=0.5, random_state=0)\n", + "\n", + "print(train.shape)\n", + "print(test.shape)\n", + "\n", + "train_labeled = train.copy()\n", + "test_labeled = test.copy()\n", + "\n", + "# Set results as NA if decision is negative.\n", + "train_labeled.result_Y = np.where(train.decision_T == 
0, np.nan, train.result_Y)\n", + "test_labeled.result_Y = np.where(test.decision_T == 0, np.nan, test.result_Y)\n", + "\n", + "print(train_labeled.shape)\n", + "print(test_labeled.shape)\n", + "\n", + "tab = train_labeled.groupby(['result_Y', 'decision_T']).size()\n", + "tab.unstack()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Algorithms\n", + "\n", + "### Contraction algorithm\n", + "\n", + "Below is an implementation of Lakkaraju's team's algorithm presented in [their paper](https://helka.finna.fi/PrimoRecord/pci.acm3098066). Relevant parameters to be passed to the function are presented in the description." + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "def contraction(df,\n", + " judgeIDJ_col,\n", + " decisionT_col,\n", + " resultY_col,\n", + " modelProbS_col,\n", + " accRateR_col,\n", + " r,\n", + " binning=False):\n", + " '''\n", + " This is an implementation of the algorithm presented by Lakkaraju\n", + " et al. 
in their paper \"The Selective Labels Problem: Evaluating \n", + " Algorithmic Predictions in the Presence of Unobservables\" (2017).\n", + " \n", + " Parameters:\n", + " df = The (Pandas) data frame containing the data, judge decisions,\n", + " judge IDs, results and probability scores.\n", + " judgeIDJ_col = String, the name of the column containing the judges' IDs\n", + " in df.\n", + " decisionT_col = String, the name of the column containing the judges' decisions\n", + " resultY_col = String, the name of the column containing the realization\n", + " modelProbS_col = String, the name of the column containing the probability\n", + " scores from the black-box model B.\n", + " accRateR_col = String, the name of the column containing the judges' \n", + " acceptance rates\n", + " r = Float between 0 and 1, the given acceptance rate.\n", + " binning = Boolean, should judges with same acceptance rate be binned\n", + " \n", + " Returns:\n", + " u = The estimated failure rate at acceptance rate r.\n", + " '''\n", + " # Sort first by acceptance rate and judge ID.\n", + " sorted_df = df.sort_values(by=[accRateR_col, judgeIDJ_col],\n", + " ascending=False)\n", + "\n", + " if binning:\n", + " # Get maximum leniency\n", + " max_leniency = sorted_df[accRateR_col].values[0].round(1)\n", + "\n", + " # Get list of judges that are the most lenient\n", + " most_lenient_list = sorted_df.loc[sorted_df[accRateR_col].round(1) ==\n", + " max_leniency, judgeIDJ_col]\n", + "\n", + " # Subset to obtain D_q\n", + " D_q = sorted_df[sorted_df[judgeIDJ_col].isin(\n", + " most_lenient_list.unique())].copy()\n", + " else:\n", + " # Get most lenient judge\n", + " most_lenient_ID = sorted_df[judgeIDJ_col].values[0]\n", + "\n", + " # Subset\n", + " D_q = sorted_df[sorted_df[judgeIDJ_col] == most_lenient_ID].copy()\n", + "\n", + " # All observations of R_q have observed outcome labels\n", + " R_q = D_q[D_q[decisionT_col] == 1]\n", + "\n", + " # \"Observations deemed as high risk by B are at the top 
of this list\"\n", + " R_sort_q = R_q.sort_values(by=modelProbS_col, ascending=False)\n", + "\n", + " number_to_remove = int(\n", + " round((1.0 - r) * D_q.shape[0] - (D_q.shape[0] - R_q.shape[0])))\n", + "\n", + " # \"R_B is the list of observations assigned to t = 1 by B\"\n", + " R_B = R_sort_q[number_to_remove:R_sort_q.shape[0]]\n", + "\n", + " return np.sum(R_B[resultY_col] == 0) / D_q.shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Causal algorithm\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "def f(x, model, class_value):\n", + " '''\n", + " Parameters:\n", + " x = individual features\n", + " model = a trained sklearn predictive model. Predicts probabilities for given x.\n", + " class_value = the result (class) to predict (usually 0 or 1).\n", + " \n", + " Returns:\n", + " The probabilities (as vector) of class value (class_value) given \n", + " individual features (x) and the trained, predictive model (model).\n", + " '''\n", + " if x.ndim == 1:\n", + " # if x is vector, transform to column matrix.\n", + " f_values = model.predict_proba(np.array(x).reshape(-1, 1))\n", + " else:\n", + " f_values = model.predict_proba(x)\n", + "\n", + " return f_values[:, model.classes_ == class_value].flatten()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Performance comparison\n", + "\n", + "Below we try to replicate the results obtained by Lakkaraju and compare their model's performance to the one of ours.\n", + "\n", + "### Predictive models\n", + "\n", + "Lakkaraju says that they used logistic regression. We construct the models using only *observed observations*, i.e. observations for which labels are available. We then predict the probability of negative outcome for all observations in the test data and attach it to our data set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "# instantiate the model (using the default parameters)\n", + "logreg = LogisticRegression(solver='lbfgs')\n", + "\n", + "# fit, reshape X to be of shape (n_samples, n_features)\n", + "logreg = logreg.fit(\n", + " train_labeled.X[train_labeled.decision_T == 1].values.reshape(-1, 1),\n", + " train_labeled.result_Y[train_labeled.decision_T == 1])\n", + "\n", + "# predict probabilities and attach to data\n", + "label_probs_logreg = logreg.predict_proba(test.X.values.reshape(-1, 1))\n", + "\n", + "test = test.assign(B_prob_0_logreg=label_probs_logreg[:, 0])\n", + "test_labeled = test_labeled.assign(B_prob_0_logreg=label_probs_logreg[:, 0])" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "# Train model for predicting the probability of positive decision with a given\n", + "# leniency r and indivual features x.\n", + "\n", + "# Instantiate the model (using the default parameters)\n", + "decision_model = LogisticRegression(solver='lbfgs')\n", + "\n", + "# fit, reshape X to be of shape (n_samples, n_features)\n", + "decision_model = decision_model.fit(train[['X', 'acceptanceRate_R']],\n", + " train.decision_T)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visual comparison\n", + "\n", + "Let's plot the failure rates against the acceptance rates using the difference." + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1008x576 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "failure_rates = np.zeros((8, 5))\n", + "\n", + "for r in np.arange(1, 9):\n", + " \n", + " #### True evaluation\n", + " # Sort by failure probabilities, subjects with the smallest risk are first. 
\n", + " df_sorted = test.sort_values(by='B_prob_0_logreg', inplace=False, \n", + " ascending=True)\n", + "\n", + " to_release = int(round(df_sorted.shape[0] * r / 10))\n", + "\n", + " # Failure was coded as zero.\n", + " failure_rates[r - 1, 0] = np.mean(df_sorted.result_Y[0:to_release] == 0)\n", + " \n", + " #### Labeled outcomes only\n", + " # Sort by failure probabilities, subjects with the smallest risk are first. \n", + " df_sorted = test_labeled.sort_values(by='B_prob_0_logreg', inplace=False,\n", + " ascending=True)\n", + " \n", + " # Ensure that only labeled outcomes are available\n", + " df_sorted = df_sorted[df_sorted.decision_T == 1]\n", + " \n", + " to_release = int(round(df_sorted.shape[0] * r / 10))\n", + "\n", + " failure_rates[r - 1, 1] = np.mean(df_sorted.result_Y[0:to_release] == 0)\n", + " \n", + " #### Human error rate\n", + " # Get judges with correct leniency as list\n", + " correct_leniency_list = test_labeled.judgeID_J[\n", + " test_labeled['acceptanceRate_R'].round(1) == r / 10].values\n", + "\n", + " # Released are the people they judged and released, T = 1\n", + " released = test_labeled[test_labeled.judgeID_J.isin(correct_leniency_list)\n", + " & (test_labeled.decision_T == 1)]\n", + "\n", + " # Get their failure rate, aka ratio of reoffenders to number of people judged in total\n", + " failure_rates[r - 1, 2] = np.sum(\n", + " released.result_Y == 0) / correct_leniency_list.shape[0]\n", + " # onko jakaja oikein\n", + " \n", + " #### Contraction, logistic regression\n", + " failure_rates[r - 1, 3] = contraction(\n", + " test_labeled, 'judgeID_J', 'decision_T', 'result_Y', 'B_prob_0_logreg',\n", + " 'acceptanceRate_R', r / 10, False)\n", + "\n", + " #### P(Y=0 | T=1, X=x)*P(T=1 | R=r, X=x)*P(X=x)\n", + " failure_rates[r - 1, 4] = si.quad(lambda x: f(np.array([x]), logreg, 0) * \n", + " f(np.array([[x, r/10]]), decision_model, 1) * \n", + " scs.norm.pdf(x), -np.inf, np.inf)[0]\n", + "\n", + "# Error bars TBA\n", + "\n", + 
"plt.figure(figsize=(14, 8))\n", + "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 0], label='True Evaluation', c='green')\n", + "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 1], label='Labeled outcomes', c='lime')\n", + "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 2], label='Human evaluation', c='red')\n", + "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 3], label='Contraction, log.', c='blue')\n", + "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 4], label='Causal effect', c='magenta')\n", + "\n", + "plt.title('Failure rate vs. Acceptance rate')\n", + "plt.xlabel('Acceptance rate')\n", + "plt.ylabel('Failure rate')\n", + "plt.legend()\n", + "plt.grid()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0 (0.018718463137853268, 7.749450073818988e-11)\n", + "1.0 (0.33301477999280144, 6.337618003666896e-09)\n" + ] + } + ], + "source": [ + "# Below are estimates for P(Y=0 | do(R=0)) and P(Y=0 | do(R=1))\n", + "r = 0.0\n", + "print(r, si.quad(lambda x: f(np.array([[x, r]]), decision_model, 1) * \\\n", + " f(np.array([x]), logreg, 0) * scs.norm.pdf(x), -np.inf, np.inf))\n", + "\n", + "r = 1.0\n", + "print(r, si.quad(lambda x: f(np.array([[x, r]]), decision_model, 1) * \\\n", + " f(np.array([x]), logreg, 0) * scs.norm.pdf(x), -np.inf, np.inf))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So it can be concluded that:\n", + "\n", + "\\begin{equation*}\n", + "P(Y=0 | \\text{do}(R=0)) \\approx 0.019 \\\\\n", + "P(Y=0 | \\text{do}(R=1)) \\approx 0.333 \\\\\n", + "\\end{equation*}" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analysis_and_scripts/Bachelors_thesis_analyses.ipynb b/analysis_and_scripts/Bachelors_thesis_analyses.ipynb deleted file mode 100644 index 39665d65d8330aa188233997c17d551e4b1c4207..0000000000000000000000000000000000000000 --- a/analysis_and_scripts/Bachelors_thesis_analyses.ipynb +++ /dev/null @@ -1,2455 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "toc": true - }, - "source": [ - "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n", - "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Data-sets\" data-toc-modified-id=\"Data-sets-1\"><span class=\"toc-item-num\">1 </span>Data sets</a></span><ul class=\"toc-item\"><li><span><a href=\"#COMPAS-data\" data-toc-modified-id=\"COMPAS-data-1.1\"><span class=\"toc-item-num\">1.1 </span>COMPAS data</a></span></li><li><span><a href=\"#Synthetic-data\" data-toc-modified-id=\"Synthetic-data-1.2\"><span class=\"toc-item-num\">1.2 </span>Synthetic data</a></span></li></ul></li><li><span><a href=\"#Algorithms\" 
data-toc-modified-id=\"Algorithms-2\"><span class=\"toc-item-num\">2 </span>Algorithms</a></span><ul class=\"toc-item\"><li><span><a href=\"#Contraction-algorithm\" data-toc-modified-id=\"Contraction-algorithm-2.1\"><span class=\"toc-item-num\">2.1 </span>Contraction algorithm</a></span></li><li><span><a href=\"#Causal-model\" data-toc-modified-id=\"Causal-model-2.2\"><span class=\"toc-item-num\">2.2 </span>Causal model</a></span></li></ul></li><li><span><a href=\"#Performance-comparison\" data-toc-modified-id=\"Performance-comparison-3\"><span class=\"toc-item-num\">3 </span>Performance comparison</a></span><ul class=\"toc-item\"><li><span><a href=\"#On-synthetic-data\" data-toc-modified-id=\"On-synthetic-data-3.1\"><span class=\"toc-item-num\">3.1 </span>On synthetic data</a></span><ul class=\"toc-item\"><li><span><a href=\"#Predictive-models\" data-toc-modified-id=\"Predictive-models-3.1.1\"><span class=\"toc-item-num\">3.1.1 </span>Predictive models</a></span></li><li><span><a href=\"#Visual-comparison\" data-toc-modified-id=\"Visual-comparison-3.1.2\"><span class=\"toc-item-num\">3.1.2 </span>Visual comparison</a></span></li></ul></li><li><span><a href=\"#On-COMPAS-data\" data-toc-modified-id=\"On-COMPAS-data-3.2\"><span class=\"toc-item-num\">3.2 </span>On COMPAS data</a></span><ul class=\"toc-item\"><li><span><a href=\"#Predictive-models\" data-toc-modified-id=\"Predictive-models-3.2.1\"><span class=\"toc-item-num\">3.2.1 </span>Predictive models</a></span></li></ul></li></ul></li></ul></div>" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Bachelors thesis' analyses\n", - "\n", - "*This Jupyter notebook is for the analyses and model building for Riku Laine's bachelors thesis*\n", - "\n", - "Table of contents is provided above. First I will briefly present the COMPAS data set and then create the synthetic data set as done by Lakkaraju *et al.* ([link](https://helka.finna.fi/PrimoRecord/pci.acm3098066)). 
Then I will proceed to implement algorithms. Finally I will do the side-by-side comparisons of the results on the synthetic data. Finally I run the causal model on the COMPAS data.\n", - "\n", - "## Data sets\n", - "\n", - "*Below I load the COMPAS data set and generate the synthetic one.*\n", - "\n", - "### COMPAS data\n", - "\n", - "The following data filtering procedure follows the one described in the [ProPublica methodology](https://www.propublica.org/article/how-we-analyzed-the-compas-recidivism-algorithm)." - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(7214, 53)\n", - "['id' 'name' 'first' 'last' 'compas_screening_date' 'sex' 'dob' 'age'\n", - " 'age_cat' 'race' 'juv_fel_count' 'decile_score' 'juv_misd_count'\n", - " 'juv_other_count' 'priors_count' 'days_b_screening_arrest' 'c_jail_in'\n", - " 'c_jail_out' 'c_case_number' 'c_offense_date' 'c_arrest_date'\n", - " 'c_days_from_compas' 'c_charge_degree' 'c_charge_desc' 'is_recid'\n", - " 'r_case_number' 'r_charge_degree' 'r_days_from_arrest' 'r_offense_date'\n", - " 'r_charge_desc' 'r_jail_in' 'r_jail_out' 'violent_recid'\n", - " 'is_violent_recid' 'vr_case_number' 'vr_charge_degree' 'vr_offense_date'\n", - " 'vr_charge_desc' 'type_of_assessment' 'decile_score.1' 'score_text'\n", - " 'screening_date' 'v_type_of_assessment' 'v_decile_score' 'v_score_text'\n", - " 'v_screening_date' 'in_custody' 'out_custody' 'priors_count.1' 'start'\n", - " 'end' 'event' 'two_year_recid']\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from datetime import datetime\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as scs\n", - "import seaborn as sns\n", - "import numpy.random as npr\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.ensemble import RandomForestClassifier\n", - 
"\n", - "%matplotlib inline\n", - "\n", - "plt.rcParams.update({'font.size': 16})\n", - "plt.rcParams.update({'figure.figsize': (14, 7)})\n", - "\n", - "# Read file\n", - "compas_raw = pd.read_csv(\"../data/compas-scores-two-years.csv\")\n", - "\n", - "# Check dimensions, number of rows should be 7214\n", - "print(compas_raw.shape)\n", - "print(compas_raw.columns.values)" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(6172, 13)" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Select columns\n", - "compas = compas_raw[[\n", - " 'age', 'c_charge_degree', 'race', 'age_cat', 'score_text', 'sex',\n", - " 'priors_count', 'days_b_screening_arrest', 'decile_score', 'is_recid',\n", - " 'two_year_recid', 'c_jail_in', 'c_jail_out'\n", - "]]\n", - "\n", - "# Subset values, see reasons in ProPublica methodology.\n", - "compas = compas.query('days_b_screening_arrest <= 30 and \\\n", - " days_b_screening_arrest >= -30 and \\\n", - " is_recid != -1 and \\\n", - " c_charge_degree != \"O\"')\n", - "\n", - "# Drop row if score_text is na\n", - "compas = compas[compas.score_text.notnull()]\n", - "\n", - "compas.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>0</th>\n", - " <th>1</th>\n", - " <th>2</th>\n", - " <th>5</th>\n", - " <th>6</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>age</th>\n", - " 
<td>69</td>\n", - " <td>34</td>\n", - " <td>24</td>\n", - " <td>44</td>\n", - " <td>41</td>\n", - " </tr>\n", - " <tr>\n", - " <th>c_charge_degree</th>\n", - " <td>F</td>\n", - " <td>F</td>\n", - " <td>F</td>\n", - " <td>M</td>\n", - " <td>F</td>\n", - " </tr>\n", - " <tr>\n", - " <th>race</th>\n", - " <td>Other</td>\n", - " <td>African-American</td>\n", - " <td>African-American</td>\n", - " <td>Other</td>\n", - " <td>Caucasian</td>\n", - " </tr>\n", - " <tr>\n", - " <th>age_cat</th>\n", - " <td>Greater than 45</td>\n", - " <td>25 - 45</td>\n", - " <td>Less than 25</td>\n", - " <td>25 - 45</td>\n", - " <td>25 - 45</td>\n", - " </tr>\n", - " <tr>\n", - " <th>score_text</th>\n", - " <td>Low</td>\n", - " <td>Low</td>\n", - " <td>Low</td>\n", - " <td>Low</td>\n", - " <td>Medium</td>\n", - " </tr>\n", - " <tr>\n", - " <th>sex</th>\n", - " <td>Male</td>\n", - " <td>Male</td>\n", - " <td>Male</td>\n", - " <td>Male</td>\n", - " <td>Male</td>\n", - " </tr>\n", - " <tr>\n", - " <th>priors_count</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>4</td>\n", - " <td>0</td>\n", - " <td>14</td>\n", - " </tr>\n", - " <tr>\n", - " <th>days_b_screening_arrest</th>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>0</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>decile_score</th>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>4</td>\n", - " <td>1</td>\n", - " <td>6</td>\n", - " </tr>\n", - " <tr>\n", - " <th>is_recid</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>two_year_recid</th>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>c_jail_in</th>\n", - " <td>2013-08-13 06:03:42</td>\n", - " <td>2013-01-26 03:45:27</td>\n", - " <td>2013-04-13 04:58:34</td>\n", - " <td>2013-11-30 04:50:18</td>\n", - " <td>2014-02-18 05:08:24</td>\n", - " </tr>\n", - " <tr>\n", - " 
<th>c_jail_out</th>\n", - " <td>2013-08-14 05:41:20</td>\n", - " <td>2013-02-05 05:36:53</td>\n", - " <td>2013-04-14 07:02:04</td>\n", - " <td>2013-12-01 12:28:56</td>\n", - " <td>2014-02-24 12:18:30</td>\n", - " </tr>\n", - " <tr>\n", - " <th>length_of_stay</th>\n", - " <td>0</td>\n", - " <td>10</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>6</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " 0 1 \\\n", - "age 69 34 \n", - "c_charge_degree F F \n", - "race Other African-American \n", - "age_cat Greater than 45 25 - 45 \n", - "score_text Low Low \n", - "sex Male Male \n", - "priors_count 0 0 \n", - "days_b_screening_arrest -1 -1 \n", - "decile_score 1 3 \n", - "is_recid 0 1 \n", - "two_year_recid 0 1 \n", - "c_jail_in 2013-08-13 06:03:42 2013-01-26 03:45:27 \n", - "c_jail_out 2013-08-14 05:41:20 2013-02-05 05:36:53 \n", - "length_of_stay 0 10 \n", - "\n", - " 2 5 \\\n", - "age 24 44 \n", - "c_charge_degree F M \n", - "race African-American Other \n", - "age_cat Less than 25 25 - 45 \n", - "score_text Low Low \n", - "sex Male Male \n", - "priors_count 4 0 \n", - "days_b_screening_arrest -1 0 \n", - "decile_score 4 1 \n", - "is_recid 1 0 \n", - "two_year_recid 1 0 \n", - "c_jail_in 2013-04-13 04:58:34 2013-11-30 04:50:18 \n", - "c_jail_out 2013-04-14 07:02:04 2013-12-01 12:28:56 \n", - "length_of_stay 1 1 \n", - "\n", - " 6 \n", - "age 41 \n", - "c_charge_degree F \n", - "race Caucasian \n", - "age_cat 25 - 45 \n", - "score_text Medium \n", - "sex Male \n", - "priors_count 14 \n", - "days_b_screening_arrest -1 \n", - "decile_score 6 \n", - "is_recid 1 \n", - "two_year_recid 1 \n", - "c_jail_in 2014-02-18 05:08:24 \n", - "c_jail_out 2014-02-24 12:18:30 \n", - "length_of_stay 6 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Calculate length of stay\n", - "out = pd.to_datetime(compas.c_jail_out, format=\"%Y-%m-%d %H:%M:%S\")\n", - "in_ = pd.to_datetime(compas.c_jail_in, 
format=\"%Y-%m-%d %H:%M:%S\")\n", - "\n", - "compas['length_of_stay'] = (out - in_).astype('timedelta64[D]')\n", - "\n", - "# Structure of the data\n", - "display(compas.head(5).T)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>count</th>\n", - " <th>unique</th>\n", - " <th>top</th>\n", - " <th>freq</th>\n", - " <th>mean</th>\n", - " <th>std</th>\n", - " <th>min</th>\n", - " <th>25%</th>\n", - " <th>50%</th>\n", - " <th>75%</th>\n", - " <th>max</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>age</th>\n", - " <td>6172</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>34.5345</td>\n", - " <td>11.7309</td>\n", - " <td>18</td>\n", - " <td>25</td>\n", - " <td>31</td>\n", - " <td>42</td>\n", - " <td>96</td>\n", - " </tr>\n", - " <tr>\n", - " <th>c_charge_degree</th>\n", - " <td>6172</td>\n", - " <td>2</td>\n", - " <td>F</td>\n", - " <td>3970</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>race</th>\n", - " <td>6172</td>\n", - " <td>6</td>\n", - " <td>African-American</td>\n", - " <td>3175</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>age_cat</th>\n", - " <td>6172</td>\n", - " <td>3</td>\n", - " <td>25 - 
45</td>\n", - " <td>3532</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>score_text</th>\n", - " <td>6172</td>\n", - " <td>3</td>\n", - " <td>Low</td>\n", - " <td>3421</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>sex</th>\n", - " <td>6172</td>\n", - " <td>2</td>\n", - " <td>Male</td>\n", - " <td>4997</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>priors_count</th>\n", - " <td>6172</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>3.24644</td>\n", - " <td>4.74377</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>4</td>\n", - " <td>38</td>\n", - " </tr>\n", - " <tr>\n", - " <th>days_b_screening_arrest</th>\n", - " <td>6172</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>-1.74028</td>\n", - " <td>5.08471</td>\n", - " <td>-30</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>30</td>\n", - " </tr>\n", - " <tr>\n", - " <th>decile_score</th>\n", - " <td>6172</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>4.4185</td>\n", - " <td>2.83946</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>4</td>\n", - " <td>7</td>\n", - " <td>10</td>\n", - " </tr>\n", - " <tr>\n", - " <th>is_recid</th>\n", - " <td>6172</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>0.484446</td>\n", - " <td>0.499799</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>two_year_recid</th>\n", - " <td>6172</td>\n", - " 
<td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>0.45512</td>\n", - " <td>0.498022</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>c_jail_in</th>\n", - " <td>6172</td>\n", - " <td>6172</td>\n", - " <td>2013-03-21 04:40:57</td>\n", - " <td>1</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>c_jail_out</th>\n", - " <td>6172</td>\n", - " <td>6161</td>\n", - " <td>2013-09-14 05:58:00</td>\n", - " <td>3</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " </tr>\n", - " <tr>\n", - " <th>length_of_stay</th>\n", - " <td>6172</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>14.6228</td>\n", - " <td>46.6935</td>\n", - " <td>-1</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>5</td>\n", - " <td>799</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " count unique top freq mean \\\n", - "age 6172 NaN NaN NaN 34.5345 \n", - "c_charge_degree 6172 2 F 3970 NaN \n", - "race 6172 6 African-American 3175 NaN \n", - "age_cat 6172 3 25 - 45 3532 NaN \n", - "score_text 6172 3 Low 3421 NaN \n", - "sex 6172 2 Male 4997 NaN \n", - "priors_count 6172 NaN NaN NaN 3.24644 \n", - "days_b_screening_arrest 6172 NaN NaN NaN -1.74028 \n", - "decile_score 6172 NaN NaN NaN 4.4185 \n", - "is_recid 6172 NaN NaN NaN 0.484446 \n", - "two_year_recid 6172 NaN NaN NaN 0.45512 \n", - "c_jail_in 6172 6172 2013-03-21 04:40:57 1 NaN \n", - "c_jail_out 6172 6161 2013-09-14 05:58:00 3 NaN \n", - "length_of_stay 6172 NaN NaN NaN 14.6228 \n", - "\n", - " std min 25% 50% 75% max \n", - "age 11.7309 18 25 31 42 96 \n", - "c_charge_degree NaN NaN NaN NaN NaN NaN \n", - "race NaN NaN NaN 
NaN NaN NaN \n", - "age_cat NaN NaN NaN NaN NaN NaN \n", - "score_text NaN NaN NaN NaN NaN NaN \n", - "sex NaN NaN NaN NaN NaN NaN \n", - "priors_count 4.74377 0 0 1 4 38 \n", - "days_b_screening_arrest 5.08471 -30 -1 -1 -1 30 \n", - "decile_score 2.83946 1 2 4 7 10 \n", - "is_recid 0.499799 0 0 0 1 1 \n", - "two_year_recid 0.498022 0 0 0 1 1 \n", - "c_jail_in NaN NaN NaN NaN NaN NaN \n", - "c_jail_out NaN NaN NaN NaN NaN NaN \n", - "length_of_stay 46.6935 -1 0 1 5 799 " - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "compas.describe(include='all').T" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Notes:**\n", - "\n", - "* Mean age is roughly 34.5 years ranging from 18 to 96\n", - "* Defendants have an average of 3.2 priors (sd 4.7) and more than half have 1 or more prior.\n", - "* 48.4% have recidivated in general and 45.5% recidivated within a two-year period following their arrest." - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 900x900 with 30 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>age</th>\n", - " <th>priors_count</th>\n", - " <th>days_b_screening_arrest</th>\n", - " <th>decile_score</th>\n", - " <th>length_of_stay</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>age</th>\n", - " <td>1.00</td>\n", - " 
<td>0.12</td>\n", - " <td>-0.07</td>\n", - " <td>-0.40</td>\n", - " <td>0.01</td>\n", - " </tr>\n", - " <tr>\n", - " <th>priors_count</th>\n", - " <td>0.12</td>\n", - " <td>1.00</td>\n", - " <td>0.02</td>\n", - " <td>0.45</td>\n", - " <td>0.19</td>\n", - " </tr>\n", - " <tr>\n", - " <th>days_b_screening_arrest</th>\n", - " <td>-0.07</td>\n", - " <td>0.02</td>\n", - " <td>1.00</td>\n", - " <td>0.09</td>\n", - " <td>0.06</td>\n", - " </tr>\n", - " <tr>\n", - " <th>decile_score</th>\n", - " <td>-0.40</td>\n", - " <td>0.45</td>\n", - " <td>0.09</td>\n", - " <td>1.00</td>\n", - " <td>0.21</td>\n", - " </tr>\n", - " <tr>\n", - " <th>length_of_stay</th>\n", - " <td>0.01</td>\n", - " <td>0.19</td>\n", - " <td>0.06</td>\n", - " <td>0.21</td>\n", - " <td>1.00</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " age priors_count days_b_screening_arrest \\\n", - "age 1.00 0.12 -0.07 \n", - "priors_count 0.12 1.00 0.02 \n", - "days_b_screening_arrest -0.07 0.02 1.00 \n", - "decile_score -0.40 0.45 0.09 \n", - "length_of_stay 0.01 0.19 0.06 \n", - "\n", - " decile_score length_of_stay \n", - "age -0.40 0.01 \n", - "priors_count 0.45 0.19 \n", - "days_b_screening_arrest 0.09 0.06 \n", - "decile_score 1.00 0.21 \n", - "length_of_stay 0.21 1.00 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.pairplot(compas[[\n", - " 'age', 'priors_count', 'days_b_screening_arrest', 'decile_score',\n", - " 'length_of_stay'\n", - "]])\n", - "plt.show()\n", - "\n", - "display(compas[[\n", - " 'age', 'priors_count', 'days_b_screening_arrest', 'decile_score',\n", - " 'length_of_stay'\n", - "]].corr().round(2))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Notes:**\n", - "\n", - "* Some notable correlations: `age` and `decile_score` ($\\rho\\approx-0.40$, Spearman -0.44) and `decile_score` and `priors_count` ($\\rho\\approx0.45$, Spearman 0.44)\n", - "* Spearman correlation was for 
`length_of_stay` and `priors_count` 0.27 and for `length_of_stay` and `decile_score` 0.27" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1 1286\n", - "2 822\n", - "4 666\n", - "3 647\n", - "5 582\n", - "6 529\n", - "7 496\n", - "9 420\n", - "8 420\n", - "10 304\n", - "Name: decile_score, dtype: int64\n" - ] - } - ], - "source": [ - "# Decile scores should be evenly distributed but are not.\n", - "print(compas.decile_score.value_counts())" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "25 - 45 3532\n", - "Less than 25 1347\n", - "Greater than 45 1293\n", - "Name: age_cat, dtype: int64\n" - ] - } - ], - "source": [ - "print(compas.age_cat.value_counts())" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "African-American 3175\n", - "Caucasian 2103\n", - "Hispanic 509\n", - "Other 343\n", - "Asian 31\n", - "Native American 11\n", - "Name: race, dtype: int64\n" - ] - } - ], - "source": [ - "print(compas.race.value_counts())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A very small number of Asian and Native American defendants." 
- ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Black defendants: 51.44%\n", - "White defendants: 34.07%\n", - "Hispanic defendants: 8.25%\n", - "Asian defendants: 0.50%\n", - "Native American defendants: 0.18%\n", - "---\n", - "Defendants of other race: 5.56%\n" - ] - } - ], - "source": [ - "print(\"Black defendants: %.2f%%\" % (3175 / 6172 * 100))\n", - "print(\"White defendants: %.2f%%\" % (2103 / 6172 * 100))\n", - "print(\"Hispanic defendants: %.2f%%\" % (509 / 6172 * 100))\n", - "print(\"Asian defendants: %.2f%%\" % (31 / 6172 * 100))\n", - "print(\"Native American defendants: %.2f%%\" % (11 / 6172 * 100))\n", - "print(\"---\")\n", - "print(\"Defendants of other race: %.2f%%\" % (343 / 6172 * 100))" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Low 3421\n", - "Medium 1607\n", - "High 1144\n", - "Name: score_text, dtype: int64" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "compas.score_text.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "F 3970\n", - "M 2202\n", - "Name: c_charge_degree, dtype: int64" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "compas.c_charge_degree.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(-0.001, 0.5] 2085\n", - "(0.5, 5.5] 2866\n", - "(5.5, 10.5] 729\n", - "(10.5, 20.5] 402\n", - "(20.5, 40.5] 90\n", - "Name: priors_count, dtype: int64" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "compas.priors_count.value_counts(\n", - " sort=False, bins=[0, 0.5, 
5.5, 10.5, 20.5, 40.5])" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th>race</th>\n", - " <th>African-American</th>\n", - " <th>Asian</th>\n", - " <th>Caucasian</th>\n", - " <th>Hispanic</th>\n", - " <th>Native American</th>\n", - " <th>Other</th>\n", - " </tr>\n", - " <tr>\n", - " <th>sex</th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>Female</th>\n", - " <td>549</td>\n", - " <td>2</td>\n", - " <td>482</td>\n", - " <td>82</td>\n", - " <td>2</td>\n", - " <td>58</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Male</th>\n", - " <td>2626</td>\n", - " <td>29</td>\n", - " <td>1621</td>\n", - " <td>427</td>\n", - " <td>9</td>\n", - " <td>285</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "race African-American Asian Caucasian Hispanic Native American Other\n", - "sex \n", - "Female 549 2 482 82 2 58\n", - "Male 2626 29 1621 427 9 285" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tab = compas.groupby(['sex', 'race']).size()\n", - "tab.unstack()" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 1008x504 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": 
"display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 1008x504 with 2 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.bar(range(1, 11), compas.decile_score.value_counts(), ec='black')\n", - "plt.title(\"Decile scores of all defendants\")\n", - "plt.ylabel(\"Frequency\")\n", - "plt.xlabel(\"Decile score\")\n", - "plt.xticks(range(1, 11))\n", - "plt.show()\n", - "\n", - "fig, ax = compas.query(\"race in ['Caucasian', 'African-American']\").hist(\n", - " \"decile_score\",\n", - " by=\"race\",\n", - " figsize=(14, 7),\n", - " sharey=True,\n", - " xrot='horizontal',\n", - " ec='black',\n", - " bins=np.arange(0.5, 11.5, 1.0),\n", - " rwidth=0.8)\n", - "\n", - "fig.text(-1.5, 350, \"Frequency\", rotation='vertical')\n", - "fig.text(11.5, -60, \"Decile score\", horizontalalignment='center')\n", - "plt.tight_layout(w_pad=-2)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\scipy\\stats\\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. 
In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n", - " return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 1008x504 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "sns.distplot(compas.age)\n", - "plt.title(\"Histogram of defendants' ages\")\n", - "plt.xlabel(\"Age of defendant\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th>is_recid</th>\n", - " <th>0</th>\n", - " <th>1</th>\n", - " </tr>\n", - " <tr>\n", - " <th>age_cat</th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>25 - 45</th>\n", - " <td>1784</td>\n", - " <td>1748</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Greater than 45</th>\n", - " <td>847</td>\n", - " <td>446</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Less than 25</th>\n", - " <td>551</td>\n", - " <td>796</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "is_recid 0 1\n", - "age_cat \n", - "25 - 45 1784 1748\n", - "Greater than 45 847 446\n", - "Less than 25 551 796" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " 
vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th>is_recid</th>\n", - " <th>0</th>\n", - " <th>1</th>\n", - " </tr>\n", - " <tr>\n", - " <th>sex</th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>Female</th>\n", - " <td>740</td>\n", - " <td>435</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Male</th>\n", - " <td>2442</td>\n", - " <td>2555</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "is_recid 0 1\n", - "sex \n", - "Female 740 435\n", - "Male 2442 2555" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>is_recid</th>\n", - " <th>0</th>\n", - " <th>1</th>\n", - " </tr>\n", - " <tr>\n", - " <th>race</th>\n", - " <th>age_cat</th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th rowspan=\"3\" valign=\"top\">African-American</th>\n", - " <th>25 - 45</th>\n", - " <td>847.0</td>\n", - " <td>1051.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Greater than 45</th>\n", - " <td>261.0</td>\n", - " <td>207.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Less than 25</th>\n", - " <td>294.0</td>\n", - " <td>515.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th rowspan=\"3\" 
valign=\"top\">Asian</th>\n", - " <th>25 - 45</th>\n", - " <td>10.0</td>\n", - " <td>4.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Greater than 45</th>\n", - " <td>7.0</td>\n", - " <td>4.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Less than 25</th>\n", - " <td>4.0</td>\n", - " <td>2.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th rowspan=\"3\" valign=\"top\">Caucasian</th>\n", - " <th>25 - 45</th>\n", - " <td>620.0</td>\n", - " <td>508.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Greater than 45</th>\n", - " <td>442.0</td>\n", - " <td>186.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Less than 25</th>\n", - " <td>167.0</td>\n", - " <td>180.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th rowspan=\"3\" valign=\"top\">Hispanic</th>\n", - " <th>25 - 45</th>\n", - " <td>180.0</td>\n", - " <td>111.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Greater than 45</th>\n", - " <td>81.0</td>\n", - " <td>28.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Less than 25</th>\n", - " <td>51.0</td>\n", - " <td>58.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th rowspan=\"3\" valign=\"top\">Native American</th>\n", - " <th>25 - 45</th>\n", - " <td>5.0</td>\n", - " <td>2.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Greater than 45</th>\n", - " <td>NaN</td>\n", - " <td>2.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Less than 25</th>\n", - " <td>NaN</td>\n", - " <td>2.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th rowspan=\"3\" valign=\"top\">Other</th>\n", - " <th>25 - 45</th>\n", - " <td>122.0</td>\n", - " <td>72.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Greater than 45</th>\n", - " <td>56.0</td>\n", - " <td>19.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Less than 25</th>\n", - " <td>35.0</td>\n", - " <td>39.0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "is_recid 0 1\n", - "race age_cat \n", - "African-American 25 - 45 847.0 1051.0\n", - " Greater than 45 261.0 207.0\n", - " Less than 25 294.0 515.0\n", - "Asian 25 - 45 10.0 4.0\n", - 
" Greater than 45 7.0 4.0\n", - " Less than 25 4.0 2.0\n", - "Caucasian 25 - 45 620.0 508.0\n", - " Greater than 45 442.0 186.0\n", - " Less than 25 167.0 180.0\n", - "Hispanic 25 - 45 180.0 111.0\n", - " Greater than 45 81.0 28.0\n", - " Less than 25 51.0 58.0\n", - "Native American 25 - 45 5.0 2.0\n", - " Greater than 45 NaN 2.0\n", - " Less than 25 NaN 2.0\n", - "Other 25 - 45 122.0 72.0\n", - " Greater than 45 56.0 19.0\n", - " Less than 25 35.0 39.0" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "tab = compas.groupby(['age_cat', 'is_recid']).size()\n", - "display(tab.unstack())\n", - "\n", - "tab = compas.groupby(['sex', 'is_recid']).size()\n", - "display(tab.unstack())\n", - "\n", - "tab = compas.groupby(['race', 'age_cat', 'is_recid']).size()\n", - "display(tab.unstack())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From above it is clear that there are no Native American recidivists of age over 45 or under 25. There are some other value combinations that might be problematic. Therefore the procedure of estimating $P(X=x)$ has to be considered carefully." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Synthetic data\n", - "\n", - "In the chunk below, we generate the synthetic data as described by Lakkaraju et al. 
The default values and definitions of $Y$ and $T$ values follow their description.\n", - "\n", - "**Parameters**\n", - "\n", - "* M = `nJudges_M`, number of judges\n", - "* N = `nSubjects_N`, number of subjects assigned to each judge\n", - "* betas $\\beta_i$ = `beta_i`, where $i \\in \\{X, Z, W\\}$ are coefficients for the respected variables\n", - "* R = `acceptanceRate_R`, acceptance rates\n", - "* X = `X`, invidual's features observable to all (models and judges)\n", - "* Z = `Z`, information observable for judges only\n", - "* W = `W`, unobservable / inaccessible information\n", - "* T = `decision_T`, bail-or-jail decisions where $T=0$ represents jail decision and $T=1$ bail decision.\n", - "* Y = `result_Y`, result variable, if $Y=0$ person will or would recidivate and if $Y=1$ person will or would not commit a crime." - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>count</th>\n", - " <th>mean</th>\n", - " <th>std</th>\n", - " <th>min</th>\n", - " <th>25%</th>\n", - " <th>50%</th>\n", - " <th>75%</th>\n", - " <th>max</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>judgeID_J</th>\n", - " <td>50000.0</td>\n", - " <td>49.500000</td>\n", - " <td>28.866359</td>\n", - " <td>0.000000</td>\n", - " <td>24.750000</td>\n", - " <td>49.500000</td>\n", - " <td>74.250000</td>\n", - " <td>99.000000</td>\n", - " </tr>\n", - " <tr>\n", - " <th>acceptanceRate_R</th>\n", - " <td>50000.0</td>\n", - " <td>0.489100</td>\n", - " 
<td>0.241555</td>\n", - " <td>0.113154</td>\n", - " <td>0.258107</td>\n", - " <td>0.474092</td>\n", - " <td>0.714801</td>\n", - " <td>0.898779</td>\n", - " </tr>\n", - " <tr>\n", - " <th>X</th>\n", - " <td>50000.0</td>\n", - " <td>-0.008054</td>\n", - " <td>0.998408</td>\n", - " <td>-4.050908</td>\n", - " <td>-0.680597</td>\n", - " <td>-0.008397</td>\n", - " <td>0.660901</td>\n", - " <td>4.099418</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Z</th>\n", - " <td>50000.0</td>\n", - " <td>-0.004696</td>\n", - " <td>0.993683</td>\n", - " <td>-4.182233</td>\n", - " <td>-0.680335</td>\n", - " <td>-0.004356</td>\n", - " <td>0.666608</td>\n", - " <td>3.966532</td>\n", - " </tr>\n", - " <tr>\n", - " <th>W</th>\n", - " <td>50000.0</td>\n", - " <td>-0.000542</td>\n", - " <td>0.995303</td>\n", - " <td>-4.189579</td>\n", - " <td>-0.671069</td>\n", - " <td>0.002007</td>\n", - " <td>0.671735</td>\n", - " <td>4.276601</td>\n", - " </tr>\n", - " <tr>\n", - " <th>result_Y</th>\n", - " <td>50000.0</td>\n", - " <td>0.503380</td>\n", - " <td>0.499994</td>\n", - " <td>0.000000</td>\n", - " <td>0.000000</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " </tr>\n", - " <tr>\n", - " <th>probabilities_T</th>\n", - " <td>50000.0</td>\n", - " <td>0.498167</td>\n", - " <td>0.278933</td>\n", - " <td>-0.295551</td>\n", - " <td>0.276483</td>\n", - " <td>0.496720</td>\n", - " <td>0.720596</td>\n", - " <td>1.261540</td>\n", - " </tr>\n", - " <tr>\n", - " <th>decision_T</th>\n", - " <td>50000.0</td>\n", - " <td>0.488120</td>\n", - " <td>0.499864</td>\n", - " <td>0.000000</td>\n", - " <td>0.000000</td>\n", - " <td>0.000000</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " count mean std min 25% \\\n", - "judgeID_J 50000.0 49.500000 28.866359 0.000000 24.750000 \n", - "acceptanceRate_R 50000.0 0.489100 0.241555 0.113154 0.258107 \n", - "X 50000.0 -0.008054 0.998408 
-4.050908 -0.680597 \n", - "Z 50000.0 -0.004696 0.993683 -4.182233 -0.680335 \n", - "W 50000.0 -0.000542 0.995303 -4.189579 -0.671069 \n", - "result_Y 50000.0 0.503380 0.499994 0.000000 0.000000 \n", - "probabilities_T 50000.0 0.498167 0.278933 -0.295551 0.276483 \n", - "decision_T 50000.0 0.488120 0.499864 0.000000 0.000000 \n", - "\n", - " 50% 75% max \n", - "judgeID_J 49.500000 74.250000 99.000000 \n", - "acceptanceRate_R 0.474092 0.714801 0.898779 \n", - "X -0.008397 0.660901 4.099418 \n", - "Z -0.004356 0.666608 3.966532 \n", - "W 0.002007 0.671735 4.276601 \n", - "result_Y 1.000000 1.000000 1.000000 \n", - "probabilities_T 0.496720 0.720596 1.261540 \n", - "decision_T 0.000000 1.000000 1.000000 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 25594\n", - "1 24406\n", - "Name: decision_T, dtype: int64\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th>decision_T</th>\n", - " <th>0</th>\n", - " <th>1</th>\n", - " </tr>\n", - " <tr>\n", - " <th>result_Y</th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0.0</th>\n", - " <td>19544</td>\n", - " <td>5287</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1.0</th>\n", - " <td>6050</td>\n", - " <td>19119</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "decision_T 0 1\n", - "result_Y \n", - "0.0 19544 5287\n", - "1.0 6050 19119" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Set seed for 
reproducibility\n", - "npr.seed(111)\n", - "\n", - "def generateData(nJudges_M=100,\n", - " nSubjects_N=500,\n", - " beta_X=1.0,\n", - " beta_Z=1.0,\n", - " beta_W=0.2):\n", - "\n", - " # Assign judge IDs as running numbering from 0 to nJudges_M - 1\n", - " judgeID_J = np.repeat(np.arange(0, nJudges_M, dtype=np.int32), nSubjects_N)\n", - "\n", - " # Sample acceptance rates uniformly from a closed interval\n", - " # from 0.1 to 0.9 and round to tenth decimal place.\n", - " acceptance_rates = np.round(npr.uniform(.1, .9, nJudges_M), 10)\n", - "\n", - " # Replicate the rates so they can be attached to the corresponding judge ID.\n", - " acceptanceRate_R = np.repeat(acceptance_rates, nSubjects_N)\n", - "\n", - " # Sample the variables from standard Gaussian distributions.\n", - " X = npr.normal(size=nJudges_M * nSubjects_N)\n", - " Z = npr.normal(size=nJudges_M * nSubjects_N)\n", - " W = npr.normal(size=nJudges_M * nSubjects_N)\n", - "\n", - " probabilities_Y = 1 / (1 + np.exp(-(beta_X * X + beta_Z * Z + beta_W * W)))\n", - "\n", - " # 0 if P(Y = 0| X = x; Z = z; W = w) >= 0.5 , 1 otherwise\n", - " result_Y = 1 - probabilities_Y.round()\n", - "\n", - " probabilities_T = 1 / (1 + np.exp(-(beta_X * X + beta_Z * Z)))\n", - " probabilities_T += npr.normal(0, .1, nJudges_M * nSubjects_N)\n", - "\n", - " # Initialize decision values as 1\n", - " decision_T = np.ones(nJudges_M * nSubjects_N)\n", - "\n", - " # Initialize the dataframe\n", - " df_init = pd.DataFrame(\n", - " np.column_stack((judgeID_J, acceptanceRate_R, X, Z, W, result_Y,\n", - " probabilities_T, decision_T)),\n", - " columns=[\n", - " \"judgeID_J\", \"acceptanceRate_R\", \"X\", \"Z\", \"W\", \"result_Y\",\n", - " \"probabilities_T\", \"decision_T\"\n", - " ])\n", - "\n", - " # Sort by judges then probabilities\n", - " data = df_init.sort_values(\n", - " by=[\"judgeID_J\", \"probabilities_T\"], ascending=False)\n", - "\n", - " # Iterate over the data. 
Subject is in the top (1-r)*100% if\n", - " # his within-judge-index is over acceptance threshold times\n", - " # the number of subjects assigned to each judge. If subject\n", - " # is over the limit they are assigned a zero, else one.\n", - " data.reset_index(drop=True, inplace=True)\n", - "\n", - " data['decision_T'] = np.where(\n", - " (data.index.values % nSubjects_N) <\n", - " ((1 - data['acceptanceRate_R']) * nSubjects_N), 0, 1)\n", - "\n", - " return data\n", - "\n", - "\n", - "df = []\n", - "df = generateData()\n", - "\n", - "# Basic stats of the created data set.\n", - "display(df.describe().T)\n", - "\n", - "print(df.decision_T.value_counts())\n", - "\n", - "tab = df.groupby(['result_Y', 'decision_T']).size()\n", - "display(tab.unstack())" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(25000, 8)\n", - "(25000, 8)\n", - "(12094, 8)\n", - "(12312, 8)\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th>decision_T</th>\n", - " <th>1</th>\n", - " </tr>\n", - " <tr>\n", - " <th>result_Y</th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0.0</th>\n", - " <td>2606</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1.0</th>\n", - " <td>9488</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "decision_T 1\n", - "result_Y \n", - "0.0 2606\n", - "1.0 9488" - ] - }, - "execution_count": 105, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Split the data 
set to test and train\n", - "from sklearn.model_selection import train_test_split\n", - "train, test = train_test_split(df, test_size=0.5, random_state=0)\n", - "\n", - "print(train.shape)\n", - "print(test.shape)\n", - "\n", - "train_labeled = train[train.decision_T == 1]\n", - "test_labeled = test[test.decision_T == 1]\n", - "\n", - "print(train_labeled.shape)\n", - "print(test_labeled.shape)\n", - "\n", - "tab = train_labeled.groupby(['result_Y', 'decision_T']).size()\n", - "tab.unstack()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Algorithms\n", - "\n", - "### Contraction algorithm\n", - "\n", - "Below is an implementation of Lakkaraju's team's algorithm presented in [their paper](https://helka.finna.fi/PrimoRecord/pci.acm3098066). Relevant parameters to be passed to the function are presented in the description." - ] - }, - { - "cell_type": "code", - "execution_count": 106, - "metadata": {}, - "outputs": [], - "source": [ - "def contraction(df,\n", - " judgeIDJ_col,\n", - " decisionT_col,\n", - " resultY_col,\n", - " modelProbS_col,\n", - " accRateR_col,\n", - " r,\n", - " binning=False):\n", - " '''\n", - " This is an implementation of the algorithm presented by Lakkaraju\n", - " et al. 
in their paper \"The Selective Labels Problem: Evaluating \n", - " Algorithmic Predictions in the Presence of Unobservables\" (2017).\n", - " \n", - " Parameters:\n", - " df = The (Pandas) data frame containing the data, judge decisions,\n", - " judge IDs, results and probability scores.\n", - " judgeIDJ_col = String, the name of the column containing the judges' IDs\n", - " in df.\n", - " decisionT_col = String, the name of the column containing the judges' decisions\n", - " resultY_col = String, the name of the column containing the realization\n", - " modelProbS_col = String, the name of the column containing the probability\n", - " scores from the black-box model B.\n", - " accRateR_col = String, the name of the column containing the judges' \n", - " acceptance rates\n", - " r = Float between 0 and 1, the given acceptance rate.\n", - " binning = Boolean, should judges with same acceptance rate be binned\n", - " \n", - " Returns:\n", - " u = The estimated failure rate at acceptance rate r.\n", - " '''\n", - " # Sort first by acceptance rate and judge ID.\n", - " sorted_df = df.sort_values(\n", - " by=[accRateR_col, judgeIDJ_col], ascending=False)\n", - "\n", - " if binning:\n", - " # Get maximum leniency\n", - " max_leniency = sorted_df[accRateR_col].values[0].round(1)\n", - "\n", - " # Get list of judges that are the most lenient\n", - " most_lenient_list = sorted_df.loc[sorted_df[accRateR_col].round(1) ==\n", - " max_leniency, judgeIDJ_col]\n", - "\n", - " # Subset to obtain D_q\n", - " D_q = sorted_df[sorted_df[judgeIDJ_col].isin(\n", - " most_lenient_list.unique())]\n", - " else:\n", - " # Get most lenient judge\n", - " most_lenient_ID = sorted_df[judgeIDJ_col].values[0]\n", - "\n", - " # Subset\n", - " D_q = sorted_df[sorted_df[judgeIDJ_col] == most_lenient_ID]\n", - "\n", - " R_q = D_q[D_q[decisionT_col] == 1]\n", - "\n", - " R_sort_q = R_q.sort_values(by=modelProbS_col, ascending=False)\n", - "\n", - " number_to_remove = int(\n", - " np.round((1 - r) * 
D_q.shape[0] - (D_q.shape[0] - R_q.shape[0])))\n", - "\n", - " R_B = R_sort_q[number_to_remove:R_sort_q.shape[0]]\n", - "\n", - " return np.sum(R_B[resultY_col] == 0) / D_q.shape[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Causal model\n", - "\n", - "Our model is defined by the probabilistic expression \n", - "\n", - "\\begin{equation}\\label{model}\n", - "P(Y=0 | \\text{do}(R=r)) = \\sum_x \\underbrace{P(Y=0|X=x, T=1)}_\\text{1} \n", - "\\overbrace{P(T=1|R=r, X=x)}^\\text{2} \n", - "\\underbrace{P(X=x)}_\\text{3}\n", - "\\end{equation}\n", - "\n", - "As a picture (Z not in model):\n", - "\n", - "\n", - "\n", - "<!---\n", - "**Algorithm -- UPDATE!!**\n", - "\n", - "Our model will be constructed sequentially.\n", - "\n", - "Input: Training and test data sets $(\\mathbf{x}, t, y) \\in \\mathcal{D}$ and acceptance rate $r$. \n", - "Returns: $P(Y=0 | \\text{do}(R=r))$\n", - "\n", - "Procedure:\n", - "1. Model $P(X=x)$ in a suitable way and assign to $\\mathcal{M}_0$\n", - "* Build model $\\mathcal{M}_1$ predicting response $Y$ with predictors $X$ from the labeled observations (where $T=1$) in training data.\n", - "* Predict $P(Y=0|X=x)$ for every observation in the test data using model $\\mathcal{M}_1$.\n", - "* Initialize `sum = 0`\n", - "* For every point in the parameter space (for every $x$ in $X$)\n", - " 1. $p_x \\leftarrow P(X=x)$ from $\\mathcal{M}_0$\n", - " * $\\mathcal{D_x} \\leftarrow \\{\\mathcal{D} | X = x\\}$\n", - " * Assign first $r\\cdot 100\\%$ observations from $\\mathcal{D_x}$ to $\\mathcal{D}_{rx}$\n", - " * $p_t \\leftarrow \\dfrac{|\\{\\mathcal{D}_{rx}|T=1\\}|}{|\\mathcal{D}_{rx}|}$ (part 2 of eq. 
$\\ref{model}$) Pitääkö tähänkin treenaa joku oma luokittelija?\n", - " * $p_y$ will be predicted from the model $\\mathcal{M}_1$\n", - " * `sum +=` $p_y \\cdot p_t \\cdot p_x$\n", - "* Return `sum`\n", - "--->\n", - "**Constructing $\\mathcal{M}_0$, preliminary ideas:**\n", - "\n", - "* Approximate $P(X=x)$ with frequencies (makes independence assumption, make variables factors first)\n", - "* Construct Bayesian network using some well-known algorithm.\n", - "\n", - "Functions:\n", - "\n", - "* $f(x)$ gives probability of recidivism given personal properties and predictive model.\n", - "* `ep` counts performance of the predictive model given a data, model and leniency rate like Michael's pdf." - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "def f(x, model, failure_value):\n", - " '''\n", - " Returns the probability of negative event (e.g. recidivism) given individual \n", - " features (x), predictive model (model) and value for failure.\n", - " '''\n", - " if x.ndim == 1:\n", - " # if x is vector, transform to column matrix.\n", - " f_values = model.predict_proba(np.array(x).reshape(-1, 1))\n", - " else:\n", - " f_values = model.predict_proba(x)\n", - "\n", - " return f_values[:, model.classes_ == failure_value].flatten()\n", - "\n", - "\n", - "def ep(r, df, result_col, feature_cols, model, failure_value):\n", - " '''\n", - " Returns:\n", - " Empirical performance, i.e. percentage of recidivists. 
\n", - " \n", - " Parameters:\n", - " r = leniency rate(s)\n", - " df = test data, pandas DataFrame\n", - " result_col = String (list), name of column containing the binarized results.\n", - " feature_cols = String (list), name of columns containge individual features.\n", - " model = trained sklearn classifier \n", - " failure_value = value obtained from the model.classes_ representing the \n", - " unwanted event label (usually 0 or 1).\n", - " '''\n", - " rates = np.zeros_like(r)\n", - " for i in range(len(rates)):\n", - " rates[i] = np.mean((df[result_col] == failure_value) &\n", - " (f(df[feature_cols], model, failure_value) < r[i]))\n", - " return rates\n", - "\n", - "def gp(r, x_values, y_model, x_model, failure_value):\n", - " '''\n", - " Returns:\n", - " Generalized performance\n", - " \n", - " Parameters:\n", - " r = leniency rate\n", - " df = test data, pandas DataFrame\n", - " feature_cols = String (list), name of columns containing individual features.\n", - " y_model = trained sklearn classifier to predict response\n", - " x_model = model of P(X=x)\n", - " failure_value = value obtained from the model.classes_ representing the \n", - " unwanted event label.\n", - " '''\n", - " preds = f(x_values, y_model, failure_value)\n", - " \n", - " return np.sum(preds * (preds < r) * x_model(x_values))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Performance comparison\n", - "\n", - "Below we try to replicate the results obtained by Lakkaraju and compare their model's performance to the one of ours.\n", - "\n", - "### On synthetic data\n", - "\n", - "#### Predictive models\n", - "\n", - "Lakkaraju says that they used logistic regression to predict recidivism. We models using only *observed observations*, i.e. defendants that were granted bail and are in the train set. We then predict the probability of recidivism for all observations in the test data and attach it to our data set. I also applied random forest classifier." 
- ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [], - "source": [ - "# instantiate the model (using the default parameters)\n", - "logreg = LogisticRegression(solver='lbfgs')\n", - "\n", - "# fit, reshape X to be of shape (n_samples, n_features)\n", - "logreg = logreg.fit(train_labeled.X.values.reshape(-1, 1), train_labeled.result_Y)\n", - "\n", - "# predict probabilities and attach to data\n", - "label_probs_logreg = logreg.predict_proba(test.X.values.reshape(-1, 1))\n", - "test = test.assign(B_prob_0_logreg=label_probs_logreg[:, 0])\n", - "\n", - "label_probs_logreg = logreg.predict_proba(test_labeled.X.values.reshape(-1, 1))\n", - "test_labeled = test_labeled.assign(B_prob_0_logreg=label_probs_logreg[:, 0])\n", - "test_labeled = test_labeled.assign(B_prob_1_logreg=label_probs_logreg[:, 1])\n", - "\n", - "########\n", - "\n", - "# instantiate the model (using the default parameters)\n", - "forest = RandomForestClassifier(n_estimators=400, max_depth=8, random_state=0)\n", - "\n", - "# fit, reshape X to be of shape (n_samples, n_features)\n", - "forest = forest.fit(train_labeled.X.values.reshape(-1, 1), train_labeled.result_Y)\n", - "\n", - "# predict probabilities and attach to data\n", - "label_probs_forest = forest.predict_proba(test.X.values.reshape(-1, 1))\n", - "test = test.assign(B_prob_0_forest=label_probs_forest[:, 0])\n", - "\n", - "label_probs_forest = forest.predict_proba(test_labeled.X.values.reshape(-1, 1))\n", - "test_labeled = test_labeled.assign(B_prob_0_forest=label_probs_forest[:, 0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Visual comparison\n", - "\n", - "Let's plot the failure rates against the acceptance rates using the difference." 
- ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 1008x576 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0. 0. 0. 0. 0.0165 0.0191]\n", - " [0.0137 0.0137 0.0011 0.0123 0.0531 0.0568]\n", - " [0.0457 0.0502 0.0076 0.0492 0.0905 0.0838]\n", - " [0.0731 0.0731 0.049 0.0902 0.1227 0.1326]\n", - " [0.1142 0.1187 0.0846 0.1311 0.1552 0.1607]\n", - " [0.1644 0.1689 0.1742 0.1598 0.1786 0.1778]\n", - " [0.2009 0.2146 0.277 0.2377 0.197 0.1974]\n", - " [0.2785 0.2877 0.3625 0.3115 0.2095 0.2069]]\n" - ] - } - ], - "source": [ - "failure_rates = np.zeros((8, 6))\n", - "\n", - "# sort whole test data by \n", - "#test_sorted = test.sort_values(by='B_prob_0_logreg', ascending=False)\n", - "\n", - "for r in np.arange(1, 9):\n", - " ## Contraction, logistic regression\n", - " failure_rates[r - 1, 0] = contraction(\n", - " test_labeled, 'judgeID_J', 'decision_T', 'result_Y',\n", - " 'B_prob_0_logreg', 'acceptanceRate_R', r / 10, False)\n", - " \n", - " ## Contraction, random forest\n", - " failure_rates[r - 1, 1] = contraction(\n", - " test_labeled, 'judgeID_J', 'decision_T', 'result_Y',\n", - " 'B_prob_0_forest', 'acceptanceRate_R', r / 10, False)\n", - "\n", - " ## Human error rate - Correct?\n", - " # Get judges with correct leniency as list\n", - " correct_leniency_list = test_labeled.judgeID_J[test_labeled['acceptanceRate_R'].round(1) ==\n", - " r / 10]\n", - "\n", - " # Released are the people they judged and released, T = 1\n", - " released = test_labeled[test_labeled.judgeID_J.isin(correct_leniency_list)]\n", - "\n", - " # Get their failure rate, aka ratio of reoffenders to number of people judged in total\n", - " failure_rates[r - 1, 2] = np.sum(\n", - " released.result_Y == 0) / 
correct_leniency_list.shape[0]\n", - " \n", - " ## True evaluation -- didn't mention using contraction here???\n", - " failure_rates[r - 1, 3] = contraction(test, 'judgeID_J', 'decision_T',\n", - " 'result_Y', 'B_prob_0_logreg',\n", - " 'acceptanceRate_R', r / 10, False)\n", - "\n", - " ## Causal model with logistic regression\n", - " failure_rates[r - 1, 4] = ep([r / 10], test_labeled, 'result_Y', 'X', logreg, 0)\n", - " \n", - " ## Causal model with random forest classifier\n", - " failure_rates[r - 1, 5] = ep([r / 10], test_labeled, 'result_Y', 'X', forest, 0)\n", - " \n", - "\n", - "# klassifikaatioille scipy.stats semin kautta error barit xerr ja yerr argumenttien kautta\n", - "\n", - "plt.figure(figsize=(14, 8))\n", - "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 0], label='Contraction, logistic')\n", - "#plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 1], label='Contraction, forest')\n", - "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 2], label='\"Human judges\"')\n", - "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 3], label='True Evaluation')\n", - "\n", - "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 4], label='Causal model, log.')\n", - "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 5], label='Causal model, r.f.')\n", - "\n", - "plt.title('Failure rate vs. Acceptance rate')\n", - "plt.xlabel('Acceptance rate')\n", - "plt.ylabel('Failure rate')\n", - "plt.legend()\n", - "plt.show()\n", - "\n", - "with np.printoptions(precision=4, suppress=True):\n", - " print(failure_rates)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Thoughts:**\n", - "\n", - "Failure rates still too high for about 10 percentage points compared to Lakkaraju paper. Failure rates will change if seed is changed (e.g. with seed 0 contraction's failure rates are approximately 0.31, causal doesn't change that much). It seems like the contraction or our model is some how predicting the wrong thing. 
Behavior after 0.5 is not consistent? (Curves curve down in Lakkaraju's paper. + Human evaluation curve jumps to the wrong side.) Have to check some rounding rules." - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 1008x576 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.48356 0.48564 0.48748 0.48964 0.49132 0.49248 0.4932 0.49368 0.49428\n", - " 0.49432]\n", - "[0.3268 0.34608 0.36604 0.3874 0.4092 0.43092 0.45352 0.47776 0.49836\n", - " 0.50568]\n", - "1.0 12642\n", - "0.0 12358\n", - "Name: result_Y, dtype: int64\n", - "1.0 9631\n", - "0.0 2681\n", - "Name: result_Y, dtype: int64\n" - ] - } - ], - "source": [ - "x_vals = np.linspace(0, 1, 100)\n", - "y_vals = ep(x_vals, test_labeled, 'result_Y', 'X', logreg, 0)\n", - "y_vals2 = ep(x_vals, test_labeled, 'result_Y', 'X', logreg, 1)\n", - "\n", - "y_vals3 = ep(x_vals, test, 'result_Y', 'X', logreg, 0)\n", - "y_vals4 = ep(x_vals, test, 'result_Y', 'X', logreg, 1)\n", - "\n", - "plt.figure(figsize=(14, 8))\n", - "plt.plot(x_vals, y_vals)\n", - "plt.plot(x_vals, y_vals2)\n", - "plt.plot(x_vals, y_vals3)\n", - "plt.plot(x_vals, y_vals4)\n", - "plt.show()\n", - "\n", - "print(y_vals3[-10:])\n", - "print(y_vals4[-10:])\n", - "print(test.result_Y.value_counts())\n", - "print(test_labeled.result_Y.value_counts())" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 1008x504 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# Mindless comparison as X is continuous (we should integrate).\n", - "\n", - "thresholds = np.linspace(.0, 1.0)\n", - "\n", - "x_values = 
np.linspace(-10, 10, 1000)\n", - "\n", - "rates_logistic = np.zeros(0)\n", - "rates_forest = np.zeros(0)\n", - "\n", - "for leniency in thresholds:\n", - " rates_logistic = np.append(rates_logistic, gp(leniency, x_values, logreg, lambda x: scs.norm.pdf(x), 0))\n", - " rates_forest = np.append(rates_forest, gp(leniency, x_values, forest, lambda x: scs.norm.pdf(x), 0))\n", - "\n", - "plt.plot(thresholds, rates_logistic, label=\"Logistic model\")\n", - "plt.plot(thresholds, rates_forest, label=\"Random forest\")\n", - "plt.title(\"Generalized performance\")\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### On COMPAS data\n", - "\n", - "\n", - "#### Predictive models\n", - "\n", - "Let's build the predictive models (first here random forest and logistic regression). Some of our variables are string so they will first have to be transformed to be dummy / indicator variables." - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [], - "source": [ - "# convert string values to dummies, drop first so full rank\n", - "compas_dummy = pd.get_dummies(compas, columns=['c_charge_degree', 'race', 'age_cat', 'score_text', 'sex'], drop_first=True)\n", - "\n", - "########\n", - "\n", - "predict_columns = ['priors_count', 'days_b_screening_arrest', 'length_of_stay',\n", - " 'c_charge_degree_M', 'race_Asian', 'race_Caucasian', 'race_Hispanic',\n", - " 'race_Native American', 'race_Other', 'age_cat_Greater than 45',\n", - " 'age_cat_Less than 25', 'score_text_Low', 'score_text_Medium', 'sex_Male']\n", - "\n", - "response_column = 'two_year_recid'\n", - "\n", - "# instantiate the model (using the default parameters)\n", - "logreg_c = LogisticRegression(solver='lbfgs', max_iter=1000)\n", - "\n", - "# fit, reshape X to be of shape (n_samples, n_features)\n", - "logreg_c = logreg_c.fit(compas_dummy[predict_columns], compas_dummy[response_column])\n", - "\n", - "# predict 
probabilities and attach to data\n", - "#label_probs_logreg = logreg_c.predict_proba(test.X.values.reshape(-1, 1))\n", - "#test = test.assign(B_prob_0_machine=label_probs_logreg[:, 0])\n", - "\n", - "########\n", - "\n", - "# instantiate the model\n", - "forest_c = RandomForestClassifier(n_estimators=300, max_depth=5, random_state=0)\n", - "\n", - "# fit, reshape X to be of shape (n_samples, n_features)\n", - "forest_c = forest_c.fit(compas_dummy[predict_columns], compas_dummy[response_column])\n", - "\n", - "# predict probabilities and attach to data\n", - "#label_probs_forest = forest.predict_proba(test.X.values.reshape(-1, 1))\n", - "#test = test.assign(B_prob_0_forest=label_probs_forest[:, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 1008x576 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "failures_compas = np.zeros((11, 2))\n", - "\n", - "for r in np.arange(0, 11):\n", - " ## Causal model with logistic regression\n", - " failures_compas[r, 0] = ep([r / 10], compas_dummy, response_column, predict_columns, logreg_c, 1)\n", - " \n", - " ## Causal model with random forest classifier\n", - " failures_compas[r, 1] = ep([r / 10], compas_dummy, response_column, predict_columns, forest_c, 1)\n", - "\n", - "# klassifikaatioille scipy.stats semin kautta error barit xerr ja yerr argumenttien kautta\n", - "\n", - "plt.figure(figsize=(14, 8))\n", - "plt.plot(np.arange(0, 11) / 10, failures_compas[:, 0], label='Causal model, log.')\n", - "plt.plot(np.arange(0, 11) / 10, failures_compas[:, 1], label='Causal model, for.')\n", - "\n", - "plt.title('Failure rate vs. 
Acceptance rate - COMPAS')\n", - "plt.xlabel('Leniency')\n", - "plt.ylabel('Empirical performance')\n", - "plt.legend()\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Of course if leniency is one, then the empirical performance should always converge to the proportion of false positives in the data." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": { - "height": "168px", - "width": "176px" - }, - "number_sections": true, - "sideBar": true, - "skip_h1_title": true, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": true, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "251.4px" - }, - "toc_section_display": true, - "toc_window_display": true - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "position": { - "height": "465.65px", - "left": "843.6px", - "right": "16.2px", - "top": "159px", - "width": "676.2px" - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/analysis_and_scripts/Bachelors_thesis_analyses_OLD.ipynb 
b/analysis_and_scripts/Bachelors_thesis_analyses_OLD.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..7ba50d2cfff4e1bfbad0a9ba94eb3ef8e4501d2e --- /dev/null +++ b/analysis_and_scripts/Bachelors_thesis_analyses_OLD.ipynb @@ -0,0 +1,3237 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "toc": true + }, + "source": [ + "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n", + "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Data-sets\" data-toc-modified-id=\"Data-sets-1\"><span class=\"toc-item-num\">1 </span>Data sets</a></span><ul class=\"toc-item\"><li><span><a href=\"#COMPAS-data\" data-toc-modified-id=\"COMPAS-data-1.1\"><span class=\"toc-item-num\">1.1 </span>COMPAS data</a></span></li><li><span><a href=\"#Synthetic-data\" data-toc-modified-id=\"Synthetic-data-1.2\"><span class=\"toc-item-num\">1.2 </span>Synthetic data</a></span></li></ul></li><li><span><a href=\"#Algorithms\" data-toc-modified-id=\"Algorithms-2\"><span class=\"toc-item-num\">2 </span>Algorithms</a></span><ul class=\"toc-item\"><li><span><a href=\"#Contraction-algorithm\" data-toc-modified-id=\"Contraction-algorithm-2.1\"><span class=\"toc-item-num\">2.1 </span>Contraction algorithm</a></span></li><li><span><a href=\"#Causal-model\" data-toc-modified-id=\"Causal-model-2.2\"><span class=\"toc-item-num\">2.2 </span>Causal model</a></span></li></ul></li><li><span><a href=\"#Performance-comparison\" data-toc-modified-id=\"Performance-comparison-3\"><span class=\"toc-item-num\">3 </span>Performance comparison</a></span><ul class=\"toc-item\"><li><span><a href=\"#On-synthetic-data\" data-toc-modified-id=\"On-synthetic-data-3.1\"><span class=\"toc-item-num\">3.1 </span>On synthetic data</a></span><ul class=\"toc-item\"><li><span><a href=\"#Predictive-models\" data-toc-modified-id=\"Predictive-models-3.1.1\"><span class=\"toc-item-num\">3.1.1 </span>Predictive models</a></span></li><li><span><a href=\"#Visual-comparison\" 
data-toc-modified-id=\"Visual-comparison-3.1.2\"><span class=\"toc-item-num\">3.1.2 </span>Visual comparison</a></span></li></ul></li><li><span><a href=\"#On-COMPAS-data\" data-toc-modified-id=\"On-COMPAS-data-3.2\"><span class=\"toc-item-num\">3.2 </span>On COMPAS data</a></span><ul class=\"toc-item\"><li><span><a href=\"#Predictive-models\" data-toc-modified-id=\"Predictive-models-3.2.1\"><span class=\"toc-item-num\">3.2.1 </span>Predictive models</a></span></li></ul></li></ul></li></ul></div>" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bachelor's thesis analyses\n", + "\n", + "*This Jupyter notebook is for the analyses and model building for Riku Laine's bachelor's thesis*\n", + "\n", + "A table of contents is provided above. First I will briefly present the COMPAS data set and then create the synthetic data set as done by Lakkaraju *et al.* ([link](https://helka.finna.fi/PrimoRecord/pci.acm3098066)). Then I will proceed to implement the algorithms. After that I will do the side-by-side comparisons of the results on the synthetic data. Finally, I will run the causal model on the COMPAS data.\n", + "\n", + "## Data sets\n", + "\n", + "*Below I load the COMPAS data set and generate the synthetic one.*\n", + "\n", + "### COMPAS data\n", + "\n", + "The following data filtering procedure follows the one described in the [ProPublica methodology](https://www.propublica.org/article/how-we-analyzed-the-compas-recidivism-algorithm)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from datetime import datetime\n", + "import matplotlib.pyplot as plt\n", + "import scipy.stats as scs\n", + "import scipy.integrate as si\n", + "import seaborn as sns\n", + "import numpy.random as npr\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# Settings\n", + "\n", + "%matplotlib inline\n", + "\n", + "plt.rcParams.update({'font.size': 16})\n", + "plt.rcParams.update({'figure.figsize': (14, 7)})\n", + "\n", + "# Suppress deprecation warnings.\n", + "\n", + "import warnings\n", + "\n", + "def fxn():\n", + " warnings.warn(\"deprecated\", DeprecationWarning)\n", + "\n", + "with warnings.catch_warnings():\n", + " warnings.simplefilter(\"ignore\")\n", + " fxn()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(7214, 53)\n", + "['id' 'name' 'first' 'last' 'compas_screening_date' 'sex' 'dob' 'age'\n", + " 'age_cat' 'race' 'juv_fel_count' 'decile_score' 'juv_misd_count'\n", + " 'juv_other_count' 'priors_count' 'days_b_screening_arrest' 'c_jail_in'\n", + " 'c_jail_out' 'c_case_number' 'c_offense_date' 'c_arrest_date'\n", + " 'c_days_from_compas' 'c_charge_degree' 'c_charge_desc' 'is_recid'\n", + " 'r_case_number' 'r_charge_degree' 'r_days_from_arrest' 'r_offense_date'\n", + " 'r_charge_desc' 'r_jail_in' 'r_jail_out' 'violent_recid'\n", + " 'is_violent_recid' 'vr_case_number' 'vr_charge_degree' 'vr_offense_date'\n", + " 'vr_charge_desc' 'type_of_assessment' 'decile_score.1' 'score_text'\n", + " 'screening_date' 'v_type_of_assessment' 'v_decile_score' 'v_score_text'\n", + " 'v_screening_date' 'in_custody' 'out_custody' 'priors_count.1' 
'start'\n", + " 'end' 'event' 'two_year_recid']\n" + ] + } + ], + "source": [ + "# Read file\n", + "compas_raw = pd.read_csv(\"../data/compas-scores-two-years.csv\")\n", + "\n", + "# Check dimensions, number of rows should be 7214\n", + "print(compas_raw.shape)\n", + "print(compas_raw.columns.values)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(6172, 13)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select columns\n", + "compas = compas_raw[[\n", + " 'age', 'c_charge_degree', 'race', 'age_cat', 'score_text', 'sex',\n", + " 'priors_count', 'days_b_screening_arrest', 'decile_score', 'is_recid',\n", + " 'two_year_recid', 'c_jail_in', 'c_jail_out'\n", + "]]\n", + "\n", + "# Subset values, see reasons in ProPublica methodology.\n", + "compas = compas.query('days_b_screening_arrest <= 30 and \\\n", + " days_b_screening_arrest >= -30 and \\\n", + " is_recid != -1 and \\\n", + " c_charge_degree != \"O\"')\n", + "\n", + "# Drop row if score_text is na\n", + "compas = compas[compas.score_text.notnull()]\n", + "\n", + "compas.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>id</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>name</th>\n", + " <td>miguel hernandez</td>\n", + " <td>kevon dixon</td>\n", + " <td>ed philo</td>\n", + " <td>marcu brown</td>\n", + " </tr>\n", + " <tr>\n", + " <th>first</th>\n", + " <td>miguel</td>\n", + " <td>kevon</td>\n", + " <td>ed</td>\n", + " <td>marcu</td>\n", + " </tr>\n", + " <tr>\n", + " <th>last</th>\n", + " <td>hernandez</td>\n", + " <td>dixon</td>\n", + " <td>philo</td>\n", + " <td>brown</td>\n", + " </tr>\n", + " <tr>\n", + " <th>compas_screening_date</th>\n", + " <td>2013-08-14</td>\n", + " <td>2013-01-27</td>\n", + " <td>2013-04-14</td>\n", + " <td>2013-01-13</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sex</th>\n", + " <td>Male</td>\n", + " <td>Male</td>\n", + " <td>Male</td>\n", + " <td>Male</td>\n", + " </tr>\n", + " <tr>\n", + " <th>dob</th>\n", + " <td>1947-04-18</td>\n", + " <td>1982-01-22</td>\n", + " <td>1991-05-14</td>\n", + " <td>1993-01-21</td>\n", + " </tr>\n", + " <tr>\n", + " <th>age</th>\n", + " <td>69</td>\n", + " <td>34</td>\n", + " <td>24</td>\n", + " <td>23</td>\n", + " </tr>\n", + " <tr>\n", + " <th>age_cat</th>\n", + " <td>Greater than 45</td>\n", + " <td>25 - 45</td>\n", + " <td>Less than 25</td>\n", + " <td>Less than 25</td>\n", + " </tr>\n", + " <tr>\n", + " <th>race</th>\n", + " <td>Other</td>\n", + " <td>African-American</td>\n", + " <td>African-American</td>\n", + " <td>African-American</td>\n", + " </tr>\n", + " <tr>\n", + " <th>juv_fel_count</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>decile_score</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>juv_misd_count</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>juv_other_count</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>priors_count</th>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>days_b_screening_arrest</th>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_jail_in</th>\n", + " <td>2013-08-13 06:03:42</td>\n", + " <td>2013-01-26 03:45:27</td>\n", + " <td>2013-04-13 04:58:34</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_jail_out</th>\n", + " <td>2013-08-14 05:41:20</td>\n", + " <td>2013-02-05 05:36:53</td>\n", + " <td>2013-04-14 07:02:04</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_case_number</th>\n", + " <td>13011352CF10A</td>\n", + " <td>13001275CF10A</td>\n", + " <td>13005330CF10A</td>\n", + " <td>13000570CF10A</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_offense_date</th>\n", + " <td>2013-08-13</td>\n", + " <td>2013-01-26</td>\n", + " <td>2013-04-13</td>\n", + " <td>2013-01-12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_arrest_date</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_days_from_compas</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_charge_degree</th>\n", + " <td>F</td>\n", + " <td>F</td>\n", + " <td>F</td>\n", + " <td>F</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_charge_desc</th>\n", + " <td>Aggravated Assault w/Firearm</td>\n", + " <td>Felony Battery w/Prior Convict</td>\n", + " <td>Possession of Cocaine</td>\n", + " <td>Possession of Cannabis</td>\n", + " </tr>\n", + " <tr>\n", + " <th>is_recid</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>r_case_number</th>\n", + " <td>NaN</td>\n", + " <td>13009779CF10A</td>\n", + " <td>13011511MM10A</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>r_charge_degree</th>\n", + " <td>NaN</td>\n", + " <td>(F3)</td>\n", + " 
<td>(M1)</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>r_days_from_arrest</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>r_offense_date</th>\n", + " <td>NaN</td>\n", + " <td>2013-07-05</td>\n", + " <td>2013-06-16</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>r_charge_desc</th>\n", + " <td>NaN</td>\n", + " <td>Felony Battery (Dom Strang)</td>\n", + " <td>Driving Under The Influence</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>r_jail_in</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2013-06-16</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>r_jail_out</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2013-06-16</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>violent_recid</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>is_violent_recid</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>vr_case_number</th>\n", + " <td>NaN</td>\n", + " <td>13009779CF10A</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>vr_charge_degree</th>\n", + " <td>NaN</td>\n", + " <td>(F3)</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>vr_offense_date</th>\n", + " <td>NaN</td>\n", + " <td>2013-07-05</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>vr_charge_desc</th>\n", + " <td>NaN</td>\n", + " <td>Felony Battery (Dom Strang)</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>type_of_assessment</th>\n", + " <td>Risk of Recidivism</td>\n", + " <td>Risk of Recidivism</td>\n", + " <td>Risk of Recidivism</td>\n", + " <td>Risk of Recidivism</td>\n", + " </tr>\n", + " <tr>\n", + " <th>decile_score.1</th>\n", + " 
<td>1</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>score_text</th>\n", + " <td>Low</td>\n", + " <td>Low</td>\n", + " <td>Low</td>\n", + " <td>High</td>\n", + " </tr>\n", + " <tr>\n", + " <th>screening_date</th>\n", + " <td>2013-08-14</td>\n", + " <td>2013-01-27</td>\n", + " <td>2013-04-14</td>\n", + " <td>2013-01-13</td>\n", + " </tr>\n", + " <tr>\n", + " <th>v_type_of_assessment</th>\n", + " <td>Risk of Violence</td>\n", + " <td>Risk of Violence</td>\n", + " <td>Risk of Violence</td>\n", + " <td>Risk of Violence</td>\n", + " </tr>\n", + " <tr>\n", + " <th>v_decile_score</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>6</td>\n", + " </tr>\n", + " <tr>\n", + " <th>v_score_text</th>\n", + " <td>Low</td>\n", + " <td>Low</td>\n", + " <td>Low</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>v_screening_date</th>\n", + " <td>2013-08-14</td>\n", + " <td>2013-01-27</td>\n", + " <td>2013-04-14</td>\n", + " <td>2013-01-13</td>\n", + " </tr>\n", + " <tr>\n", + " <th>in_custody</th>\n", + " <td>2014-07-07</td>\n", + " <td>2013-01-26</td>\n", + " <td>2013-06-16</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>out_custody</th>\n", + " <td>2014-07-14</td>\n", + " <td>2013-02-05</td>\n", + " <td>2013-06-16</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>priors_count.1</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>start</th>\n", + " <td>0</td>\n", + " <td>9</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>end</th>\n", + " <td>327</td>\n", + " <td>159</td>\n", + " <td>63</td>\n", + " <td>1174</td>\n", + " </tr>\n", + " <tr>\n", + " <th>event</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>two_year_recid</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " 
<td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0 \\\n", + "id 1 \n", + "name miguel hernandez \n", + "first miguel \n", + "last hernandez \n", + "compas_screening_date 2013-08-14 \n", + "sex Male \n", + "dob 1947-04-18 \n", + "age 69 \n", + "age_cat Greater than 45 \n", + "race Other \n", + "juv_fel_count 0 \n", + "decile_score 1 \n", + "juv_misd_count 0 \n", + "juv_other_count 0 \n", + "priors_count 0 \n", + "days_b_screening_arrest -1 \n", + "c_jail_in 2013-08-13 06:03:42 \n", + "c_jail_out 2013-08-14 05:41:20 \n", + "c_case_number 13011352CF10A \n", + "c_offense_date 2013-08-13 \n", + "c_arrest_date NaN \n", + "c_days_from_compas 1 \n", + "c_charge_degree F \n", + "c_charge_desc Aggravated Assault w/Firearm \n", + "is_recid 0 \n", + "r_case_number NaN \n", + "r_charge_degree NaN \n", + "r_days_from_arrest NaN \n", + "r_offense_date NaN \n", + "r_charge_desc NaN \n", + "r_jail_in NaN \n", + "r_jail_out NaN \n", + "violent_recid NaN \n", + "is_violent_recid 0 \n", + "vr_case_number NaN \n", + "vr_charge_degree NaN \n", + "vr_offense_date NaN \n", + "vr_charge_desc NaN \n", + "type_of_assessment Risk of Recidivism \n", + "decile_score.1 1 \n", + "score_text Low \n", + "screening_date 2013-08-14 \n", + "v_type_of_assessment Risk of Violence \n", + "v_decile_score 1 \n", + "v_score_text Low \n", + "v_screening_date 2013-08-14 \n", + "in_custody 2014-07-07 \n", + "out_custody 2014-07-14 \n", + "priors_count.1 0 \n", + "start 0 \n", + "end 327 \n", + "event 0 \n", + "two_year_recid 0 \n", + "\n", + " 1 \\\n", + "id 3 \n", + "name kevon dixon \n", + "first kevon \n", + "last dixon \n", + "compas_screening_date 2013-01-27 \n", + "sex Male \n", + "dob 1982-01-22 \n", + "age 34 \n", + "age_cat 25 - 45 \n", + "race African-American \n", + "juv_fel_count 0 \n", + "decile_score 3 \n", + "juv_misd_count 0 \n", + "juv_other_count 0 \n", + "priors_count 0 \n", + "days_b_screening_arrest -1 \n", + "c_jail_in 2013-01-26 03:45:27 
\n", + "c_jail_out 2013-02-05 05:36:53 \n", + "c_case_number 13001275CF10A \n", + "c_offense_date 2013-01-26 \n", + "c_arrest_date NaN \n", + "c_days_from_compas 1 \n", + "c_charge_degree F \n", + "c_charge_desc Felony Battery w/Prior Convict \n", + "is_recid 1 \n", + "r_case_number 13009779CF10A \n", + "r_charge_degree (F3) \n", + "r_days_from_arrest NaN \n", + "r_offense_date 2013-07-05 \n", + "r_charge_desc Felony Battery (Dom Strang) \n", + "r_jail_in NaN \n", + "r_jail_out NaN \n", + "violent_recid NaN \n", + "is_violent_recid 1 \n", + "vr_case_number 13009779CF10A \n", + "vr_charge_degree (F3) \n", + "vr_offense_date 2013-07-05 \n", + "vr_charge_desc Felony Battery (Dom Strang) \n", + "type_of_assessment Risk of Recidivism \n", + "decile_score.1 3 \n", + "score_text Low \n", + "screening_date 2013-01-27 \n", + "v_type_of_assessment Risk of Violence \n", + "v_decile_score 1 \n", + "v_score_text Low \n", + "v_screening_date 2013-01-27 \n", + "in_custody 2013-01-26 \n", + "out_custody 2013-02-05 \n", + "priors_count.1 0 \n", + "start 9 \n", + "end 159 \n", + "event 1 \n", + "two_year_recid 1 \n", + "\n", + " 2 3 \n", + "id 4 5 \n", + "name ed philo marcu brown \n", + "first ed marcu \n", + "last philo brown \n", + "compas_screening_date 2013-04-14 2013-01-13 \n", + "sex Male Male \n", + "dob 1991-05-14 1993-01-21 \n", + "age 24 23 \n", + "age_cat Less than 25 Less than 25 \n", + "race African-American African-American \n", + "juv_fel_count 0 0 \n", + "decile_score 4 8 \n", + "juv_misd_count 0 1 \n", + "juv_other_count 1 0 \n", + "priors_count 4 1 \n", + "days_b_screening_arrest -1 NaN \n", + "c_jail_in 2013-04-13 04:58:34 NaN \n", + "c_jail_out 2013-04-14 07:02:04 NaN \n", + "c_case_number 13005330CF10A 13000570CF10A \n", + "c_offense_date 2013-04-13 2013-01-12 \n", + "c_arrest_date NaN NaN \n", + "c_days_from_compas 1 1 \n", + "c_charge_degree F F \n", + "c_charge_desc Possession of Cocaine Possession of Cannabis \n", + "is_recid 1 0 \n", + "r_case_number 
13011511MM10A NaN \n", + "r_charge_degree (M1) NaN \n", + "r_days_from_arrest 0 NaN \n", + "r_offense_date 2013-06-16 NaN \n", + "r_charge_desc Driving Under The Influence NaN \n", + "r_jail_in 2013-06-16 NaN \n", + "r_jail_out 2013-06-16 NaN \n", + "violent_recid NaN NaN \n", + "is_violent_recid 0 0 \n", + "vr_case_number NaN NaN \n", + "vr_charge_degree NaN NaN \n", + "vr_offense_date NaN NaN \n", + "vr_charge_desc NaN NaN \n", + "type_of_assessment Risk of Recidivism Risk of Recidivism \n", + "decile_score.1 4 8 \n", + "score_text Low High \n", + "screening_date 2013-04-14 2013-01-13 \n", + "v_type_of_assessment Risk of Violence Risk of Violence \n", + "v_decile_score 3 6 \n", + "v_score_text Low Medium \n", + "v_screening_date 2013-04-14 2013-01-13 \n", + "in_custody 2013-06-16 NaN \n", + "out_custody 2013-06-16 NaN \n", + "priors_count.1 4 1 \n", + "start 0 0 \n", + "end 63 1174 \n", + "event 0 0 \n", + "two_year_recid 1 0 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Calculate length of stay\n", + "out = pd.to_datetime(compas.c_jail_out, format=\"%Y-%m-%d %H:%M:%S\")\n", + "in_ = pd.to_datetime(compas.c_jail_in, format=\"%Y-%m-%d %H:%M:%S\")\n", + "\n", + "compas['length_of_stay'] = (out - in_).astype('timedelta64[D]')\n", + "\n", + "# Structure of the data\n", + "display(compas_raw.head(4).T)\n", + "#print(np.sum(compas_raw.c_arrest_date.isnull()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Columns:**\n", + "\n", + "* id = identification number\n", + "* name \n", + "* first (name)\n", + "* last (name)\n", + "* compas_screening_date = date of COMPAS filling\n", + "* sex\n", + "* dob = date of birth\n", + "* age\n", + "* age_cat\n", + "* race\n", + "* juv_fel_count = No. of juvenile felonies\n", + "* decile_score = decile score of COMPAS\n", + "* juv_misd_count = No. of juvenile misdemeanors\n", + "* juv_other_count = No. of other crimes juvenile \n", + "* priors_count = No. 
of priors \n", + "* days_b_screening_arrest = number of days between the COMPAS screening and the defendant's COMPAS-scored offense/arrest (c_offense_date - screening_date) \n", + "* c_jail_in = jailing date of COMPAS scored crime\n", + "* c_jail_out = release date from jail for COMPAS scored crime\n", + "* c_case_number = case number of COMPAS scored crime\n", + "* c_offense_date = offense date of COMPAS scored crime\n", + "* c_arrest_date = arrest date of COMPAS scored crime\n", + "* c_days_from_compas = \n", + "* c_charge_degree\n", + "* c_charge_desc\n", + "* is_recid\n", + "* r_case_number\n", + "* r_charge_degree\n", + "* r_days_from_arrest\n", + "* r_offense_date\n", + "* r_charge_desc\n", + "* r_jail_in\n", + "* r_jail_out\n", + "* violent_recid\n", + "* is_violent_recid\n", + "* vr_case_number\n", + "* vr_charge_degree\n", + "* vr_offense_date\n", + "* vr_charge_desc\n", + "* type_of_assessment\n", + "* decile_score.1\n", + "* score_text\n", + "* screening_date\n", + "* v_type_of_assessment\n", + "* v_decile_score\n", + "* v_score_text\n", + "* v_screening_date\n", + "* in_custody\n", + "* out_custody\n", + "* priors_count.1\n", + "* start\n", + "* end\n", + "* event\n", + "* two_year_recid\n", + "\n", + "Let's obtain the basic statistics for each of the variables."
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>count</th>\n", + " <th>unique</th>\n", + " <th>top</th>\n", + " <th>freq</th>\n", + " <th>mean</th>\n", + " <th>std</th>\n", + " <th>min</th>\n", + " <th>25%</th>\n", + " <th>50%</th>\n", + " <th>75%</th>\n", + " <th>max</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>age</th>\n", + " <td>6172</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>34.5345</td>\n", + " <td>11.7309</td>\n", + " <td>18</td>\n", + " <td>25</td>\n", + " <td>31</td>\n", + " <td>42</td>\n", + " <td>96</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_charge_degree</th>\n", + " <td>6172</td>\n", + " <td>2</td>\n", + " <td>F</td>\n", + " <td>3970</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>race</th>\n", + " <td>6172</td>\n", + " <td>6</td>\n", + " <td>African-American</td>\n", + " <td>3175</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>age_cat</th>\n", + " <td>6172</td>\n", + " <td>3</td>\n", + " <td>25 - 45</td>\n", + " <td>3532</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>score_text</th>\n", + " <td>6172</td>\n", + " <td>3</td>\n", + " <td>Low</td>\n", + " <td>3421</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>sex</th>\n", + " <td>6172</td>\n", + " <td>2</td>\n", + " <td>Male</td>\n", + " <td>4997</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>priors_count</th>\n", + " <td>6172</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>3.24644</td>\n", + " <td>4.74377</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>4</td>\n", + " <td>38</td>\n", + " </tr>\n", + " <tr>\n", + " <th>days_b_screening_arrest</th>\n", + " <td>6172</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>-1.74028</td>\n", + " <td>5.08471</td>\n", + " <td>-30</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>30</td>\n", + " </tr>\n", + " <tr>\n", + " <th>decile_score</th>\n", + " <td>6172</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>4.4185</td>\n", + " <td>2.83946</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>4</td>\n", + " <td>7</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>is_recid</th>\n", + " <td>6172</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>0.484446</td>\n", + " <td>0.499799</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>two_year_recid</th>\n", + " <td>6172</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>0.45512</td>\n", + " <td>0.498022</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + 
" <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_jail_in</th>\n", + " <td>6172</td>\n", + " <td>6172</td>\n", + " <td>2014-01-05 10:19:57</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>c_jail_out</th>\n", + " <td>6172</td>\n", + " <td>6161</td>\n", + " <td>2013-09-14 05:58:00</td>\n", + " <td>3</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>length_of_stay</th>\n", + " <td>6172</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>14.6228</td>\n", + " <td>46.6935</td>\n", + " <td>-1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>799</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " count unique top freq mean \\\n", + "age 6172 NaN NaN NaN 34.5345 \n", + "c_charge_degree 6172 2 F 3970 NaN \n", + "race 6172 6 African-American 3175 NaN \n", + "age_cat 6172 3 25 - 45 3532 NaN \n", + "score_text 6172 3 Low 3421 NaN \n", + "sex 6172 2 Male 4997 NaN \n", + "priors_count 6172 NaN NaN NaN 3.24644 \n", + "days_b_screening_arrest 6172 NaN NaN NaN -1.74028 \n", + "decile_score 6172 NaN NaN NaN 4.4185 \n", + "is_recid 6172 NaN NaN NaN 0.484446 \n", + "two_year_recid 6172 NaN NaN NaN 0.45512 \n", + "c_jail_in 6172 6172 2014-01-05 10:19:57 1 NaN \n", + "c_jail_out 6172 6161 2013-09-14 05:58:00 3 NaN \n", + "length_of_stay 6172 NaN NaN NaN 14.6228 \n", + "\n", + " std min 25% 50% 75% max \n", + "age 11.7309 18 25 31 42 96 \n", + "c_charge_degree NaN NaN NaN NaN NaN NaN \n", + "race NaN NaN NaN NaN NaN NaN \n", + "age_cat NaN NaN NaN NaN NaN NaN \n", + "score_text NaN NaN NaN NaN NaN NaN \n", + "sex NaN NaN NaN NaN NaN NaN \n", + "priors_count 4.74377 0 0 1 4 38 \n", + 
"days_b_screening_arrest 5.08471 -30 -1 -1 -1 30 \n", + "decile_score 2.83946 1 2 4 7 10 \n", + "is_recid 0.499799 0 0 0 1 1 \n", + "two_year_recid 0.498022 0 0 0 1 1 \n", + "c_jail_in NaN NaN NaN NaN NaN NaN \n", + "c_jail_out NaN NaN NaN NaN NaN NaN \n", + "length_of_stay 46.6935 -1 0 1 5 799 " + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compas.describe(include='all').T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Notes:**\n", + "\n", + "* Mean age is roughly 34.5 years ranging from 18 to 96\n", + "* Defendants have an average of 3.2 priors (sd 4.7) and more than half have 1 or more prior.\n", + "* 48.4% have recidivated in general and 45.5% recidivated within a two-year period following their arrest." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 900x900 with 30 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>age</th>\n", + " <th>priors_count</th>\n", + " <th>days_b_screening_arrest</th>\n", + " <th>decile_score</th>\n", + " <th>length_of_stay</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>age</th>\n", + " <td>1.00</td>\n", + " <td>0.12</td>\n", + " <td>-0.07</td>\n", + " <td>-0.40</td>\n", + " <td>0.01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>priors_count</th>\n", + " <td>0.12</td>\n", + " <td>1.00</td>\n", 
+ " <td>0.02</td>\n", + " <td>0.45</td>\n", + " <td>0.19</td>\n", + " </tr>\n", + " <tr>\n", + " <th>days_b_screening_arrest</th>\n", + " <td>-0.07</td>\n", + " <td>0.02</td>\n", + " <td>1.00</td>\n", + " <td>0.09</td>\n", + " <td>0.06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>decile_score</th>\n", + " <td>-0.40</td>\n", + " <td>0.45</td>\n", + " <td>0.09</td>\n", + " <td>1.00</td>\n", + " <td>0.21</td>\n", + " </tr>\n", + " <tr>\n", + " <th>length_of_stay</th>\n", + " <td>0.01</td>\n", + " <td>0.19</td>\n", + " <td>0.06</td>\n", + " <td>0.21</td>\n", + " <td>1.00</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " age priors_count days_b_screening_arrest \\\n", + "age 1.00 0.12 -0.07 \n", + "priors_count 0.12 1.00 0.02 \n", + "days_b_screening_arrest -0.07 0.02 1.00 \n", + "decile_score -0.40 0.45 0.09 \n", + "length_of_stay 0.01 0.19 0.06 \n", + "\n", + " decile_score length_of_stay \n", + "age -0.40 0.01 \n", + "priors_count 0.45 0.19 \n", + "days_b_screening_arrest 0.09 0.06 \n", + "decile_score 1.00 0.21 \n", + "length_of_stay 0.21 1.00 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Distributions of the continuous variables \n", + "sns.pairplot(compas[[\n", + " 'age', 'priors_count', 'days_b_screening_arrest', 'decile_score',\n", + " 'length_of_stay'\n", + "]])\n", + "plt.show()\n", + "\n", + "# Correlations of the continuous variables\n", + "display(compas[[\n", + " 'age', 'priors_count', 'days_b_screening_arrest', 'decile_score',\n", + " 'length_of_stay'\n", + "]].corr().round(2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Notes:**\n", + "\n", + "* Some notable correlations: `age` and `decile_score` ($\\rho\\approx-0.40$, Spearman -0.44) and `decile_score` and `priors_count` ($\\rho\\approx0.45$, Spearman 0.44)\n", + "* Spearman correlation was for `length_of_stay` and `priors_count` 0.27 and for `length_of_stay` and `decile_score` 0.27" + 
] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 1286\n", + "2 822\n", + "4 666\n", + "3 647\n", + "5 582\n", + "6 529\n", + "7 496\n", + "9 420\n", + "8 420\n", + "10 304\n", + "Name: decile_score, dtype: int64\n" + ] + } + ], + "source": [ + "# Decile scores should be evenly distributed but are not.\n", + "print(compas.decile_score.value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "25 - 45 3532\n", + "Less than 25 1347\n", + "Greater than 45 1293\n", + "Name: age_cat, dtype: int64\n" + ] + } + ], + "source": [ + "print(compas.age_cat.value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "African-American 3175\n", + "Caucasian 2103\n", + "Hispanic 509\n", + "Other 343\n", + "Asian 31\n", + "Native American 11\n", + "Name: race, dtype: int64\n" + ] + } + ], + "source": [ + "print(compas.race.value_counts())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A very small number of Asian and Native American defendants." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Black defendants: 51.44%\n", + "White defendants: 34.07%\n", + "Hispanic defendants: 8.25%\n", + "Asian defendants: 0.50%\n", + "Native American defendants: 0.18%\n", + "---\n", + "Defendants of other race: 5.56%\n" + ] + } + ], + "source": [ + "print(\"Black defendants: %.2f%%\" % (3175 / 6172 * 100))\n", + "print(\"White defendants: %.2f%%\" % (2103 / 6172 * 100))\n", + "print(\"Hispanic defendants: %.2f%%\" % (509 / 6172 * 100))\n", + "print(\"Asian defendants: %.2f%%\" % (31 / 6172 * 100))\n", + "print(\"Native American defendants: %.2f%%\" % (11 / 6172 * 100))\n", + "print(\"---\")\n", + "print(\"Defendants of other race: %.2f%%\" % (343 / 6172 * 100))" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Low 3421\n", + "Medium 1607\n", + "High 1144\n", + "Name: score_text, dtype: int64\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1008x504 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(compas.score_text.value_counts())\n", + "\n", + "# Recidivists spent longer in incarceration in some point\n", + "compas.boxplot(column=['length_of_stay'], by=['is_recid'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "F 3970\n", + "M 2202\n", + "Name: c_charge_degree, dtype: int64" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compas.c_charge_degree.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(-0.001, 0.5] 2085\n", + 
"(0.5, 5.5] 2866\n", + "(5.5, 10.5] 729\n", + "(10.5, 20.5] 402\n", + "(20.5, 40.5] 90\n", + "Name: priors_count, dtype: int64" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compas.priors_count.value_counts(\n", + " sort=False, bins=[0, 0.5, 5.5, 10.5, 20.5, 40.5])" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th>race</th>\n", + " <th>African-American</th>\n", + " <th>Asian</th>\n", + " <th>Caucasian</th>\n", + " <th>Hispanic</th>\n", + " <th>Native American</th>\n", + " <th>Other</th>\n", + " </tr>\n", + " <tr>\n", + " <th>sex</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Female</th>\n", + " <td>549</td>\n", + " <td>2</td>\n", + " <td>482</td>\n", + " <td>82</td>\n", + " <td>2</td>\n", + " <td>58</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Male</th>\n", + " <td>2626</td>\n", + " <td>29</td>\n", + " <td>1621</td>\n", + " <td>427</td>\n", + " <td>9</td>\n", + " <td>285</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "race African-American Asian Caucasian Hispanic Native American Other\n", + "sex \n", + "Female 549 2 482 82 2 58\n", + "Male 2626 29 1621 427 9 285" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tab = compas.groupby(['sex', 'race']).size()\n", + 
"tab.unstack()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1008x504 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1008x504 with 2 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.bar(range(1, 11), compas.decile_score.value_counts(), ec='black')\n", + "plt.title(\"Decile scores of all defendants\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.xlabel(\"Decile score\")\n", + "plt.xticks(range(1, 11))\n", + "plt.show()\n", + "\n", + "fig, ax = compas.query(\"race in ['Caucasian', 'African-American']\").hist(\n", + " \"decile_score\",\n", + " by=\"race\",\n", + " sharey=True,\n", + " xrot='horizontal',\n", + " ec='black',\n", + " bins=np.arange(0.5, 11.5, 1.0),\n", + " rwidth=0.8)\n", + "\n", + "fig.text(-1.5, 350, \"Frequency\", rotation='vertical')\n", + "fig.text(11.5, -60, \"Decile score\", horizontalalignment='center')\n", + "plt.tight_layout(w_pad=0)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1008x504 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "compas['age'].plot.kde()\n", + "plt.title(\"Histogram of defendants' ages\")\n", + "plt.xlabel(\"Age of defendant\")\n", + "plt.xlim(10,90)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " 
}\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th>is_recid</th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " </tr>\n", + " <tr>\n", + " <th>age_cat</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>25 - 45</th>\n", + " <td>1784</td>\n", + " <td>1748</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Greater than 45</th>\n", + " <td>847</td>\n", + " <td>446</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Less than 25</th>\n", + " <td>551</td>\n", + " <td>796</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "is_recid 0 1\n", + "age_cat \n", + "25 - 45 1784 1748\n", + "Greater than 45 847 446\n", + "Less than 25 551 796" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th>is_recid</th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " </tr>\n", + " <tr>\n", + " <th>sex</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Female</th>\n", + " <td>740</td>\n", + " <td>435</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Male</th>\n", + " <td>2442</td>\n", + " <td>2555</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "is_recid 0 1\n", + "sex \n", + "Female 740 435\n", + "Male 2442 2555" + ] + }, + 
"metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>is_recid</th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " </tr>\n", + " <tr>\n", + " <th>race</th>\n", + " <th>age_cat</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">African-American</th>\n", + " <th>25 - 45</th>\n", + " <td>847.0</td>\n", + " <td>1051.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Greater than 45</th>\n", + " <td>261.0</td>\n", + " <td>207.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Less than 25</th>\n", + " <td>294.0</td>\n", + " <td>515.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">Asian</th>\n", + " <th>25 - 45</th>\n", + " <td>10.0</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Greater than 45</th>\n", + " <td>7.0</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Less than 25</th>\n", + " <td>4.0</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">Caucasian</th>\n", + " <th>25 - 45</th>\n", + " <td>620.0</td>\n", + " <td>508.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Greater than 45</th>\n", + " <td>442.0</td>\n", + " <td>186.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Less than 25</th>\n", + " <td>167.0</td>\n", + " <td>180.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">Hispanic</th>\n", + " <th>25 - 45</th>\n", + " <td>180.0</td>\n", + " <td>111.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Greater than 
45</th>\n", + " <td>81.0</td>\n", + " <td>28.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Less than 25</th>\n", + " <td>51.0</td>\n", + " <td>58.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">Native American</th>\n", + " <th>25 - 45</th>\n", + " <td>5.0</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Greater than 45</th>\n", + " <td>NaN</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Less than 25</th>\n", + " <td>NaN</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">Other</th>\n", + " <th>25 - 45</th>\n", + " <td>122.0</td>\n", + " <td>72.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Greater than 45</th>\n", + " <td>56.0</td>\n", + " <td>19.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Less than 25</th>\n", + " <td>35.0</td>\n", + " <td>39.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "is_recid 0 1\n", + "race age_cat \n", + "African-American 25 - 45 847.0 1051.0\n", + " Greater than 45 261.0 207.0\n", + " Less than 25 294.0 515.0\n", + "Asian 25 - 45 10.0 4.0\n", + " Greater than 45 7.0 4.0\n", + " Less than 25 4.0 2.0\n", + "Caucasian 25 - 45 620.0 508.0\n", + " Greater than 45 442.0 186.0\n", + " Less than 25 167.0 180.0\n", + "Hispanic 25 - 45 180.0 111.0\n", + " Greater than 45 81.0 28.0\n", + " Less than 25 51.0 58.0\n", + "Native American 25 - 45 5.0 2.0\n", + " Greater than 45 NaN 2.0\n", + " Less than 25 NaN 2.0\n", + "Other 25 - 45 122.0 72.0\n", + " Greater than 45 56.0 19.0\n", + " Less than 25 35.0 39.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "tab = compas.groupby(['age_cat', 'is_recid']).size()\n", + "display(tab.unstack())\n", + "\n", + "tab = compas.groupby(['sex', 'is_recid']).size()\n", + "display(tab.unstack())\n", + "\n", + "tab = compas.groupby(['race', 'age_cat', 'is_recid']).size()\n", + "display(tab.unstack())" + ] + }, + { + "cell_type": 
"markdown",\n", + " "metadata": {}, + "source": [ + "From above it is clear that there are no Native American non-recidivists (`is_recid` = 0) of age over 45 or under 25. There are some other value combinations that might be problematic. Therefore the procedure of estimating $P(X=x)$ has to be considered carefully." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Synthetic data\n", + "\n", + "In the chunk below, we generate the synthetic data as described by Lakkaraju et al. The default values and definitions of $Y$ and $T$ values follow their description.\n", + "\n", + "**Parameters**\n", + "\n", + "* M = `nJudges_M`, number of judges\n", + "* N = `nSubjects_N`, number of subjects assigned to each judge\n", + "* betas $\\beta_i$ = `beta_i`, where $i \\in \\{X, Z, W\\}$ are coefficients for the respective variables\n", + "\n", + "**Columns of the data:**\n", + "\n", + "* `judgeID_J` = judge IDs as a running number from 0 to `nJudges_M - 1`\n", + "* R = `acceptanceRate_R`, acceptance rates\n", + "* X = `X`, individual's features observable to all (models and judges)\n", + "* Z = `Z`, information observable to judges only\n", + "* W = `W`, unobservable / inaccessible information\n", + "* T = `decision_T`, bail-or-jail decisions where $T=0$ represents jail decision and $T=1$ bail decision.\n", + "* Y = `result_Y`, result variable, if $Y=0$ person will or would recidivate and if $Y=1$ person will or would not commit a crime."
+ ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>count</th>\n", + " <th>mean</th>\n", + " <th>std</th>\n", + " <th>min</th>\n", + " <th>25%</th>\n", + " <th>50%</th>\n", + " <th>75%</th>\n", + " <th>max</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>judgeID_J</th>\n", + " <td>50000.0</td>\n", + " <td>49.500000</td>\n", + " <td>28.866359</td>\n", + " <td>0.000000</td>\n", + " <td>24.750000</td>\n", + " <td>49.500000</td>\n", + " <td>74.250000</td>\n", + " <td>99.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>acceptanceRate_R</th>\n", + " <td>50000.0</td>\n", + " <td>0.478235</td>\n", + " <td>0.230644</td>\n", + " <td>0.103756</td>\n", + " <td>0.264643</td>\n", + " <td>0.473985</td>\n", + " <td>0.647587</td>\n", + " <td>0.890699</td>\n", + " </tr>\n", + " <tr>\n", + " <th>X</th>\n", + " <td>50000.0</td>\n", + " <td>-0.003875</td>\n", + " <td>0.996715</td>\n", + " <td>-4.659953</td>\n", + " <td>-0.671782</td>\n", + " <td>-0.001726</td>\n", + " <td>0.668077</td>\n", + " <td>3.831790</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Z</th>\n", + " <td>50000.0</td>\n", + " <td>0.006964</td>\n", + " <td>0.998001</td>\n", + " <td>-4.852118</td>\n", + " <td>-0.666258</td>\n", + " <td>0.004730</td>\n", + " <td>0.679477</td>\n", + " <td>4.241772</td>\n", + " </tr>\n", + " <tr>\n", + " <th>W</th>\n", + " <td>50000.0</td>\n", + " <td>0.010863</td>\n", + " <td>0.996944</td>\n", + " <td>-4.029138</td>\n", + " <td>-0.666574</td>\n", + " 
<td>0.012306</td>\n", + " <td>0.679578</td>\n", + " <td>4.285856</td>\n", + " </tr>\n", + " <tr>\n", + " <th>result_Y</th>\n", + " <td>50000.0</td>\n", + " <td>0.496500</td>\n", + " <td>0.499993</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>1.000000</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>probabilities_T</th>\n", + " <td>50000.0</td>\n", + " <td>0.500627</td>\n", + " <td>0.410701</td>\n", + " <td>-1.350727</td>\n", + " <td>0.214651</td>\n", + " <td>0.500403</td>\n", + " <td>0.786029</td>\n", + " <td>2.005477</td>\n", + " </tr>\n", + " <tr>\n", + " <th>decision_T</th>\n", + " <td>50000.0</td>\n", + " <td>0.477260</td>\n", + " <td>0.499488</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>1.000000</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " count mean std min 25% \\\n", + "judgeID_J 50000.0 49.500000 28.866359 0.000000 24.750000 \n", + "acceptanceRate_R 50000.0 0.478235 0.230644 0.103756 0.264643 \n", + "X 50000.0 -0.003875 0.996715 -4.659953 -0.671782 \n", + "Z 50000.0 0.006964 0.998001 -4.852118 -0.666258 \n", + "W 50000.0 0.010863 0.996944 -4.029138 -0.666574 \n", + "result_Y 50000.0 0.496500 0.499993 0.000000 0.000000 \n", + "probabilities_T 50000.0 0.500627 0.410701 -1.350727 0.214651 \n", + "decision_T 50000.0 0.477260 0.499488 0.000000 0.000000 \n", + "\n", + " 50% 75% max \n", + "judgeID_J 49.500000 74.250000 99.000000 \n", + "acceptanceRate_R 0.473985 0.647587 0.890699 \n", + "X -0.001726 0.668077 3.831790 \n", + "Z 0.004730 0.679477 4.241772 \n", + "W 0.012306 0.679578 4.285856 \n", + "result_Y 0.000000 1.000000 1.000000 \n", + "probabilities_T 0.500403 0.786029 2.005477 \n", + "decision_T 0.000000 1.000000 1.000000 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 26137\n", + 
"1 23863\n", + "Name: decision_T, dtype: int64\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th>decision_T</th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " </tr>\n", + " <tr>\n", + " <th>result_Y</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0.0</th>\n", + " <td>17790</td>\n", + " <td>7385</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1.0</th>\n", + " <td>8347</td>\n", + " <td>16478</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "decision_T 0 1\n", + "result_Y \n", + "0.0 17790 7385\n", + "1.0 8347 16478" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Set seed for reproducibility\n", + "npr.seed(0)\n", + "\n", + "def generateData(nJudges_M=100,\n", + " nSubjects_N=500,\n", + " beta_X=1.0,\n", + " beta_Z=1.0,\n", + " beta_W=0.2):\n", + "\n", + " # Assign judge IDs as running numbering from 0 to nJudges_M - 1\n", + " judgeID_J = np.repeat(np.arange(0, nJudges_M, dtype=np.int32), nSubjects_N)\n", + "\n", + " # Sample acceptance rates uniformly from a closed interval\n", + " # from 0.1 to 0.9 and round to tenth decimal place.\n", + " acceptance_rates = np.round(npr.uniform(.1, .9, nJudges_M), 10)\n", + "\n", + " # Replicate the rates so they can be attached to the corresponding judge ID.\n", + " acceptanceRate_R = np.repeat(acceptance_rates, nSubjects_N)\n", + "\n", + " # Sample the variables from standard Gaussian distributions.\n", + " X = npr.normal(size=nJudges_M * nSubjects_N)\n", + " Z = npr.normal(size=nJudges_M * 
nSubjects_N)\n", + " W = npr.normal(size=nJudges_M * nSubjects_N)\n", + "\n", + " probabilities_Y = 1 / (1 + np.exp(-(beta_X * X + beta_Z * Z + beta_W * W)))\n", + "\n", + " # 0 if P(Y = 0| X = x; Z = z; W = w) >= 0.5 , 1 otherwise\n", + " result_Y = 1 - probabilities_Y.round()\n", + "\n", + " probabilities_T = 1 / (1 + np.exp(-(beta_X * X + beta_Z * Z)))\n", + " probabilities_T += npr.normal(0, np.sqrt(0.1), nJudges_M * nSubjects_N)\n", + "\n", + " # Initialize decision values as 1\n", + " decision_T = np.ones(nJudges_M * nSubjects_N)\n", + "\n", + " # Initialize the dataframe\n", + " df_init = pd.DataFrame(\n", + " np.column_stack((judgeID_J, acceptanceRate_R, X, Z, W, result_Y,\n", + " probabilities_T, decision_T)),\n", + " columns=[\n", + " \"judgeID_J\", \"acceptanceRate_R\", \"X\", \"Z\", \"W\", \"result_Y\",\n", + " \"probabilities_T\", \"decision_T\"\n", + " ])\n", + "\n", + " # Sort by judges then probabilities\n", + " data = df_init.sort_values(\n", + " by=[\"judgeID_J\", \"probabilities_T\"], ascending=False)\n", + "\n", + " # Iterate over the data. Subject is in the top (1-r)*100% if\n", + " # his within-judge-index is over acceptance threshold times\n", + " # the number of subjects assigned to each judge. 
If subject\n", + " # is over the limit they are assigned a zero, else one.\n", + " data.reset_index(drop=True, inplace=True)\n", + "\n", + " data['decision_T'] = np.where(\n", + " (data.index.values % nSubjects_N) <\n", + " ((1 - data['acceptanceRate_R']) * nSubjects_N), 0, 1)\n", + "\n", + " return data\n", + "\n", + "\n", + "df = []\n", + "df = generateData()\n", + "\n", + "# Basic stats of the created data set.\n", + "display(df.describe().T)\n", + "\n", + "print(df.decision_T.value_counts())\n", + "\n", + "tab = df.groupby(['result_Y', 'decision_T']).size()\n", + "display(tab.unstack())" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(25000, 8)\n", + "(25000, 8)\n", + "(25000, 8)\n", + "(25000, 8)\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th>decision_T</th>\n", + " <th>1</th>\n", + " </tr>\n", + " <tr>\n", + " <th>result_Y</th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0.0</th>\n", + " <td>3650</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1.0</th>\n", + " <td>8216</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "decision_T 1\n", + "result_Y \n", + "0.0 3650\n", + "1.0 8216" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Split the data set to test and train\n", + "from sklearn.model_selection import train_test_split\n", + "train, test = train_test_split(df, test_size=0.5, random_state=0)\n", + 
"\n", + "print(train.shape)\n", + "print(test.shape)\n", + "\n", + "train_labeled = train.copy()\n", + "test_labeled = test.copy()\n", + "\n", + "# Set results as NA if decision is negative.\n", + "train_labeled.result_Y = np.where(train.decision_T == 0, np.nan, train.result_Y)\n", + "test_labeled.result_Y = np.where(test.decision_T == 0, np.nan, test.result_Y)\n", + "\n", + "print(train_labeled.shape)\n", + "print(test_labeled.shape)\n", + "\n", + "tab = train_labeled.groupby(['result_Y', 'decision_T']).size()\n", + "tab.unstack()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Algorithms\n", + "\n", + "### Contraction algorithm\n", + "\n", + "Below is an implementation of Lakkaraju's team's algorithm presented in [their paper](https://helka.finna.fi/PrimoRecord/pci.acm3098066). Relevant parameters to be passed to the function are presented in the description." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "def contraction(df,\n", + " judgeIDJ_col,\n", + " decisionT_col,\n", + " resultY_col,\n", + " modelProbS_col,\n", + " accRateR_col,\n", + " r,\n", + " binning=False):\n", + " '''\n", + " This is an implementation of the algorithm presented by Lakkaraju\n", + " et al. 
in their paper \"The Selective Labels Problem: Evaluating \n", + " Algorithmic Predictions in the Presence of Unobservables\" (2017).\n", + " \n", + " Parameters:\n", + " df = The (Pandas) data frame containing the data, judge decisions,\n", + " judge IDs, results and probability scores.\n", + " judgeIDJ_col = String, the name of the column containing the judges' IDs\n", + " in df.\n", + " decisionT_col = String, the name of the column containing the judges' decisions\n", + " resultY_col = String, the name of the column containing the realization\n", + " modelProbS_col = String, the name of the column containing the probability\n", + " scores from the black-box model B.\n", + " accRateR_col = String, the name of the column containing the judges' \n", + " acceptance rates\n", + " r = Float between 0 and 1, the given acceptance rate.\n", + " binning = Boolean, should judges with same acceptance rate be binned\n", + " \n", + " Returns:\n", + " u = The estimated failure rate at acceptance rate r.\n", + " '''\n", + " # Sort first by acceptance rate and judge ID.\n", + " sorted_df = df.sort_values(\n", + " by=[accRateR_col, judgeIDJ_col], ascending=False)\n", + "\n", + " if binning:\n", + " # Get maximum leniency\n", + " max_leniency = sorted_df[accRateR_col].values[0].round(1)\n", + "\n", + " # Get list of judges that are the most lenient\n", + " most_lenient_list = sorted_df.loc[sorted_df[accRateR_col].round(1) ==\n", + " max_leniency, judgeIDJ_col]\n", + "\n", + " # Subset to obtain D_q\n", + " D_q = sorted_df[sorted_df[judgeIDJ_col].isin(\n", + " most_lenient_list.unique())].copy()\n", + " else:\n", + " # Get most lenient judge\n", + " most_lenient_ID = sorted_df[judgeIDJ_col].values[0]\n", + "\n", + " # Subset\n", + " D_q = sorted_df[sorted_df[judgeIDJ_col] == most_lenient_ID].copy()\n", + "\n", + " # All observations of R_q have observed outcome labels\n", + " R_q = D_q[D_q[decisionT_col] == 1]\n", + "\n", + " # \"Observations deemed as high risk by B are at the 
top of this list\"\n", + " R_sort_q = R_q.sort_values(by=modelProbS_col, ascending=False)\n", + "\n", + " number_to_remove = int(\n", + " round((1.0 - r) * D_q.shape[0] - (D_q.shape[0] - R_q.shape[0])))\n", + "\n", + " # \"R_B is the list of observations assigned to t = 1 by B\"\n", + " R_B = R_sort_q[number_to_remove:R_sort_q.shape[0]]\n", + "\n", + " return np.sum(R_B[resultY_col] == 0) / D_q.shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Causal model\n", + "\n", + "Our model is defined by the probabilistic expression \n", + "\n", + "\\begin{equation}\\label{model_disc}\n", + "P(Y=0 | \\text{do}(R=r)) = \\sum_x \\underbrace{P(Y=0|X=x, T=1)}_\\text{1} \n", + "\\overbrace{P(T=1|R=r, X=x)}^\\text{2} \n", + "\\underbrace{P(X=x)}_\\text{3}\n", + "\\end{equation}\n", + "\n", + "which is equal to \n", + "\n", + "\\begin{equation}\\label{model_cont}\n", + "P(Y=0 | \\text{do}(R=r)) = \\int_x P(Y=0|X=x, T=1)P(T=1|R=r, X=x)P(X=x)\n", + "\\end{equation}\n", + "\n", + "for continuous $x$. As a picture (Z is latent variable, and can be excluded from the expression as per do-calculus):\n", + "\n", + "\n", + "\n", + "Does this hold for predicting probability for each individual? Of course we build models for each of the terms separately, that is we model the probability of negative outcome and the probability of positive decision when $R=r$ separately.\n", + "\n", + "\\begin{equation}\n", + "P(Y=0 | \\text{do}(R=r), X=x) = P(Y=0|X=x, T=1)P(T=1|R=r, X=x)\n", + "\\end{equation}\n", + "\n", + "<!---\n", + "**Algorithm -- UPDATE!!**\n", + "\n", + "Our model will be constructed sequentially.\n", + "\n", + "Input: Training and test data sets $(\\mathbf{x}, t, y) \\in \\mathcal{D}$ and acceptance rate $r$. \n", + "Returns: $P(Y=0 | \\text{do}(R=r))$\n", + "\n", + "Procedure:\n", + "1. 
Model $P(X=x)$ in a suitable way and assign to $\\mathcal{M}_0$\n", + "* Build model $\\mathcal{M}_1$ predicting response $Y$ with predictors $X$ from the labeled observations (where $T=1$) in training data.\n", + "* Predict $P(Y=0|X=x)$ for every observation in the test data using model $\\mathcal{M}_1$.\n", + "* Initialize `sum = 0`\n", + "* For every point in the parameter space (for every $x$ in $X$)\n", + " 1. $p_x \\leftarrow P(X=x)$ from $\\mathcal{M}_0$\n", + " * $\\mathcal{D_x} \\leftarrow \\{\\mathcal{D} | X = x\\}$\n", + " * Assign first $r\\cdot 100\\%$ observations from $\\mathcal{D_x}$ to $\\mathcal{D}_{rx}$\n", + " * $p_t \\leftarrow \\dfrac{|\\{\\mathcal{D}_{rx}|T=1\\}|}{|\\mathcal{D}_{rx}|}$ (part 2 of eq. $\\ref{model}$) Pitääkö tähänkin treenaa joku oma luokittelija?\n", + " * $p_y$ will be predicted from the model $\\mathcal{M}_1$\n", + " * `sum +=` $p_y \\cdot p_t \\cdot p_x$\n", + "* Return `sum`\n", + "--->\n", + "**Constructing $P(X=x)$, preliminary ideas:**\n", + "\n", + "* Approximate it with frequencies (makes independence assumption, make variables factors first)\n", + "* Construct Bayesian network using some well-known algorithm.\n", + "\n", + "<!---\n", + "Functions:\n", + "\n", + "* $f(x)$ gives probability of recidivism given personal properties and predictive model. Corresponds to parts 1 and 2 of eq $\\ref{model_disc}$ and $\\ref{model_cont}$.\n", + "* `ep` counts performance of the predictive model given a data, model and leniency rate like Michael's pdf. 
That is:\n", + "--->" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "def f(x, model, class_value):\n", + " '''\n", + " Returns the probabilities (as vector) of class value (class_value) given \n", + " individual features (x) and the trained, predictive model (model).\n", + " '''\n", + " if x.ndim == 1:\n", + " # if x is vector, transform to column matrix.\n", + " f_values = model.predict_proba(np.array(x).reshape(-1, 1))\n", + " else:\n", + " f_values = model.predict_proba(x)\n", + "\n", + " return f_values[:, model.classes_ == class_value].flatten()\n", + "\n", + "def ep(r, df, result_col, feature_cols, model, failure_value):\n", + " '''\n", + " Returns:\n", + " Empirical performance (float), i.e. percentage of recidivists. \n", + " \n", + " Parameters:\n", + " r = leniency rate(s)\n", + " df = test data, pandas DataFrame\n", + " result_col = String (or list of), column(s) containing the binarized results.\n", + " feature_cols = String (or list of), column(s) containing the individual features.\n", + " model = trained sklearn classifier. 
\n", + " failure_value = value obtained from the model.classes_ representing the \n", + " unwanted event label (usually 0 or 1).\n", + " '''\n", + " # Initialize DataFrame\n", + " ep_data = pd.DataFrame()\n", + "\n", + " # Attach booleans indicating failure\n", + " ep_data = ep_data.assign(failed=df[result_col] == failure_value)\n", + "\n", + " # Attach prediction values\n", + " ep_data = ep_data.assign(\n", + " failure_predictions=f(df[feature_cols], model, failure_value))\n", + "\n", + " # sort by predictions, most harmless at top\n", + " ep_data.sort_values(by='failure_predictions', inplace=True, ascending=True)\n", + "\n", + " # calculate number of subjects to which assign a positive decision\n", + " if isinstance(r, float):\n", + " to_release = int(round(ep_data.shape[0] * r))\n", + "\n", + " # subset data\n", + " released = ep_data[0:to_release]\n", + "\n", + " return np.sum(released.failed)/df.shape[0]\n", + " else:\n", + " results = np.zeros(0)\n", + " for r_value in r:\n", + " to_release = int(round(ep_data.shape[0] * r_value))\n", + "\n", + " # subset data\n", + " released = ep_data[0:to_release]\n", + "\n", + " results = np.append(results, np.sum(released.failed)/df.shape[0])\n", + " return results\n", + "\n", + "\n", + "def ep2_0(r, df, result_col, decision_col, feature_cols, r_col, y_model,\n", + " t_model, failure_value, pos_decision_value):\n", + " '''\n", + " Returns:\n", + " Empirical performance, i.e. percentage of recidivists. 
\n", + " \n", + " Parameters:\n", + " r = leniency rate(s)\n", + " df = test data, pandas DataFrame\n", + " result_col = String (list), name of column containing the binarized results.\n", + " feature_cols = String (list), name of columns containge individual features.\n", + " model = trained sklearn classifier \n", + " failure_value = value obtained from the model.classes_ representing the \n", + " unwanted event label (usually 0 or 1).\n", + " '''\n", + " # Initialize DataFrame\n", + " ep_data = pd.DataFrame()\n", + "\n", + " # Attach booleans indicating failure\n", + " ep_data = ep_data.assign(failed=df[result_col] == failure_value)\n", + "\n", + " # Attach prediction values\n", + " ep_data = ep_data.assign(\n", + " failure_predictions=f(df[feature_cols], y_model, failure_value))\n", + "\n", + " ep_data.failure_predictions = ep_data.failure_predictions * f(\n", + " df[[feature_cols, r_col]], t_model, pos_decision_value) * scs.norm.pdf(df[feature_cols])\n", + " # sort by predictions, most harmless at top\n", + " ep_data.sort_values(by='failure_predictions', inplace=True, ascending=True)\n", + "\n", + " # calculate number of subjects to which assign a positive decision\n", + " if isinstance(r, float):\n", + " to_release = int(round(ep_data.shape[0] * r))\n", + "\n", + " # subset data\n", + " released = ep_data[0:to_release]\n", + "\n", + " return np.sum(released.failed)/df.shape[0]\n", + " else:\n", + " results = np.zeros(0)\n", + " for r_value in r:\n", + " to_release = int(round(ep_data.shape[0] * r_value))\n", + "\n", + " # subset data\n", + " released = ep_data[0:to_release]\n", + "\n", + " results = np.append(results, np.sum(released.failed)/df.shape[0])\n", + " return results\n", + "\n", + "\n", + "def gp(r, x_values, y_model, x_model, failure_value):\n", + " '''\n", + " Returns:\n", + " Generalized performance\n", + " \n", + " Parameters:\n", + " r = leniency rate\n", + " df = test data, pandas DataFrame\n", + " feature_cols = String (list), name of columns 
containing individual features.\n", + " y_model = trained sklearn classifier to predict response\n", + " x_model = model of P(X=x)\n", + " failure_value = value obtained from the model.classes_ representing the \n", + " unwanted event label.\n", + " '''\n", + " preds = f(x_values, y_model, failure_value)\n", + "\n", + " return np.sum(preds * (preds < r) * x_model(x_values))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Performance comparison\n", + "\n", + "Below we try to replicate the results obtained by Lakkaraju and compare their model's performance to the one of ours.\n", + "\n", + "### On synthetic data\n", + "\n", + "#### Predictive models\n", + "\n", + "Lakkaraju says that they used logistic regression to predict recidivism. We construct the models using only *observed observations*, i.e. defendants that were granted bail and are in the train set. We then predict the probability of recidivism for all observations in the test data and attach it to our data set. I also applied random forest classifier." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "# instantiate the model (using the default parameters)\n", + "logreg = LogisticRegression(solver='lbfgs')\n", + "\n", + "# fit, reshape X to be of shape (n_samples, n_features)\n", + "logreg = logreg.fit(\n", + " train_labeled.X[train_labeled.decision_T == 1].values.reshape(-1, 1),\n", + " train_labeled.result_Y[train_labeled.decision_T == 1])\n", + "\n", + "# predict probabilities and attach to data\n", + "label_probs_logreg = logreg.predict_proba(test.X.values.reshape(-1, 1))\n", + "test = test.assign(B_prob_0_logreg=label_probs_logreg[:, 0])\n", + "\n", + "test_labeled = test_labeled.assign(B_prob_0_logreg=label_probs_logreg[:, 0])\n", + "\n", + "########\n", + "\n", + "# instantiate the model (using the default parameters)\n", + "forest = RandomForestClassifier(n_estimators=400, max_depth=8, random_state=0)\n", + "\n", + "# fit, reshape X to be of shape (n_samples, n_features)\n", + "forest = forest.fit(\n", + " train_labeled.X[train_labeled.decision_T == 1].values.reshape(-1, 1),\n", + " train_labeled.result_Y[train_labeled.decision_T == 1])\n", + "\n", + "# predict probabilities and attach to data\n", + "label_probs_forest = forest.predict_proba(test.X.values.reshape(-1, 1))\n", + "test = test.assign(B_prob_0_forest=label_probs_forest[:, 0])\n", + "\n", + "test_labeled = test_labeled.assign(B_prob_0_forest=label_probs_forest[:, 0])" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.037128152319060165 [0 1] [0. 
1.]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAGkCAYAAAAG4HVMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3X24pWV9H/rvD4eXEGMZIsZUJTOKaS84FNLO6WUajy/kNIOKg41viWh6JAFrEmMkvlGsNYInqCTYxmOi0RqbMUqMGodLiy8RsSdnsBkJKKPBkA6iEStkAEVxBLnPH+vZYbFYe+6198yevWfvz+e61rVY9/o9z7rX/ezh2d99Py/VWgsAAADzO2S5OwAAALDSCU4AAAAdghMAAECH4AQAANAhOAEAAHQITgAAAB2CEwAAQIfgBAAA0CE4AQAAdKybtbCqnpzklUn+eZJ7knwpyctba58c3l+f5I1JnpbkB5JsT/KS1trnJ9ZzRJLzkzw3yVFJrk7yitbapyfqDknyiiQvSPLQJNcleW1r7f2z9vnBD35w27Bhw6zlAADAGvPZz372ltbaMb26mYJTVb0gyZuHx/kZzVSdnOTI4f1Ksi3JxiQvSnJrknOTXF5VJ7fWvjq2unckeUqSlyX5n0l+JclHq+onW2tXj9Wdn+SlSc5L8tkkP5fkfVV1WmvtI7P0e8OGDdmxY8cspQAAwBpUVV+eqa611lvRhiRfTHJua+1N89ScnuTPkpzSWrt8aPtHSXYl2dpa+7Wh7aSMZpjObK29c2hbl2Rnkutaa1uGtock+UqSC1tr/3Hsc/48yTGttX82y5fbtGlTE5wAAID5VNVnW2ubenWznON0ZkaH5v3+Xmq2JPnaXGhKktba7UkuTXL6RN1dSS4Zq7s7yXuTbK6qw4fmzUkOS7J14nO2JjmxqjbO0G8AAID9Ypbg9Ngkf53k56rqb6vq7qq6vqp+ZazmhCTXTll2Z5Jjq+qBY3W7WmvfmVJ3WJLjxur2JLl+Sl2SHD9DvwEAAPaLWYLTP07y6Iwu/HBhkp9J8vEkb66qFw81R2d0XtOk3cPz+hnrjh57vq3d/zjCyToAAIAlN8vFIQ5J8kNJ/q/W2geGtk8O5z6dW1X/OUklmXayVE15vT/r7l9QdXaSs5Pk2GOP7ZUDAAB0zTLj9PfD88cn2j+W5EeS/GhGM0HTZoHmZprmZpl6dbvHntcPV+vbW939tNbe1lrb1FrbdMwx3asKAgAAdM0SnHbO0z4Xau4Zak6YUnN8khtba3eMrWtjVR05pe57ufecpp1JDk/yqCl1SfKFGfoNAACwX8wSnD44PG+eaN+c5Kutta9ndA+nh1XV4+ferKoHJXnq8N6cbUkOTfLMsbp1SZ6d5GOttT1D82UZBakzJj7zuUmuba3tmqHfAAAA+8Us5zh9JMnlSd5aVQ/O6Ka1z8joIhHPH2q2JdmeZGtVvSz33gC3krxhbkWttaur6pIkb6qqQzO6z9MLM7px7hljdd+oqoszOofqW0muyihcnZL7Xt4cAABgyXWDU2utVdXTkvxWkt/M6Dyjv05yRmvtj4eae6rqtCQXJXlLkiMyClJPbK19ZWKVz0/yuiQXJDkqyTVJTm2tXTVRd16SO5K8OMlDk1yX5FmttUsX80UBAAAWq+5/xe/VY9OmTW3Hjh3L3Q0AAGCFqqrPttY29epmOccJAABgTROcAAAAOgQnAACADsEJAACgQ3ACAADomOU+TgAAwBqy4ZUfXtL133DhU5Z0/UvBjBMAAECH4AQAANAhOAEAAHQITgAAAB2CEwAAQIfgBAAA0CE4AQAAdAhOAAAAHYITAABAh+AEAADQITgBAAB0CE4AAAAdghMAAECH4AQAANAhOAEAAHQITgAAAB2CEwAAQIfgBAAA0CE4AQAAdAhOAAAAHYITAABA
h+AEAADQITgBAAB0CE4AAAAdghMAAECH4AQAANAhOAEAAHQITgAAAB2CEwAAQIfgBAAA0CE4AQAAdAhOAAAAHYITAABAh+AEAADQITgBAAB0CE4AAAAdghMAAECH4AQAANAhOAEAAHQITgAAAB2CEwAAQIfgBAAA0CE4AQAAdAhOAAAAHTMFp6p6QlW1KY/bJurWV9Xbq+qWqvp2VX2iqk6csr4jquqNVXVTVd1ZVdur6nFT6g6pqnOr6oaq+m5VXVNVT1/81wUAAFi4hc44/VqSnxx7/J9zb1RVJdmW5NQkL0ry9CSHJrm8qh4+sZ53JDkryauTnJbkpiQfraqTJ+rOT/KaJG9O8qQkVyZ5X1U9eYH9BgAAWLR1C6z/Ymvtynne25LksUlOaa1dniRVtT3JriQvzyh0papOSvKcJGe21t45tF2RZGeS1w7rSVU9JMlLk1zYWrto+IzLq+q4JBcm+cgC+w4AALAo+/Mcpy1JvjYXmpKktXZ7kkuTnD5Rd1eSS8bq7k7y3iSbq+rwoXlzksOSbJ34nK1JTqyqjfux7wAAAPNaaHB6d1V9v6r+vqr+uKqOHXvvhCTXTllmZ5Jjq+qBY3W7WmvfmVJ3WJLjxur2JLl+Sl2SHL/AvgMAACzKrIfq3Z7kt5NckeSbSX4iyb9Psr2qfqK19o0kRye5Ycqyu4fn9UnuGOpu3Uvd0WPPt7XWWqfuPqrq7CRnJ8mxxx47rQQAAGBBZgpOrbW/SvJXY01XVNWnk/yPjM5delWSSjIZcjK0T77en3WTfX1bkrclyaZNm6YtDwAAsCCLPseptXZVki8l+d+Hpt2ZPgu0fni+dca63WPP64er9e2tDgAAYEnt68UhxmeFdmZ0XtKk45Pc2Fq7Y6xuY1UdOaXue7n3nKadSQ5P8qgpdUnyhX3oNwAAwMwWHZyqalOSH0/ymaFpW5KHVdXjx2oelOSpw3sZqzs0yTPH6tYleXaSj7XW9gzNl2UUpM6Y+OjnJrm2tbZrsX0HAABYiJnOcaqqd2d0P6arktyW0cUhzk3yd0l+dyjblmR7kq1V9bKMDs07N6NZqTfMrau1dnVVXZLkTVV16LDeFybZmLGQ1Fr7RlVdnOTcqvrW8NnPTnJK7nt5cwAAgCU161X1rk3y80lelOTIJF9P8oEk/7G1dkuStNbuqarTklyU5C1JjsgoSD2xtfaVifU9P8nrklyQ5Kgk1yQ5dThvatx5GV2J78VJHprkuiTPaq1dupAvCQAAsC/q/lf7Xj02bdrUduzYsdzdAACAg8qGV354Sdd/w4VPWdL1L0RVfba1tqlXt68XhwAAAFj1BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoENwAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoENwAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoENwAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoENwAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6FhWcquqyqmpVdcFE+/qqentV3VJV366qT1TViVOWP6Kq3lhVN1XVnVW1vaoeN6XukKo6t6puqKrvVtU1VfX0xfQZAABgsRYcnKrq55OcNKW9kmxLcmqSFyV5epJDk1xeVQ+fKH9HkrOSvDrJaUluSvLRqjp5ou78JK9J8uYkT0pyZZL3VdWTF9pvAACAxVpQcKqqo5JcnOScKW9vSfLYJM9rrb2ntXbZ0HZIkpePreOkJM9J8pLW2h+01v48ybOS3JjktWN1D0ny0iQXttYuaq1d3lp7QZLLk1y4kH4DAADs
i4XOOL0hyc7W2numvLclyddaa5fPNbTWbk9yaZLTJ+ruSnLJWN3dSd6bZHNVHT40b05yWJKtE5+zNcmJVbVxgX0HAABYlJmDU1U9NskvJPnleUpOSHLtlPadSY6tqgeO1e1qrX1nSt1hSY4bq9uT5PopdUly/Kx9BwAA2BczBaeqOjTJW5Nc1Fq7bp6yo5PcOqV99/C8fsa6o8eeb2uttU4dAADAkpp1xukVSX4gyev2UlNJJkPOXPtS1t33zaqzq2pHVe24+eab91YKAAAwk25wqqpjk5yX5D8kObyqjhouEpGx1w/IaCZo2izQ3EzT3CxTr2732PP64Wp9e6u7j9ba21prm1prm4455pi9fTUAAICZzDLj9MgkR2R0UYZbxx7J6Kp3tyY5MaNzj06YsvzxSW5srd0xvN6ZZGNVHTml7nu595ymnUkOT/KoKXVJ8oUZ+g4AALDPZglOVyd54pRHMgpTT8wo7GxL8rCqevzcglX1oCRPHd6bsy2j+zs9c6xuXZJnJ/lYa23P0HxZRkHqjIn+PDfJta21XTP0HQAAYJ+t6xW01m5L8qnJ9uEIui+31j41vN6WZHuSrVX1soxmos7N6JykN4yt7+qquiTJm4aLTuxK8sIkGzMWklpr36iqi5OcW1XfSnJVRuHqlNz38uYAAABLqhucZtVau6eqTktyUZK3ZHR43/YkT2ytfWWi/PkZXWjigiRHJbkmyamttasm6s5LckeSFyd5aJLrkjyrtXbp/uo3AABAz6KDU2vtfle3a63tTnLm8NjbsncmOWd47K3u+xmFqwsW208AAIB9NfMNcAEAANYqwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoENwAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoENwAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoENwAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoENwAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBjpuBUVZur6pNV9fWq2lNVX62qP6mq4yfqHlFVf1pVt1fVN6vqA1V17JT1ra+qt1fVLVX17ar6RFWdOKXuiKp6Y1XdVFV3VtX2qnrc4r8uAADAws0643R0ks8m+dUkP5Pk3CQnJLmyqn4sSarqyCSfTPJPk/zbJM9L8ugkl1fVD86tqKoqybYkpyZ5UZKnJzl0qHv4xOe+I8lZSV6d5LQkNyX5aFWdvOBvCgAAsEjrZilqrb0nyXvG26rqfyT56yTPSPLbGQWcRyb5J62164eazyX5myQvSPI7w6Jbkjw2ySmttcuHuu1JdiV5eZJfG9pOSvKcJGe21t45tF2RZGeS1w7rAQAAWHL7co7T3w/Pdw3PW5JcOReakqS1tivJXyQ5fWy5LUm+Nheahrrbk1w6pe6uJJeM1d2d5L1JNlfV4fvQdwAAgJktKDhV1QOq6rCqenSStyb5ekZBJhkdunftlMV2Jhk/F2pvdcdW1QPH6na11r4zpe6wJMctpO8AAACLtdAZp88k2ZPkS0n+WUaH231jeO/oJLdOWWZ3kvVjr/dWl7HaXt3R0zpYVWdX1Y6q2nHzzTfP9z0AAABmttDg9Lwkj8no3KNvJvl4VW0Ye79NWaamvN6fdffRWntba21Ta23TMcccs7dSAACAmSwoOLXWvtha+8xwsYifTvLAJK8c3r4102eB1ue+M0e791KXsdpe3e4p7wEAAOx3i744RGvttiTX595zjXZmdF7SpOOTfGHs9d7qbmyt3TFWt3G4zPlk3feGzwYAAFhyiw5OVfUjGd2z
6W+Hpm1JHlNVjxyr2ZDkp4b3Mlb3sKp6/Fjdg5I8dUrdoUmeOVa3Lsmzk3ystbZnsX0HAABYiJnu41RVH0xyVZLPZXRu048neUmSuzO6h1OS/EFGN8j9UFW9KqPzk85P8pWMrsA3Z1uS7Um2VtXLMjo079yMzl16w1xRa+3qqrokyZuq6tCM7vP0wiQbk5yxmC8LAACwGLPOOF2Z5GlJ3pXkw0nOSXJFkpNba19Kktbat5OcktEV9/4oybszCjunjB1+l9baPUlOS/LxJG9J8sEk30/yxNbaVyY+9/lJ3pnkguFzH5Hk1NbaVQv+pgAAAIs004xTa+31SV4/Q92NSZ4+Q93uJGcOj73V3ZlRSDtnln4CAAAshUWf4wQAALBWCE4AAAAdghMAAECH4AQAANAhOAEAAHQITgAAAB2CEwAAQIfgBAAA0CE4AQAAdAhOAAAAHYITAABAh+AEAADQITgBAAB0CE4AAAAdghMAAECH4AQAANAhOAEAAHQITgAAAB2CEwAAQIfgBAAA0CE4AQAAdAhOAAAAHYITAABAh+AEAADQITgBAAB0CE4AAAAdghMAAECH4AQAANAhOAEAAHQITgAAAB3rlrsDAADAwmx45YeXuwtrjhknAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoENwAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKCjG5yq6hlV9f6q+nJV3VlV11XVb1XVD03Ura+qt1fVLVX17ar6RFWdOGV9R1TVG6vqpmF926vqcVPqDqmqc6vqhqr6blVdU1VP37evCwAAsHCzzDi9NMn3k/z7JKcm+b0kL0zy8ao6JEmqqpJsG95/UZKnJzk0yeVV9fCJ9b0jyVlJXp3ktCQ3JfloVZ08UXd+ktckeXOSJyW5Msn7qurJC/uKAAAA+2bdDDVPba3dPPb6iqraneRdSZ6Q5JNJtiR5bJJTWmuXJ0lVbU+yK8nLk/za0HZSkuckObO19s6h7YokO5O8dlhPquohGQW2C1trFw2fe3lVHZfkwiQfWewXBgAAWKjujNNEaJrzl8Pzw4bnLUm+NheahuVuT3JpktPHltuS5K4kl4zV3Z3kvUk2V9XhQ/PmJIcl2TrxuVuTnFhVG3v9BgAA2F8We3GIxw/PXxyeT0hy7ZS6nUmOraoHjtXtaq19Z0rdYUmOG6vbk+T6KXVJcvwi+w0AALBgCw5OVfWwjA6r+0RrbcfQfHSSW6eU7x6e189Yd/TY822ttdapAwAAWHILCk7DzNGHktyd5PnjbyWZDDlz7ZOv92fdtD6eXVU7qmrHzTdPO8oQAABgYWYOTlV1REZXzntkks2tta+Ovb0702eB5maabp2xbvfY8/rhan17q7uf1trbWmubWmubjjnmmPnKAAAAZjZTcKqqQ5O8P8m/TPLk1trnJ0p2ZnRe0qTjk9zYWrtjrG5jVR05pe57ufecpp1JDk/yqCl1SfKFWfoNAACwP8xyA9xDkrw7yU8nOb21duWUsm1JHlZVjx9b7kFJnjq8N153aJJnjtWtS/LsJB9rre0Zmi/LKEidMfE5z01ybWttV6/fAAAA+8ss93H6fzIKOq9L8u2qeszYe18dDtnblmR7kq1V9bKMDs07N6Nzkt4wV9xau7qqLknypmEWa1dGN9PdmLGQ1Fr7RlVdnOTcqvpWkqsyClen5L6XNwcAAFhyswSnJw3P5w2Pcb+Z5DWttXuq6rQkFyV5S5IjMgpST2ytfWVimednFMIuSHJUkmuSnNpau2qi7rwkdyR5cZKHJrkuybNaa5fO8sUAAGC5bHjlh5e7C+xn3eDUWtswy4paa7uTnDk89lZ3Z5Jzhsfe6r6fUbi6YJbPBwAAWCqLvQEuAADAmiE4AQAAdAhOAAAAHYITAABAh+AEAADQITgBAAB0CE4AAAAdghMAAECH
4AQAANAhOAEAAHQITgAAAB2CEwAAQIfgBAAA0CE4AQAAdAhOAAAAHYITAABAh+AEAADQITgBAAB0CE4AAAAdghMAAECH4AQAANAhOAEAAHSsW+4OAADActjwyg8vdxc4iJhxAgAA6BCcAAAAOgQnAACADsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoGPdcncAAACm2fDKDy93F+AfmHECAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgA7BCQAAoMMNcAEAWBQ3qGUtMeMEAADQITgBAAB0CE4AAAAdghMAAECH4AQAANAx01X1qurhSV6RZFOSk5L8QJKNrbUbJuqOSHJ+kucmOSrJ1Ule0Vr79ETdIcP6XpDkoUmuS/La1tr7p3z2WUl+I8nGJDckubi19vszf0MAgDXMle9g/5h1xum4JM9KcmuS/76XunckOSvJq5OcluSmJB+tqpMn6s5P8pokb07ypCRXJnlfVT15vGgITW9N8v4kpyZ5X5K3VNULZ+w3AADAPpv1Pk6fbq39SJJU1S8l+ZnJgqo6KclzkpzZWnvn0HZFkp1JXptky9D2kCQvTXJha+2iYfHLq+q4JBcm+chQty7J65L8UWvtvLG6f5zk/Kp6e2vtroV+YQAAgIWaacaptXbPDGVbktyV5JKx5e5O8t4km6vq8KF5c5LDkmydWH5rkhOrauPw+ieTHDOl7o+S/HCSx87SdwAAgH21Py8OcUKSXa2170y078woKB03VrcnyfVT6pLk+LG6JLm2UwcAALCk9mdwOjqjc6Am7R57f+75ttZam6EuU9Y5WXcfVXV2Ve2oqh0333zzTB0HAADYm/0ZnCrJZBiaa19sXeapnVdr7W2ttU2ttU3HHHPMQhYFAACYataLQ8xid5Jjp7SvH3t/7nl9VdXErNO0umQ0s3TTWN3RE+8DABy0XC4cDg77c8ZpZ5KNVXXkRPvxSb6Xe89p2pnk8CSPmlKXJF8Yq0vuPddpvjoAAIAltT9nnLYl+c0kz0zyruQfLin+7CQfa63tGeouyyhInTHUz3lukmtba7uG19uT3DLUfWKibneSv9iPfQcAmMqMEJAsIDhV1TOG//wXw/OTqurmJDe31q5orV1dVZckeVNVHZpkV5IXJtmYUfhJkrTWvlFVFyc5t6q+leSqjMLVKUlOH6u7q6r+Q0Y3vP27jMLTKUnOTPKi1tr3FveVAQAAFmYhM07vm3j9luH5iiRPGP77+RndtPaCJEcluSbJqa21qyaWPS/JHUlenOShSa5L8qzW2qXjRa2136+qluQ3krwsyY1JfrW19pYAAAAcIDMHp9ba5FXvptXcmeSc4bG3uu9nFK4umGGdb03y1hm7CQAAsN/tz4tDAAAArEqCEwAAQIfgBAAA0CE4AQAAdOzP+zgBACwL91oClpoZJwAAgA4zTgDAkjMjBBzszDgBAAB0CE4AAAAdDtUDgIPEUh7udsOFT1mydQOsBmacAAAAOgQnAACADofqAQCuegfQYcYJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKDDVfUAWDNcOQ6AxTLjBAAA0CE4AQAAdAhOAAAAHYITAABAh+AEAADQ4ap6ACyIK9MBsBaZcQIAAOgQnAAAADocqgewyjiUDgD2PzNOAAAAHYITAABAh+AEAADQITgBAAB0uDgEwDJwAQcAOLgITgBTCDYAwDiH6gEAAHSYcQIOSmaEAIADyYwTAABAhxknYMmYFQIAVgszTgAAAB2CEwAAQIfgBAAA0OEcJ1jDnIMEADAbM04AAAAdZpxgBTMjBACwMphxAgAA6BCcAAAAOhyqB/vI4XQAAKuf4MSqJ9gAALCvHKoHAADQseJnnKrqEUkuTvKvk1SSTyT59dbajcvaMfYbM0IAAKx0K3rGqaqOTPLJJP80yb9N8rwkj05yeVX94HL2DQAAWDtW
+ozTWUkemeSftNauT5Kq+lySv0nygiS/s4x9AwAA1oiVHpy2JLlyLjQlSWttV1X9RZLTIzgdMA6nAwBgLVvpwemEJB+a0r4zyTMPcF9WNMEGAACWzkoPTkcnuXVK++4k6w9wX/aJYAMAAAevlR6ckqRNaav5iqvq7CRnDy/vqKrrlqRX83twklsO8GdyX7bB8jL+y882WF7Gf/nZBsvL+C+vg2L86/XL3YP7+LFZilZ6cLo1o1mnSeszfSYqrbW3JXnbUnZqb6pqR2tt03J9PrbBcjP+y882WF7Gf/nZBsvL+C8v4790VvTlyDM6l+mEKe3HJ/nCAe4LAACwRq304LQtyWOq6pFzDVW1IclPDe8BAAAsuZUenP4gyQ1JPlRVp1fVloyusveVJG9dzo7txbIdJsg/sA2Wl/FffrbB8jL+y882WF7Gf3kZ/yVSrU279sLKUVXHJrk4yb/O6KIQf57k11trNyxnvwAAgLVjxQcnAACA5bbSD9VbclX1iKr606q6vaq+WVUfGGa5Zln2iKp6Y1XdVFV3VtX2qnrclLpDqurcqrqhqr5bVddU1dPnWedZVfXXVbWnqq6rqn+3r99xJVtJ419Vn6qqNuXx6/vju65EB2j8z6mqS4e6VlWv2cs6n1ZVfzVspy9X1auq6gH78BVXvJW0DarqD+f5N/CmffyaK9ZSj39V/XhV/aeq+lxV3THUbquqk+ZZ55raByQraxvYDyzJ+P9QVf1JVV1fVd+uqtuq6jNV9dwp65v596XVZIVtgxvm+TfwtP31fQ9qrbU1+0hyZJK/SXJtkqclOT3J55P8bZIfnGH5dye5LclZSX46yQeS3Jnk5Im61yXZk+SlSZ6Y0flZ9yR58kTdWUP764a6C4bXL1zusVoj4/+pJNckeczE46HLPVYH+fh/MclnkvxeRvdle80869uc5PsZHZv9xCTnJPluktcv91itoW3wh0m+MeXfwI8t91gdrOOf5FeTfC7Jbww/1/8myfbhZ/tfTKxvTe0DVug2+FTsB/b3+P9wkj9O8otDzZOTvGv4f9FLJtY30/56NT1W4Da4IcllU/4NrF/usVoJj2XvwLJ++eTFGf2idtxY28Ykdyc5p7PsScMP3PPH2tYluS7JtrG2hwz/E/jNieX/PMnnJpb9RpJ3TdRZb4eQAAAHFElEQVT9l4xuYnboco/Xah7/oe1TSf7f5R6X1TT+Q/shY+/v7Zf2v0pyxUTbq5N8L6v3l5aVtg3+MMlXl3tcVtP4Z3QjyppY9h9ldC/C/zqx7JraB6y0bTC02w8swf+D5ll+e5LPj72eeX+9mh4raRsMbTck2brc47JSH2v9UL0tSa5srV0/19Ba25XkLzJK/L1l70pyydiydyd5b5LNVXX40Lw5yWFJtk4svzXJiVW1cXj9k0mOmVL3Rxn9peCxM36ng8lKGv+16ECMf1pr9/Q6UlWPSHJypv/8H5rkSb11HKRWzDZYo5Z8/Ftrt7Tht5GxutuTfCnJw8aa1+I+IFlZ22AtOiD/D5rH3w/Lz1mr++uVtA3oWOvB6YSMpkYn7czoJru9ZXe11r4zZdnDkhw3VrcnyfVT6jL2OXM3+p3sz2TdarKSxn/OTwzHGN81HA//i51+HMwOxPgvpC+Z7M+w8/jODP05WK2kbTDnIVV1S1XdXVVfqqpX1Oo9z2xZxr+qjk7yv2V0COX4+jKlP6t5H5CsrG0wx35gCca/RtZV1Q9X1dkZBaXx8ycXur9eLVbSNpjz1Kr6znCu5ZXOb7rXuuXuwDI7OqOp+km7k6zfh2Xn3p97vm3yr13z1GXKOifrVpOVNP5J8umMjhX+UpKjkvxCkrdX1Y+21i7o9OdgdCDGfyF9yTzrvHUR6ztYrKRtkCRXJ/lsRjvdIzI6F+S3kjw6yS8tYn0r3XKN/+9mdHuN8V9Y1uI+IFlZ2yCxH5izFOP/KxmNezKa5Xhxa+2/Tqxv1v31arKStkGSXJrkL5PsSvIjGZ0j
+MGqel5rbXI2cM1Z68EpGR0bOqlmWK5mXHYhdfP1ZzVbKeOf1tqrJ5o+VFUfTHJeVb2ptXbHDP062Cz1+M9qbz//i13nwWKlbIO01iZ/ifxIVd2R5Ner6vWttb9Z7LpXsAM6/lV1bpLnJPnF8UNzsnb3AcnK2Qb2A/daivG/JMmVGZ1ztiXJ71bV91trb13k+laTlbIN0lp70X1WNPr5vzKjP6Kt+eC01g/Vm+8v2eszPcGP272XZefen3teX1WTP8TT6jJlnUdPvL+arKTxn897MvrL+4mduoPRgRj/We3tL4pHLWJ9B4uVtA3m857hedN+Wt9KckDHv0aXFv+/k7yqtfZfpqwvU9a5mvcBycraBvOxH5huQePfWru5tbajtXZZa+2XMzp/76KqOnSsfl/21werlbQN7qe19v0k70vy8Kr60U5/Vr21Hpx25t7jyscdn+QLMyy7saqOnLLs93LvMbo7kxye5FFT6jL2OXPH8E72Z7JuNVlJ4z+f1fxX4AMx/gvpSyb7U1UbMrpU62r8+U9W1jaYj38D8y878/hX1fOSvCXJb7fWXjfP+jKlP6t5H5CsrG0wH/8G5l92X/4ftCPJAzM6HGxuffuyvz5YraRtMJ/V/G9gQdZ6cNqW5DFV9ci5huEXtZ8a3uste2iSZ44tuy7Js5N8rLW2Z2i+LKMf3jMmln9ukmuHk9+T0SUhb5mnbndGV1dZbVbS+M/nORndD+HznbqD0YEY/5m01m7M6N4p07bTXUn+20LWdxBZMdtgL56T0c7yL/fT+laSAzL+VfVvkrwzydtbay+dZ31rcR+QrKxtMB/7gfmX3Zf/Bz0+yR0ZXYY/2ff99cFqJW2D+xnW98wkN7bWvt5Z3+p3IK99vtIeSX4wozT++Ywu+bglo1/e/meSB47V/VhG19N/9cTy781oGvWXMrqh2J9mdEO9fz5Rd+HQfk6SJ2R0E8p7kjx1ou7fDe0XDHWvHV7/ynKP1Wof/yT/R5IP596bw/1skg9l9AvjK5Z7rA7y8d+U5BlJnjWM558Mr5+R5MixuicP2+Wtw3Z6ybC+Ny73WK2FbTB8xqeT/HKSn0ny1IzuIXRPkt9b7rE6WMc/yeOGtquS/Kvc94aSPzGxvjW1D1hp2yD2A0s1/i/IKLSekdEv6j87LHe/cc2Mvy+tpsdK2gZJfn5o/4WMbkD8c0n++1D3c8s9VivhsewdWO5HkmOTvD/JN5N8K8mfJdkwUbMhU24ameQHkvxOkq8PP6SfSfKEKZ/xgCSvSvLljC61+bkkz5inPy/I6Go+ezK6k/QvL/cYrYXxz+iSnf8tyd8NNXck+f+S/Pxyj9EqGP8/HJaf9pj8rJ/NaIexJ8mNGd0A9wHLPU5rYRtkdJz8nw3/Tr6b0V/Yr8roikqHLPc4Hazjn+Q1exn7G6b0Z03tA1bSNoj9wFKN/79K8pEkNw3j+ndJPpHkKVP6MvPvS6vpsVK2QUZ/TPhkkv+V0dEetw91m5d7jFbKo4aBAgAAYB5r/RwnAACALsEJAACgQ3ACAADoEJwAAAA6BCcAAIAOwQkAAKBDcAIAAOgQnAAAADoEJwAAgI7/H+wh9X/LFZisAAAAAElFTkSuQmCC\n", + "text/plain": [ + "<Figure size 1008x504 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1008x504 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# 
instantiate the model (using the default parameters)\n", + "t_malli = LogisticRegression(solver='lbfgs')\n", + "\n", + "# fit, reshape X to be of shape (n_samples, n_features)\n", + "t_malli = t_malli.fit(train[['X', 'acceptanceRate_R']], train.decision_T)\n", + "\n", + "#test_labeled['B_prob_0_causal'] =\n", + "\n", + "test_labeled['do_R'] = 0.1\n", + "\n", + "tmp = t_malli.predict_proba(test_labeled[['X', 'do_R']])[:, 1] * logreg.predict_proba(test_labeled.X.values.reshape(-1, 1))[:, 0]\n", + "\n", + "print(max(tmp), t_malli.classes_, logreg.classes_)\n", + "plt.hist(tmp, bins=30);plt.show()\n", + "\n", + "plt.scatter(test_labeled.X, t_malli.predict_proba(test_labeled[['X', 'do_R']])[:, 1], c=test_labeled.result_Y);plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "#from sklearn.tree import export_graphviz\n", + "## Export as dot file\n", + "#export_graphviz(forest.estimators_[0], out_file='tree.dot', \n", + "# feature_names = 'X',\n", + "# class_names = ['0', '1'],\n", + "# rounded = True, proportion = False, \n", + "# precision = 2, filled = True)\n", + "#\n", + "#import pydot\n", + "#\n", + "#(graph,) = pydot.graph_from_dot_file('tree.dot')\n", + "#graph.write_png('tree.png')\n", + "#\n", + "## Display in jupyter notebook\n", + "#from IPython.display import Image\n", + "#Image(filename = 'tree.png')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Visual comparison\n", + "\n", + "Let's plot the failure rates against the acceptance rates using the difference." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1008x576 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.0516 0.0196 0.0153 0.012 0.0287]\n", + " [0.1066 0.0436 0.0249 0.0359 0.0456]\n", + " [0.156 0.0585 0.0547 0.0677 0.0701]\n", + " [0.2053 0.073 0.0892 0.1116 0.103 ]\n", + " [0.2558 0.0917 0.1323 0.1793 0.1437]\n", + " [0.3033 0.1058 0.1852 0.2231 0.1895]\n", + " [0.3535 0.1194 0.2403 0.2749 0.2359]\n", + " [0.4022 0.1322 0.3315 0.3586 0.2783]]\n" + ] + } + ], + "source": [ + "failure_rates = np.zeros((8, 5))\n", + "\n", + "for r in np.arange(1, 9):\n", + " \n", + " #### True evaluation\n", + " # Sort by failure probabilities, subjects with the smallest risk are first. \n", + " df_sorted = test.sort_values(\n", + " by='B_prob_0_logreg', inplace=False, ascending=True)\n", + "\n", + " to_release = int(round(df_sorted.shape[0] * r / 10))\n", + "\n", + " # Failure was coded as zero.\n", + " failure_rates[r - 1, 0] = np.mean(df_sorted.result_Y[0:to_release] == 0)\n", + " \n", + " #### Labeled outcomes only\n", + " # Sort by failure probabilities, subjects with the smallest risk are first. 
\n", + " df_sorted = test_labeled.sort_values(\n", + " by='B_prob_0_logreg', inplace=False, ascending=True)\n", + "\n", + " to_release = int(round(df_sorted.shape[0] * r / 10))\n", + "\n", + " failure_rates[r - 1, 1] = np.mean(df_sorted.result_Y[0:to_release] == 0) # keskiarvo resulteista, mutta kun siellä ne NAt\n", + " \n", + " #### Human error rate\n", + " # Get judges with correct leniency as list\n", + " correct_leniency_list = test_labeled.judgeID_J[\n", + " test_labeled['acceptanceRate_R'].round(1) == r / 10].values\n", + "\n", + " # Released are the people they judged and released, T = 1\n", + " released = test_labeled[test_labeled.judgeID_J.isin(correct_leniency_list)\n", + " & (test_labeled.decision_T == 1)]\n", + "\n", + " # Get their failure rate, aka ratio of reoffenders to number of people judged in total\n", + " failure_rates[r - 1, 2] = np.sum(\n", + " released.result_Y == 0) / correct_leniency_list.shape[0]\n", + " # onko jakaja oikein\n", + " \n", + " #### Contraction, logistic regression\n", + " failure_rates[r - 1, 3] = contraction(\n", + " test_labeled, 'judgeID_J', 'decision_T', 'result_Y', 'B_prob_0_logreg',\n", + " 'acceptanceRate_R', r / 10, False)\n", + "\n", + " #### P(Y=0 | T=1, X=x)*P(T=1 | R=r, X=x)*P(X=x)\n", + " failure_rates[r - 1, 4] = si.quad(lambda x: f(np.array([x]), logreg, 0)*f(np.array([[x, r/10]]), t_malli, 1)*scs.norm.pdf(x), -np.inf, np.inf)[0]\n", + "\n", + "# klassifikaatioille scipy.stats semin kautta error barit xerr ja yerr argumenttien kautta\n", + "\n", + "plt.figure(figsize=(14, 8))\n", + "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 0], label='True Evaluation', c='green')\n", + "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 1], label='Labeled outcomes', c='lime')\n", + "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 2], label='Human evaluation', c='red')\n", + "plt.plot(np.arange(0.1, 0.9, .1), failure_rates[:, 3], label='Contraction, log.', c='blue')\n", + "plt.plot(np.arange(0.1, 0.9, .1), 
failure_rates[:, 4], label='Integrand', c='magenta')\n", + "\n", + "plt.title('Failure rate vs. Acceptance rate')\n", + "plt.xlabel('Acceptance rate')\n", + "plt.ylabel('Failure rate')\n", + "plt.legend()\n", + "plt.grid()\n", + "plt.show()\n", + "\n", + "with np.printoptions(precision=4, suppress=True):\n", + " print(failure_rates)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean absolute errors:\n", + "0.0\n", + "0.1488280357142857\n", + "0.09513124234987873\n", + "0.07143974791310947\n", + "integrand: 0.0924364389817069\n", + "[0.04311835 0.01373383 0.03947055 0.04332996 0.03225757]\n" + ] + } + ], + "source": [ + "print(\"Mean absolute errors:\")\n", + "for i in range(failure_rates.shape[1]):\n", + " if i == 4:\n", + " print(\"integrand: \", end=\"\")\n", + " print(np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])))\n", + "\n", + "print(scs.sem(failure_rates, axis=0))\n", + "# true, labeled, human, contraction, integrand" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.0001962]\n", + "[0.00089983]\n", + "[0.00387493]\n", + "[0.01469389]\n", + "[0.04412076]\n", + "[0.08669258]\n", + "[0.08755081]\n", + "[0.05041144]\n", + "[0.02300916]\n", + "[0.00982404]\n" + ] + } + ], + "source": [ + "for x in np.arange(-5, 5):\n", + " r = 0.3\n", + " print(f(np.array([[x, r]]), t_malli, 1)*f(np.array([x]), logreg, 0))" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0 (0.017619869764739768, 7.585898195449956e-11)\n", + "0.02040816326530612 (0.019499240205571863, 8.655956024063706e-11)\n", + "0.04081632653061224 (0.021562205154989997, 9.880436235634218e-11)\n", + "0.061224489795918366 (0.02382327833090757, 
1.1274381060923062e-10)\n", + "0.08163265306122448 (0.026297456959911286, 1.2849031088594973e-10)\n", + "0.1020408163265306 (0.02900010492672288, 1.4609844185002997e-10)\n", + "0.12244897959183673 (0.0319468078833058, 1.6554943762895352e-10)\n", + "0.14285714285714285 (0.03515319858110887, 1.8674473764395006e-10)\n", + "0.16326530612244897 (0.038634751352196184, 2.095119758676625e-10)\n", + "0.18367346938775508 (0.04240654552924697, 2.3362449214498625e-10)\n", + "0.2040816326530612 (0.04648299866372316, 2.5883188075795255e-10)\n", + "0.22448979591836732 (0.050877571666397205, 2.8489597344974724e-10)\n", + "0.24489795918367346 (0.05560244943011784, 3.1162530874696104e-10)\n", + "0.26530612244897955 (0.06066820205896193, 3.389008915464566e-10)\n", + "0.2857142857142857 (0.06608343346006608, 3.6668878631143796e-10)\n", + "0.3061224489795918 (0.07185442567550888, 3.950372592961083e-10)\n", + "0.32653061224489793 (0.07798478884648338, 4.240593484446999e-10)\n", + "0.3469387755102041 (0.08447512800332105, 4.539029413703657e-10)\n", + "0.36734693877551017 (0.09132273884923851, 4.847103787620876e-10)\n", + "0.3877551020408163 (0.09852134524130086, 5.165699403846364e-10)\n", + "0.4081632653061224 (0.10606089106859018, 5.494592801492646e-10)\n", + "0.42857142857142855 (0.11392739860585298, 5.831822383147574e-10)\n", + "0.44897959183673464 (0.1221029041332785, 6.172998775429425e-10)\n", + "0.4693877551020408 (0.13056547965190782, 6.50998996053682e-10)\n", + "0.4897959183673469 (0.1392893469282113, 6.831022211771777e-10)\n", + "0.5102040816326531 (0.14824508695845484, 7.121281083428359e-10)\n", + "0.5306122448979591 (0.15739994438907143, 7.360615587244717e-10)\n", + "0.5510204081632653 (0.16671822264028938, 7.524755669431017e-10)\n", + "0.5714285714285714 (0.17616176166439657, 7.586667272574147e-10)\n", + "0.5918367346938775 (0.18569048665110674, 7.519459735769229e-10)\n", + "0.6122448979591836 (0.19526301279342237, 7.301802499164036e-10)\n", + "0.6326530612244897 
(0.20483728865197567, 6.927757357768648e-10)\n", + "0.6530612244897959 (0.2143712588715651, 6.426429112279573e-10)\n", + "0.673469387755102 (0.22382352612718304, 5.921440913935919e-10)\n", + "0.6938775510204082 (0.23315399226401498, 6.074283572483916e-10)\n", + "0.7142857142857142 (0.2423244596368379, 6.818423366197291e-10)\n", + "0.7346938775510203 (0.2512991755747974, 8.178925862612687e-10)\n", + "0.7551020408163265 (0.260045305566269, 1.0267506205637807e-09)\n", + "0.7755102040816326 (0.2685333239986697, 1.3174097463721993e-09)\n", + "0.7959183673469387 (0.27673731489308406, 1.693465127253933e-09)\n", + "0.8163265306122448 (0.28463517882427547, 2.150800685998324e-09)\n", + "0.836734693877551 (0.292208745898638, 2.6764016466967304e-09)\n", + "0.8571428571428571 (0.2994437980818355, 3.2487043441125506e-09)\n", + "0.8775510204081632 (0.30633000716306324, 3.8396644375453745e-09)\n", + "0.8979591836734693 (0.3128607970943802, 4.4183558134388365e-09)\n", + "0.9183673469387754 (0.3190331412775102, 4.955636569829531e-09)\n", + "0.9387755102040816 (0.3248473065588803, 5.429888447004714e-09)\n", + "0.9591836734693877 (0.3303065562506764, 5.846557896489933e-09)\n", + "0.9795918367346939 (0.33541682447132515, 6.200391636431277e-09)\n", + "1.0 (0.3401863735703823, 6.4855735760226974e-09)\n" + ] + } + ], + "source": [ + "for r in np.linspace(0,1):\n", + " print(r, si.quad(lambda x: f(np.array([[x, r]]), t_malli, 1)*f(np.array([x]), logreg, 0)*scs.norm.pdf(x), -np.inf, np.inf))" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 12543\n", + "1.0 12457\n", + "Name: result_Y, dtype: int64" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.result_Y.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Thoughts:**\n", + "\n", + "Failure rates still too high for about 10 percentage points 
compared to Lakkaraju paper. Failure rates will change if seed is changed (e.g. with seed 0 contraction's failure rates are approximately 0.31, causal doesn't change that much). It seems like the contraction or our model is some how predicting the wrong thing. Behavior after 0.5 is not consistent? (Curves curve down in Lakkaraju's paper. + Human evaluation curve jumps to the wrong side.) Have to check some rounding rules." + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1008x504 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.14484459336296954\n" + ] + } + ], + "source": [ + "r_vals = np.linspace(0, 1, 200)\n", + "\n", + "probs = np.zeros(0)\n", + "\n", + "for r in r_vals:\n", + " probs = np.append(probs, si.quad(lambda x: f(np.array([[x, r]]), t_malli, 1)*f(np.array([x]), logreg, 0)*scs.norm.pdf(x), -np.inf, np.inf)[0])\n", + "\n", + "plt.plot(r_vals, probs)\n", + "plt.grid()\n", + "plt.show()\n", + "\n", + "\n", + "print(probs[np.argmin(np.abs(r_vals-0.5))])" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA0kAAAG1CAYAAAAstr8LAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzs3Xd4FFXbx/HvHQgJvdcg0qSIAgoKKAgiIqiAKIrYwMeCIg+2BwuKIKLYywvYCyhiwQJSBCugAgoCIkiTJr03QyAkOe8fMwmbzSbZhJLC73Ndey05c+bMmdnZYe49Zcw5h4iIiIiIiHgicroCIiIiIiIiuYmCJBERERERkQAKkkRERERERAIoSBIREREREQmgIElERERERCSAgiQREREREZEACpJEJNcxM2dm04PSpptZjj+zwMxG+fWrntN1CYeZlTKz18xsnZkl+nUvldP1EjmWzKy6f26PyqHtn/DrU6jrpIgcOwqSRHIBMzvHzN4xs+Vm9q+ZHTKzDWY20cxuNbMiOV1HybOeB+4AFgBPAo8DB3O0RiLZkJM/UJhZG3/bg0/0to8nBVoi6SuY0xUQOZmZWQHgZaAvcBiYDkwB4oDKwAXA5cAg4JScqWWucROgYDHrOgLLnXNX5HRFRPIxXZ9E8hkFSSI561m8AOlX4Frn3NrgDGZ2ETDsBNcr13HO/ZPTdcijKgMrc7oSIvmZrk8i+Y+624nkEDOrB9wDbAUuDRUgATjnvgfOC7F+hJndZma/+l30/jWzWWZ2ZYi8yd1UaphZPzNb4XfpW2Vmd6dTvygz629mf5jZATPba2bfmVnrEHmn++UXNrOnzWytmSWYWS9/eRMzG2lmS8xsn5nFmtl8M7vLzCzM45Wqz3/AGISMXtWDyrjKL2evmcWZ2QIzuz2d7dUws8/MbI9f52lm1jCcugaUkdJFx8wuNLNf/H3f7nevLJfOehea2RQz22lmB83sLzN7yMwKBuXr5Zffy8y6mtkcv/zpyZ85YEDrgGMyKmD98mY23B+vFG9mm/31Uh03P+9a/1XGzF43s03mjXFqE7S8lJm9aWZb/XNympmd5uepZ2Zfmdlu/zN4z8yKB22nkH+OfmtmGwPqNTa5nKD8g/39amNm1/vn60HzuqsONa+1Nnid5O/OrIDzcamZvWRmpYPy1vLrucGvywYze9XMyqf3uYdiZleY2U9mtt/f3lwzuyUoT09/X/6XThnX+csfDko/y8zG+cc8+Xs9zMyKBeULPB8vMLMf/P1fE0b92/mfyZaA4zvNzDr7y6v758P4dNavaWZJZjYhIC35uhFpZo/75+Eh864TPYLWXwv09P9cE+p8Dshb28zG++fYfvO6LddKp16ZHjvzutj96P85KGDbgdejkGOSzCzavOvofP/7sM8/R4eaWWTIg522jKvNu1Yd9L8TL5hZ4XTyhnWtTT4X/D8Drw/Ojnynq5jZEDP7zbxr1iEz+9vMnreg761IfqSWJJGccxPeDxVvOOd2ZZTROZcQ+Lf/n91HwDXAX8Bof9FlwOdmdo9z7pUQRT0PtAQmAd8AVwMvm9kh59zrAeVH+8tbAXOBN4GiQBfgezO7xjn3RYjyvwDqA9Pwugxu9dNvw+s2OBOYDBQH2gMjgNPwgsWs2oM3viZYYeA+vGObMvbGzJ4D/gesAz4BDgAXA2+YWX3n3L0BeWOAX/BaYSYCS4CzgZ+AP7JR1xbAw8BXwAz/7/8Azc3sXOdcbMC2+wL/B+wAJgC78T6zYcC5QJogGOgOXOTnnw4kAPOAtXhdNdcBo/y8C/3tlAfmADWBb4GxQB288/IyMzvfObciaDtRwA9AIeBzoACwL2B5Ib+sgn55NfDOmW/N7DK84/cr8C7eudXLX+/mgDLKAC/inSsTgb1AXbxzvYOZNXHOhbqp/y/e5zkB74a2M/CIX5eHkjOZWYRf9yv84/M+3rlaG7gd77u028/bApgKRON9dmu
AenhjvC4xs3My++765dyP993b7pcfD3QF3jazRs65fn7WL4DXgOv9/MGuAxzesU0uuyvwsV/meGAL3rn6EHChmV3gnIsPKud8YADed/w1vO9jRvW/3N//LXjHdxdQCWiGd5y/cs6tNbNv8c6dSs65LUHF/AcvYH8nxCY+9us8Ge/z6gGMNbM9zrmv/Twv450vjYBX8L7/4J/PAarjndd/AG8BZ+JdexqYWQPnXFzAfoV77Kb75fbE+/5Oz+h4BZRfBPgeaI53nX4L7/OrBzyA9xnvSbcAr4z/4B2z3Xjfmzi8a0DddFYJ91q7Fu/6GXx9SF4GXnfve/19mOXXvRlwP3CBf404nFH9RfI055xeeumVAy+8GzkHXJiNdXv7644ECgSkF8W7CT0EVAlIH+XnXwlUDEivjTcWanlQ+cP8/A8FpZfH+w90O1A4IH26n38uUDJEfasBEUFpBfFuQBOBU4OWOWB6UNp075KV4XExvADIAf8LSL/ETxsPRAekR/ppDjgnIP19P+2+oPKH+OkOqB7G59QmIP9NQcv+z08fEpDWwP885gQeR3+/Rvj5uwWk9/LTEoBW6dQhzbH009/zlw0MSr/JT/8hKH2tnz4RKBSivOTlHwWdk8P99N1An6DPf4G/v4HnZFTguRuQ3trfz7eD0gf75e8CageklwF2AvsD6wv0S28/gJJAMf/fhfBuHncB9YPyXe2XMSKMc6CWv48bgUoB6cXwbuQdcEFA+sd+WvA2y/nlzAxK2wesDj5mQH/Sfg8Cz8frMqt7wHpf4F1TyodYVjbg31f5ZT8QlCcCWA9sBgoGf6fxbsCLBX3WDpgWVM4o0vnu4QUxyft2fzrneo9jcOwGp3OMphN0fcIL9h1eIGpByyoGHot0yizp13EvUCMgvTjeDzehrpNHfa0NWFYBKBoi/VF/vRvCPYf00isvvnK8AnrpdbK+gKX+fzR1Qyy7HO/mL/BVL2D5Irybt1A3q5f75fYNSEu+uegVIn9ysFbc/zsC74Z2cTr17uvnvzwgLflm57IsHoMrQ9Urnf/809yEhCgvOYh5Nyj9KyAJqBBinTP8dZ73/47C+7V2AxAZlLeof9yzGiQtDXGTVBavNWt1QFpy4NQ0RFkl/H34LCCtl59/XAZ1CHUsC/n7uBmICrHOAn+9UwLS1vppDdLZTvLyakHpLTkSoAcfg+SbrbB+KPDP+7VBaYNJ5+aVIzfHZwZ97w4F1zODc/N/6SyfB+wIo86P+eX0C7HsCn/ZOwFpnfy0oUF5+/jpvQPS7iMocA5YFgFsA+aFOB/nhnO8A9b7AvgXKJVJvki81uNlQekd/e0+HZQ+3U9vE6KsNcDOoLRRZB4krSJtkJAcdL1wDI5dmvMscF8C/i6IF+Bsx7+2ZvXFkR8sng2xLLlVcXqYZYV9rQ2jrDL+eqOys1966ZVXXupuJ5I7XY7XWhRoIbDM78JxBvAPMMDSDulJHitRL0S5C0KkbfTfS+H96l7X//c6Cz3dbfK4kHp43fYCzQuRHzOLwvsFv7tffrGgLJVDrZcV/hiGgXjdTO4IWtwM74alT4jjlTwuIPl41cHrXvWrC+pK4pyLNbOFwIVZrN4vzjkXVNZOM1sGnGVmxZ1z+/16OqCz38UpWByhP9eQxz0D9fD2cbZz7lCI5TOAxv5rfeD2nXNLMih3t0s7gD2529WfwccgYFmVwEQza4LXHel8vF+zA8duBHcdS5bZuY0/zqSeX5fMBto389/PSOd7UBgoa2blnHM7Miinsf8+PcSyH4PygPeL/068m+BHA9Kvx9v3T0PUsaWZnRGi/MMcm/PlE7zugYvN7GO8ffnZOZeqq5hz7rA/RugBvyvWL/6i5LFX76ZTfnqfXbUs1hNgkXMuKURZ4J8Hvuweu3DVw2vxme5/t7Ojkf/+U4hlP4da4Vhfa83sarz/ixoDpUk9lv2or9siuZmCJJGcsxXvP9IqwPLABc65O/Bv9P0btEEBi0vjdb8
6NSg9WNEQaXtDpCWPd0oe4F7Gf2/Ekf+kwy1/Wzp5P8cbL7UMbzzFdn+71fH6+UdlsJ1MmVlzvBuw1cBVLu0YjDJ417twjldJ/z29fdmaTnpGtmdSVgm8ALUM3mc7MIOysnLc01MiaPvBtgTlS5befiTbFyItIYxlKUGQmbXEGwORhDe27W8gFv9XcLzzPpRwzu3kz3ZTOmUESv4e9MwkX1G88WPpSfdYO+f2mtmhgDzJgcanwJ1m1sI5N9vMauBN3jLeObc7RB1DTr6SgSydL865T8wsAa/15V68MSkJZjYRuCco4Hwbr7vaLcAv5k1O0gmvm2DwGLfk8tP77LIzuVQ45wFk/9iFKyvnWmZlhPq80vvuHrNrrZn1x5uBdRvwNV6wmTzOc1BWyhLJixQkieSc2XjdQNpw5BflcCTfbP7inGt5rCsVUP6HzrkbsrJiiJYCzOwcvP+0p+J1x0sKWNadzG9CM2Rm1fDGFcUDndL5VX8fXitI1TCKTL7JqpDO8opZryXpzYSWXNa+gPdEvHEAoVp40pPmuGcieXvp7UtwvbK7nex4GK874HnOudmBC/zz5Wgkf7ZVMszlSd73i51z3x3FNgOPdaobWzMriXejGXycPwTuxGs9mo3XqpScHqrs05xzf2ehTln+HJ1zn+NNClMab9KNHsC1QE0zOyv5u++cW2lmM4BrzKwfcCPe5xlqwoaclN1jF67kVrZwzrX0ZHQtSvPdPZbXWvNm0nwUL8hrFHhdNbOKZPyDk0i+oCnARXLOaLxfy3ubWZnMMifzu24sw+sGFNyV4lhYiteqcY6FmD45G5Kn3p0cohvM+UdTsL//E/EGYXd3zv2VTtbfgBgzC+eBvCvwfi1tFjxFr5kVJXXXqHCdFzj9rl9WWbyWxDUB3XF+w/u1+5xsbCMrluPtY3MzKxRi+QX+e3Zm8jtatfDGogQHSBU5ci5li3PuX7zzu64fXGfkN/+9+dFskyOzr10QYlnroDzJZuGNybnGv1m9Hu+GObh767GqY9icc7udc18553rgtfg1Im23uLfwWti6481qtw/47BhsPtF/PxbXpaweu6xueznedbTFUVynk79/rUIsC/UDWXautUmE3qdyeC2cs0P88HRU122RvEJBkkgOcc4tw5vWtiIwxUI8m8ZXMkTacD/9VX+67lTMrIGZpdcSklm9EoDX8cbmpPecmWb+2KhwJHfFSfUfq99FLuQzisLhT+U8FmiINwvd1AyyD/ff3/F/vQ8uq0by8fdbcMYBMXjTSgd6EK+7Y1bVx/tFPdAgvHEtYwLSXsW7GRtpZmn6+5tZRTOrn43tp+Lv4yd4YwruDVxmZtfjTYM8PYxxO8fDP0CZwP30A7kRpB6blF2v4bVsjAgOEM2sRMAN7Xi88VgPmdm5wYWY90ywZsHpIYzF+0z7W8CzlfyAO3kK+/cDV/BbZcbitUA+gHf+fOacO0hq7+FNqPCMhX6GVCkzOyuMOmbIzNr6Y10C0wpypMtacL0+xxtXNQRv/ORHzrkDR1sPvElTwPtuHq2sHrssbdu/jr6FF2w8F+JHkgoW9NyzEL7CC7Ru97tcJq9bDG96+2DZudbuIvQ+bcMbA3m2BTyTyb8uPZVJvUXyBXW3E8lZD+B1t7kLWGFm04HFeDcdFfF+5TwdbxayVQHrvYY3RuFGoI2Z/YA3jqQyXtDQGO9ZPFkdq5LsMaAp3vNCuprZT3j/mVb10+v42wrnxudXvIHi15pZJbxpwmviP18Fb9rg7OiGN9ZhM95N9eAQeV52zu1xzk0xs2F4Xbn+NrNpeLPXlce7AW2O16Vprb/ew3jP3HnBvAcrLgaa+Pl+IvQvuxn5BnjLzDrhzfLWAq+b5V/AM8mZnHN/mtl/8QKCFWY2xa9TabwJM1rijVdamsXth/IAXkvG02Z2ITAf73PtineDe+cx2EZ2jMA79r+Y2Sd44yna4QVIf5DxOLlwjMR7plQXYLmZTcI7j2vizcLWElj
onDvkD1r/GphjZt/gfV4F8cZ3tMbrCtcho4055/42swF4n/OfZjaOI89JqgG86pybEWLVD/FuhAcH/B1c9jY/qP0EWOKfLyvxWnFq+nUcTdqJTLLqRaCqf31ai9fy0A4vABrjnEvVjdA/dh9w5Jk8bx/l9pP9iPess9fN7DO8z+1P59zkrBaUjWO3HO9ac62ZxeJPBuGcezqDzQzEC1juAFr551AS3vfsErxrfLrPSXLO7TGze/C6Kv7uT5qR/JykJXj/NwTKzrX2R+Bq/7u2CC+gH+uc+8fMXsf7EWWBmU3GC4qTn8GU3nOaRPKPnJ5eTy+99HLgzbT0Lt5/0rF4UxRvxHsY4O0EPEMkaL3r8f6T2+2v8w/eYPc7CXi+BRlPnRtyGd7N4F14z+zZh/ef82q8X9hvIsTzTjLYv4r+djbh3dj87te9DSGm1SWMKcA5Mv11Rq/gferoH9MdeDeqG/FmcrsfKBeUtybeL+J7/f3/Bi8ATfdYhtjvlP0D2uI9oDbW3/67hHjujL9eC7zWrM1+Pbf4n8NjBExdHXAMemVQh3Sn+MUb6zDCP2+St/M+Ac9kCci7lqDpt8NZzpGpmUeFWBay/ngPjl3gnytb/GNeMdR5xpEpwNuEKD/kMryb/DvxbiJj8VoU/gJeIGiaa7yuZCPwfqQ4hPdd+xOvdfKc9I5HiLpciTcj2b8c+Q7clsk68/36rydoWuugfKf7x2i9/znu8Nd9mtSPDkg5H8Ott79ed7xZ9Vb5dd+J112tN+k86wevNdIBf2RQbprPM7NleD9grMKbfS7lvMrkPMtoWVjHzs97Ht6PJP/65bkw6lvYr/OfeNfQvXjdK4cQ9IiBDI7TNf46B/GuWS/45Ya6Tmb1WlsFryvkTrwALuX7gtfi+hje5CkH/eM+2E/P8tTheumV117m3IkYhysicvLxW6F+BB53zg3O2dqInDhm9h+8FpC7nXP/l9P1ERHJKo1JEhERkWPGH8fYD68l44Mcro6ISLZoTJKIiIgcNTM7E2+cYCu8sWPPuNTPdRIRyTMUJImIiMix0AR4Em/c1tscmXRCRCTP0ZgkERERERGRAPmmJalcuXKuevXqOV0NERERERHJpX7//fcdzrnymeXLN0FS9erVmTdvXk5XQ0REREREcikzWxdOPs1uJyIiIiIiEkBBkoiIiIiISAAFSSIiIiIiIgEyDZLMrJuZfW5m68wszsyWm9kwMyselK+0mb1tZjvMLNbMvvOfmZApM4sws4fNbK2ZHTSzP8zsquzulIiIiIiISHaF05L0PyARGAB0AF4D7gS+NbMIADMz4Ct/+X+Bq4BI4EczqxrGNp7Ae57CCKAjMAcYZ2aXZmVnREREREREjlY4s9t1cs5tD/h7hpntAkYDbYAfgM5AS6Ctc+5HADObDawBHgD6pVe4mVXAC8Seds497yf/aGa1gaeBKVnaIxERERERkaOQaZAUFCAlm+u/x/jvnYFNyQGSv95eM5sIdCGDIAm4BCgEjAlKHwO8a2Y1nHNrMqtnOPbt28e2bds4fPjwsShO5LiLjIykQoUKlChRIqerIiIiInLSyO5zklr770v99wbA4hD5lgA3mVkx59y/6ZTVADgE/B1iXYDT8Vqkjsq+ffvYunUrMTExFC5cGK+HoEju5ZwjLi6OjRs3AihQEhERETlBsjy7nZnFAEOA75xzyU9vLQPsDpF9l/9eOoMiywB7nHMunXXLZFCX281snpnN2749VIPXEdu2bSMmJoYiRYooQJI8wcwoUqQIMTExbNu2LaerIyIiInLSyFKQZGbFgAlAAnBz4CIgOMhJTs+02Oyu65x70znX1DnXtHz58hnmPXz4MIULFw6jOiK5S+HChdVFVEREROQECru7nZlF481gVxNo7ZzbELB4F6FbfJJbkEK1MgWuW9rMLKg1qXTA8mNCLUiSF+m8FRERETmxwmpJMrNI4HPgXOBS59yfQVmW4I0tCnY68E8G45GS140
CaoVYF+CvcOooIiIiIiJyLITzMNkI4EPgIqCLc25OiGxfATFm1jpgvRJAJ39ZRqYC8cD1Qek3AIuP1cx2IiIiIiIi4Qinu91I4GrgSSDWzJoHLNvgd7v7CpgNjDGz/njd6x7GG1f0bGBhZpYAjHbO3QLgnNtmZi8BD5vZfmA+0B1oizd9uIiIiIiIyAkTTne7jv77I3iBUODrVgDnXBJwOfAt8CrwJZAIXOicWx9UXgH/FegRYChwNzANOB+4xjk3MYv7c9IYNWoUZsbffwfPnH789OrVi+rVq2dpncGDB/PDDz8ck7Jym7Vr12JmjBo1KsvrtmnThjZt2hzzOomIiIjkFvsPHmbphp3EHkrI6apkWTgPk60eTkHOuV3Af/xXRvnSjEJ3ziXiBUlDw9mW5IyBAwdy9913Z2mdxx9/nEceeYS2bdsedVkiIiIiknvEHkpgw+44Nuw+wIbdcazf5b1v2HOA9bviiDm4ktciX+bvS9+gUfO2mReYi2T3YbJyEqpVK3hujdxRloiIiIgcH845Fm3Yy6INe7xAyA+INuyOY1dsfKq80ZERVC1dhKqlC9Oh7HZuXf0MLrIIxavG5FDtsy/LD5OVvGXMmDE0atSI6OhoypUrx4033sjmzZtT5Tlw4AB33nknZcuWpXjx4nTt2pVZs2al6UoW3EUuISGBgQMHUqtWrZTyW7Zsyc8//wwcmbr6ySefxMwwMwYPHhyyLIDY2FgeeughatWqRVRUFJUqVeKqq65i69at6e7f9OnTMTPGjx9P7969KVOmDKVLl+bee+8lMTGRuXPn0rJlS4oWLUqDBg2YNm1ato9Rnz59KFu2LMWKFaNz585s2LAhTVkAM2bM4KKLLqJ48eIULVqUSy65hMWLF6e7DyIiIiK5zZodsbz07QoufH46XUb+wsAJS3hv1lqWbdlPqSKF6HBGJR7oUJfhPc7iyz7nMfeRdiwd0oHv7mvNqI6F6bv+PqKLFKfwbV9TpmrdnN6dLDupW5Ien7iEvzbty9E6nF6lBIM6hZo9/ei9+eab9O7dm+7duzNs2DA2bdrEgAED+PXXX5k/fz7FihUD4Pbbb2fcuHEMHjyYpk2b8v3333P99cGTDab1zDPP8NJLL/Hkk0/SuHFj9u3bx7x589i1y3u01ezZs2nRogW9evWid+/eAFStWjVkWfHx8Vx88cUsXLiQhx9+mObNm7N3716mTZvG7t27qVixYoZ1ueeee7jyyiv55JNPmDlzJkOHDiUhIYHvvvuO/v37ExMTw9ChQ7nyyitZt24d5cqVy9Ix6t27N5988gmDBg3inHPO4dtvv+W6665LU4/JkyfTpUsXLrvsMsaMGZNynFq1asWiRYs45ZRTMj2uIiIiIjlh+/5DTFq0ifELN/HH+j2YQfMaZbmzTS1a16lAheJRRERk8vzGLX/C6E4QWQR6TYQyNU5M5Y+xkzpIys8SExMZOHAgbdq04eOPP05Jr1evHq1ateLdd9+lX79+LF++nLFjx/L000/zwAMPAHDxxRdz4MABhg8fnuE2Zs+eTfv27VONLerUqVPKv5s39yZCjImJSfl3esaMGcPs2bOZMGECnTt3Tknv1q1bWPvbtm1bXnzxxZT6T548mREjRvDTTz/RsmVLACpXrkyjRo2YPHkyPXv2zPIxevLJJ3nooYcAaN++Pf/++y+vv/56qnrcfffdtG7dmgkTJqSkXXjhhdSsWZMXXniBl19+Oaz9ERERETkRYg8l8M1fWxi/YBM//72DxCRH/colGHBpPTo1qkLlkoXDL2zLnzC6sx8gTYIyNY9fxY+zkzpIOl4tOLnB8uXL2bZtG08++WSq9JYtW3LqqacyY8YM+vXrx6+//opzjquvvjpVvm7dumUaJJ1zzjkMGzaMRx55hI4dO3LuuedSqFChbNX3m2++oVKlSqkCpKzo2LFjqr/r1avHihUrUgKk5DSA9eu9CRezcoySkpK45pprUuW79tprUwVJK1e
uZNWqVQwYMICEhCOzuBQpUoQWLVowc+bMbO2biIiIyLF0ODGJn1fuYPzCjXyzZCtxhxOJKVWY3hfU5IqzYqhTsXjWC92y2AuQCkZDz4l5OkCCkzxIys+Su7xVrlw5zbJKlSqlLE8ee1OhQoVUeTLr3gYwYMAAoqOjGTNmDE899RTFihWjW7duPPfccynd2cK1c+dOYmKyP6ivdOnSqf4uVKgQpUqVSpMGcPDgQSDrxyj4mAT/vW3bNgBuueUWbrnlljRlVqtWLbydERERETnGnHMsXL+H8Qs2MmnRZnbGxlOycCRdz46h61kxNKlWOvOudOnZugTe9wOkXpOgbN6foEtBUj5VpkwZALZs2ZJm2ZYtW2jatClwJEDYtm0bNWoc6TOa0WQJySIjI3nwwQd58MEH2bJlC5MmTeK+++7jwIEDfPLJJ1mqb7ly5U745AZZPUZbt26lZs0jv4oEH6OyZcsCMGzYMNq1a5emzOy2somIiIhkV2KSY9qSLbwxczV/rN9DVMEI2tWvyBVnxdC6TnkKFTzKedy2LvHGIBUolG8CJNDsdvlW3bp1qVixYqqxNgCzZs1i3bp1tG7dGoBmzZphZowbNy5VvuC/M1OpUiVuvfVW2rVrlyrYKVSoEHFxcZmu3759e7Zs2cLEiSfu+cFZOUYRERF8+umnqfIFr1e3bl2qV6/OkiVLaNq0aZpXw4YNj+8OiYiIiPgOHk7kgznraPvCdPp8OJ89B+J54oozmPdoO0ZefzYXn17xGARIfwUESJPzTYAEaknK86ZOnUqlSpVSpZUsWZKLL76YIUOG0Lt3b2644QZuuOEGNm7cyCOPPMJpp53GzTffDHg39tdddx0DBw4kKSmJJk2a8MMPP6QEKxER6X95unTpQqNGjTj77LMpXbo0CxYsYOrUqSkz2QGcfvrpTJ48mQ4dOlC6dGmqVKlClSpV0pR1ww038NZbb9GjRw8efvhhmjVrxv79+5k2bRr33HNPyniiY6lAgQJZOkaPPfYYSUlJKbPbTZkyJVV5ZsbIkSPp0qUL8fHxXHPNNZQrV46tW7cya9YsqlWrxn333XfM90NEREQk2e7YeN6fvY73Z69lZ2w8jU4pxUMd6tGNTP0CAAAgAElEQVS+QSUKZLc7XSjJAVJEJPTMPy1IyRQk5XH//e9/06Q1aNCAxYsXc/vtt1OkSBGee+45unTpQrFixbj00kt59tlnU6a2Bm8a7OLFi/Pss88SHx9P27ZtGTlyJJdffjklS5ZMd9sXXHAB48aNY+TIkRw4cIBq1arxwAMP8Mgjj6TkGTFiBP369aNTp04cOnSIQYMGpTwrKVBkZCTffPMNjz/+OG+++SaPP/44ZcuW5fzzz0/pFnc8hHuM3njjDYoVK8bzzz+fcozGjh2bamIIgEsvvZSZM2fy5JNPcuuttxIXF0elSpVo3rw53bt3P277ISIiIie39bsO8PZPq/l03gbiDifStl4Fel9Qk3NrlEl5duUxs22pHyAV9FqQytU+tuXnAuacy+k6HBNNmzZ18+bNS3f50qVLqV+//gmsUd723HPP8eCDD7J27VpNOJAL6PwVERGRUP7csJc3Zq5iyp+bKRBhdGkcw+0X1MzeDHXh2LYMRl8OVsAbg1TutOOznePEzH53zjXNLJ9akoRJkyaxePFiGjduTEREBD/99BPPP/8811xzjQIkERERkVzGOcfMlTt4Y8YqZq3aSbGogtzWqiY3n1+DSiWjj9+GUwKkiDwZIGWFgiShePHijB8/nqeffprY2FhiYmLo168fjz/+eE5XTURERER8zjmm/LmF4T+sZNmW/VQsEcXDHevRo1k1SkRHHt+Nb1/udbGzCG8MUj4OkEBBkgCtW7dmzpw5OV0NEREREUnHgn9288Skv5j/zx5qVyjGc90a0qVxTHgz1MUfgH0bYd8mcIlZ33j8AZh0L5h5AVL5OlkvI49RkCQiIiIikktt3BPHs1OXMWHhJsoVi+KZq86
kW5NTjsxU5xzEboe962HvBu+1Z33A3+vhwM6jr0jRCidNgAQKkkREREREcp1/DyXw+vRVvPXTagD6XlibO9rUoti672HiM34g5AdFiYdSrxxZFEqdAiVPgSpnQcmqUKoaFK8MBbLZLa9cHShy/GYczm0UJImIiIiI5BKJSY7Pfl/P89+sYPv+Q3RpXIUHOtQjpmQ0zPo/+PYxKFwGytSESmdCvUu9YKjkKV4wVLIqFC7tdY2TbFOQJCIiIiKSC8z6ewdPTF7K0s37OLtaKd68sQlnVSsNSYkw5X8w921o0BWueB0ij+MsdqIgSUREREQkJ63e/i9PTVnGd0u3ElOqMMN7nMXlDSt7D4GNj4XPboEVX8N5/aDd4xARxmQNclQUJImIiIiI5IA9B+J55fuVfDB7HdGRBXigQ13+c34NoiMLeBn+3QZju8PmhXDp83DubTlb4ZOIgiQRERERkRPocGISH8xexyvfr2T/wcNce2417m1Xh/LFo45k2rESxlzlBUrdx0C9y3KuwichtdXlUaNGjcLMUl6FChWiVq1aDBgwgIMHD+ZInQYPHuw1C+cS7777LqeddhqFChWiVKlSOV2dVBYuXMjgwYPZtWtXTldFRERETqAF/+ym84hfGDLpLxpWLcmUu1vxVNczUwdI/8yBdy72utr1mqQAKQeoJSmPGzduHFWrVmX//v18+eWXDBs2jP379zN8+PCcrlqO2rRpE7fffjvXX3897733HtHRuWtw48KFC3n88ce54YYbKFPm5JlOU0RE5GS17+Bhnp+2nA/mrKNi8Whev+FsLmlQKe0PzEvGwxe3e7PU3fCZN4udnHAKkvK4xo0bU7t2bQAuvvhiVq5cyTvvvMMrr7xCxEk8qG/lypUkJibSs2dPWrZsedTlHT58mIIFC+aqljIRERHJ/ZxzTF28hcETl7Bt/yF6tqjO/e3rUDw6MjgjzB4J3zwKp5wL134ERcvmTKVF3e3ym7PPPpu4uDh27NiRkrZ9+3Z69+5NnTp1KFKkCKeccgrXXXcdGzduTLVucne5lStXctlll1GsWDFOPfVUhgwZQlJSUqq8CxYsoFWrVkRHRxMTE8MTTzyBcy5Nffbt20ffvn2pUqUKUVFR1K1bl5deeilV3unTp2NmjB8/nt69e1OmTBlKly7NvffeS2JiInPnzqVly5YULVqUBg0aMG3atAyPQa9evWjTpg0AF110EWZGr169AC/YefTRR6levTqFChWievXqPProoxw+fDhl/bVr12JmvPrqqzzwwAMpdd+zZw8Aa9as4frrr6d8+fJERUXRuHFjvvzyy1R1WLFiBV27dqVChQpER0dTrVo1rr76ahISEhg1ahQ333wzAKeddlpKl8m1a9dmuF8iIiKSt2zYfYBbR8/jzg/nU65YFOP7nM/gzg3SBkhJiTD1IfjmEajfCW6aoAAph53cLUlfPwRb/szZOlQ6Ezo+fcyKW7t2LSVLlqRs2SNfrF27dhEdHc2wYcMoX748mzZt4oUXXuD8889n2bJlabqide3alZtvvpl7772XiRMnMmjQIE455ZSUG/sdO3bQtm1bKlWqxOjRo4mKiuK5557jn3/+SVVOUlISl112GfPnz2fIkCGceeaZTJ48mfvuu4/t27fz1FNPpcp/zz33cOWVV/LJJ58wc+ZMhg4dSkJCAt999x39+/cnJiaGoUOHcuWVV7Ju3TrKlSsX8hgMHDiQJk2a0K9fP0aOHMnZZ59N+fLlAejZsyeffvopAwYMoGXLlsyePZuhQ4eyevVqxo4dm6qcJ598knPOOYc333yTxMREoqOjWb9+Pc2aNaNChQq89NJLlC9fnk8++YSrrrqK8ePH07lzZwAuv/xySpUqxWuvvUa5cuXYuHEjU6ZMSTkmjz76KEOHDk3pLglQuXLlrH7cIiIikgslJCbx3i9refHbFQA8ell9ep1XnYIFQrRPxB+AL26DZZOgRV+4+AlN8Z0LnNxBUj6QmJh
IQkJCypikzz//nJdffpkCBQqk5Klbty6vvPJKqnXOP/98qlWrxtdff03Xrl1TlXn//fenBETt2rXjhx9+4KOPPkpJe+mll4iNjWXatGlUq1YN8Lr6nXrqqanKmTJlCj///DPvvfdeSktO+/btiY2N5YUXXuC+++5LFei0bduWF198MaW8yZMnM2LECH766aeULnOVK1emUaNGTJ48mZ49e4Y8JrVq1aJ+/foAnH766TRv3hyAxYsX89FHHzFo0CAGDx6cUp8CBQowcOBAHnroIRo2bJhSTsWKFfnyyy9TdbEbPHgwzjlmzJiREohecsklrF+/nscee4zOnTuzY8cOVq5cyYQJE1KCJoDrrrsOgPLly1OrVi0gdXdJERERyfv+WL+Hh7/4k7827+OiehV4vEsDqpYuEjpz7A746FrYMA86PAPN7zixlZV0ndxB0jFswckp9erVS/V3nz596Nu3b5p8r732Gq+//jqrVq0iNjY2JX358uVp8l52WeoZVM444wwWLFiQ8vfs2bNp3rx5SoAEULRoUTp16sSoUaNS0mbOnElERAQ9evRIVd4NN9zAO++8w+zZs+nUqVNKeseOHdPs24oVK1KNKUre3/Xr16epd2ZmzpyZsv3g+gwcOJAZM2akCpKuuOKKNGOQpk6dyqWXXkrJkiVJSEhISb/kkkvo378/+/bto2zZstSsWZOHHnqIrVu30qZNG0477bQs11dERETyjv0HD/PCNysYPXstFYpHpT8xQ7Kdq7wpvvdvhu4feN3sJNdQW14e9+WXXzJ37lymTJlCu3btePXVV3n//fdT5Rk+fDh9+vShXbt2fPHFF/z222/MmTMHIOR04cGzrUVFRaXKt3nzZipWrJhmveC0Xbt2UaZMGaKiolKlV6pUKWV5oNKlS6f6O9TU3YUKFUq33plJ3l5wt7b06hOq+9u2bdt4//33iYyMTPXq378/ADt37sTM+Pbbb2natCkPP/wwderUoWbNmrz22mtZrrOIiIjkbt7EDJtp9+IMRs9ey03NT+W7+1rT4YzK6QdIf3/vTfF9aB/0nKQAKRc6uVuS8oEzzjgjpbtW27ZtadiwIf379+eqq66iaNGiAHz88cdcdNFFvPDCCynrrVmzJtvbrFy5Mlu3bk2THpxWpkwZdu3aRXx8fEpwA7BlyxaAVOOmToTk4G/Lli0p3d0yqk+oC1vZsmVp1aoVDz74YMhtVKlSBYCaNWvy/vvv45zjjz/+YMSIEfTp04fq1aunaTETERGRvGnjnjgGTVjMd0u3Ub9yCd64sSmNT8ng2YyJh+GHJ+CXV6B8fbj2QyhbK/38kmPUkpSPJE+gsG3bNl599dWU9AMHDhAZmXoWlffeey/b22nRogVz5sxJ1eUtNjaWiRMnpsrXunVrkpKSGDduXKr0Dz/8kEKFCqWMFTpRWrduDXhBY3B9AC644IJMy+jQoQOLFi2iQYMGNG3aNM0ruNXMzGjcuHHKWKvFixcDpOSLi4s7up0SERGRE+7g4USGf7+Si16Yzi9/7+SRS+szse/5GQdIu9fCux28AKnJzXDbDwqQcrGwWpLMrCrwINAUaAQUBmo459YG5BkMDEqniEPOuQyf5mlm04HWIRbd65x7OZx6CnTu3JlzzjmH559/nr59+1K4cGE6dOjAM888w1NPPcW5557LDz/8wGeffZbtbdx77728+uqrtG/fnsGDB6cEZ4ULF06Vr2PHjrRs2ZI77riD7du306BBA6ZMmcLbb7/Nww8/nO7sdMdLgwYN6NGjB4MHDyYhIYHzzjuP2bNn88QTT9CjR49U45HSM2TIEM4991wuuOAC+vbtS/Xq1dm9ezeLFy9m9erVvPvuuyxatIi7776b7t27U7t2bRITExk1ahQFCxakbdu2gDehBMDIkSPp2bMnkZGRNGzYMFWLm4iIiOQuzjm+/WsrT0z+i/W74rj0zEoMuLR++hMzJFv8OUy8BzC4ejQ0uOKE1FeyL9zudrW
Ba4DfgZ+A9iHyvA1MDUor6qd9FeZ2FgG9g9LWhrmu+IYOHcoll1zC66+/zr333stjjz3Gnj17eOmllzh48CCtW7dm2rRp1KyZvSc4lytXju+//567776bnj17UrZsWe644w4SEhIYMmRISr6IiAgmT57MgAEDeOaZZ9i5cyfVq1fnxRdf5J577jlWu5slo0ePpmbNmrz77rsMHTqUKlWq8OCDDzJoUHrxfWrVqlVj3rx5DB48mAEDBrB9+3bKli3LGWeckTLbXqVKlahWrRovvvgiGzZsIDo6mjPPPJNJkybRpEkTABo1asTgwYN58803eeutt0hKSmLNmjVUr179eO26iIiIHIVV2//l8Yl/MXPFdk6rUIwPb23G+bUz+cE3/gBMfRDmvw9Vz4Wr3obSp2a8juQKFuoBoGkymUU455L8f98KvEVQS1I6690IvA9c7pybnEne6UBB51zLjPKlp2nTpm7evHnpLl+6dGnKtNAieY3OXxERkZzx76EEhn+/knd/WUN0wQLcc3EdbmpxKpGhnnkUaOsS+Ow/sH05tLwXLhwABSIzXkeOOzP73TnXNLN8YbUkJQdI2dAT2ApMy+b6IiIiIiInnHOO8Qs3MmzKMrbtP8Q1TavyQId6lCsWldmKMO9dmDYAokvCjV9CrQtPTKXlmDlus9v545guBF52ziVklt93lpntBYoAS4FXnHPvHK86ioiIiIgEW7xxL4O+WsLv63bTqGpJ3rwpk1nrksXthq/6wdKvoNZF0PUNKFb++FdYjrnjOQX4jXiz540OM/9M4ENgBVAKuAl428wqO+eGhlrBzG4HbgdSPdhURERERCSrdsfG89w3y/not38oU6QQz17VkG5NqhIRkc7zjgL9Mwc+v9V7OOzFT0CLvhChiaTzquMZJN0ELHDOLQons3PusaCkCWb2JfCImb3snPs3xDpvAm+CNybpaCssIiIiIiefxCTH2F/X8fw3K/j3UAK9zqvOPe3qULJwGGOIkhLh55fgx6eg1Cnwn2+gapPjX2k5ro5LkGRm5wL1gKOdwuwj4ArgTGD20dbLOZf+k49FcqlwJlcRERGR7FmyaS/9xy3ir837aFGzLIM7N6BupeKZr+gc7FgJU+6HNTOhwZXQ6WVvHJLkecerJaknkACMPcpykiOao75LjIyMJC4ujiJFMpnHXiSXiYuLS/MwYBERETk6iUmOt39azfPfLKd0kUK8ev3ZdDyjUugf1JOSYNdq2LzQe21aCJsXwaG9ULAwdB4OZ90I+jE+3zjmQZKZFQKuBaY457YfZXHXAXHAn0dbrwoVKrBx40ZiYmIoXLiwWpQk13POERcXx8aNG6lYsWJOV0dERCTf2Lgnjvs/Xcic1bvo0KASw648k9JF/Qe6JyXBrlV+IOQHRFsWwaF93vICUVCxAZx5FVRu7M1cV0pj4/ObsIMkM+vm/zO5k2VHM9sObHfOzQjIejlQhgwmbDCzv4F1zrmL/L9bAQ8BX+A9PLYkXmtUZ+Ah51xsuPVMT4kSJQDYtGkThw8fPtriRE6IyMhIKlasmHL+ioiIyNGZsHAjj45fTFKS47luDenWqBy27CvYON9vKVoE8fu9zAWjoeIZcObVUKWxFxRVqK/nHZ0EstKSNC7o71f99xlAm4D0nsAuYFIm2y0Q8PdmvJnwhgDlgMPAIuA659xHWahjhkqUKKGbTREREZGT0N64wzw2YTETFm6iyamlefmK2pyy+mN4ZQT8u9ULiCqdCY2uPRIQla+rgOgkFXaQ5JwLq3+ac65LGHmqB/39N9Ax3LqIiIiIiIRr9qqd3P/pQrbuP8SANhW5pdC3FBjdAw7ugRqt4co34dSWUOB4TvwseYnOBBERERHJlw4lJPLityt4c+Zqzi59iPFNZlNh/liI/xfqXgqt7oeqTXO6mpILKUgSERERkXxnxdb93PPxQvZtWcVHVX6k2Z6vscWH4YyroOW93uQLIulQkCQiIiIi+UZSkmP07LV
8+vX33BX5FZdG/0zE7ghofB2cfzeUrZXTVZQ8QEGSiIiIiOQLW/cdZMTYz2mxcRSTC87FCkRjTXtDi75QMianqyd5iIIkEREREcnzfvnpe/h+CE+wkPio4liL+7Hmd0LRcjldNcmDFCSJiIiISJ6150A848cMp8fGYcRFFGFns4co26YPRJfM6apJHqYgSURERETypB+XbmX5uMe4I+ljNpZsTIXbPiOyePmcrpbkAwqSRERERCRP2X/wME99tZBz/hzMHQV+ZnftK4m59nUoGJXTVZN8QkGSiIiIiOQZv/y9g6fG/cTjcU/RtMAKEto8QunW/cEsp6sm+YiCJBERERHJ9Q7EJ/D018uYNecX3o9+gUqRe+DKURRs0DWnqyb5kIIkEREREcnV5q7dxf/G/UG13b8yqchwoqKLYD2+hqpNcrpqkk8pSBIRERGRXOng4URe+GY5b/+8hruKzeD+qLexsvXguk+g1Ck5XT3JxxQkiYiIiEius3D9Hu7/dCFrtu9nTNWvOH/Hp3DaJdDtHYgqntPVk3xOQZKIiIiI5BrxCUn83/creW3GKqoXS2JuzXcou+lHaN4H2g+FiAI5XUU5CShIEhEREZFcYcE/uxnw5WKWbt7HLWdGMmDPYApsXgaXvQjn3JLT1ZOTiIIkEREREclRu2LjeXbqMj6eu56KJaL45LJIms25CxIOwvXjoPZFOV1FOckoSBIRERGRHJGU5Ph47nqenbaM/QcTuK1VDe6LWUrhSX2gWEXoOREq1MvpaspJSEGSiIiIiJxwizbsYeD4xfyxYS9tTi3EM3XWUHHVszB3AZzSHK79EIqWy+lqyklKQZKIiIiInDB7DsTz3LTlfPTbWtoX/pvhtedxypbvsJ8OQoUG0OFpaHIzREbndFXlJKYgSURERESOu6Qkx2e/b2DU1z/RLv57fi8+i9Lxm2B7STjrBu9VuTGY5XRVRRQkiYiIiMjx9df6bXw97m3O2T2FSQUWE1HQQUxrOGsI1L8cIgvndBVFUlGQJCIiIiLHxf618/lr8kjqbJvK/fYvB4pWws7tD2ddD6Wr53T1RNKlIElEREREwnf4IBzaBwf3ee+B//bf3cG97PnrR0rvW0pjV5DlpdtQqP0dFK3XVg+DlTxBQZKIiIiIpLX5D5j+NOzblDoQSozPdNWDVpj1iZX4tMSdtLriThrWPvUEVFjk2FGQJCIiIiJHJB6Gn16Amc9B4dJQ5SwoWxuiS0BUiYD3khBVPFXautiC3PLxMtbuOsSjl9XnthbViYjQRAyS9yhIEhERERHP1iXw5R2wZRE07O5Nx12kTFirzvp7B3d+OJ8IgzG3NqN5zbLHubIix4+CJBEREZGTXWICzHoFfhwGhUtB9zFQv1PYq38wZx2Dv1pCzXJFeafnOVQrW+Q4Vlbk+FOQJCIiInIy274cxt8JG3+H06+Ay16AouXCWjUhMYkhk/7i/dnruLBuef6vx1kUj448zhUWOf4UJImIiIicjJISYfZI+GEoFCoK3d6DM64Me/W9Bw5z19j5/Pz3Dm5rVYOHOtangMYfST6hIElERETkZLNzldd6tP5XqHc5XP4SFKsQ9uqrtv/LraPnsWH3AZ7t1pBrmp5yHCsrcuIpSBIRERE5WSQlwW9vwneDoWAh6PomNLwGLPwWoJkrtnPX2PkUKhDBR7c1p2n18CZ2EMlLIsLJZGZVzWy4mc02swNm5syseoh8Lp1X4zC3c5uZLTOzQ2a23MzuyNruiIiIiEhIu9bA6E4w9UGo0Qr6/AqNuocdIDnnGPXLGm4eNZeYUoUZf9f5CpAk3wq3Jak2cA3wO/AT0D6DvKOAN4LSVmS2ATO7zV9vGPAdcBHwqpmZc+61MOspIiIiIoGcg3nvwjcDIaIAdBkJja/PUuvR4cQkBn21hLG//sPFp1fk5e6NKRqlDkmSf4V7ds90zlUEMLNbyThI2uicm5OVSphZQeBJ4APn3CN+8o9mVgV4wszeds4
dzkqZIiIiIie9hHj44lb4awLUvBA6D4dSWRs/tDs2njs//J05q3fRp00t/te+rh4QK/leWEGScy7pONejBVAeGBOU/gFwM9AS+PE410FEREQk/0g4BJ/2hBVfw8VPwHn/zVLrEcDKrfu5ZfQ8tuw7yMvdG3PFWTHHqbIiuUtYY5Ky6E5/TNEBM/vBzFqFsU4D/31xUPoS//30Y1c9ERERkXzu8EH45AYvQLrsBTi/X5YCpIOHE3lt+iq6vjqLA/GJfHx7cwVIclI51p1JxwCTgE3AqUB/4Aczu9g5Nz2D9ZJH/e0OSt8VtDwVM7sduB2gWrVq2ayyiIiISD5yOA4+vh5WfQ+dXoEmvcJeNSExic9+38DL361ky76DtK1XgaFXnEGVUoWPX31FcqFjGiQ5524M+PMnM5uA1zo0FK/LXHqSf9pwWdzem8CbAE2bNs3SuiIiIiL5TvwB+LgHrJ4BnUfA2Tdmvg7ezHXTlmzluWnLWLU9lrOqleKVaxvTrGbZ41xhkdzpuE5L4pzbb2aTgVsyyRrYYrQ5IL1M0HIRERERCSU+FsZ2h7U/wxWvQeMeYa326+qdPD11GQv+2UOt8kV548YmtD+9IpbF8Usi+cmJmLvRyLyFKHnsUQNSB0nJY5H+OtaVEhEREck3Du2HD6+B9XPgyreg4dWZrrJsyz6enbqcH5Zto2KJKJ6+8ky6NalKwQLHY8i6SN5yXIMkMysBXAb8mknW2cAO4Hq8ZyQluwGvFemX41JBERERkbzu4D748GrYMBeuehvOuCrD7Bt2H+DFb1fw5YKNFI8qyIMd6tHrvOoULlTgBFVYJPcLO0gys27+P5v47x3NbDuw3Tk3w8z+B9TFm6o7eeKG/wGV8IKfwLL+BtY55y4CcM4dNrOBeA+P3YgXKLUF/gP81zkXn90dFBEREcm3Du6FMVfBpgVw9Xtwepd0s+6KjWfkj3/zwex1YHB7q5rc2aYWpYoUOoEVFskbstKSNC7o71f99xlAG2A50NV/lQT24bUA3eKc+y3EdlP9XOGce93MHHA/3qx4/wB9nXOvIiIiIiKpxe2GD66ELX/C1aOh/uUhsx2IT+Ddn9fwxozVxMYn0K1JVe5pV0cz1olkIOwgyTmX4eg959xEYGKYZVVPJ/0N4I1w6yQiIiJyUjqwCz64ArYthe5joG6HkNmWbt7HXR/OZ/WOWC4+vSL9L6lLnYrFT3BlRfKeEzFxg4iIiIgcK7E74f0usGMFdP8Q6rQPme3TeesZOH4xJQtHMvbWZpxXu9wJrqhI3qUgSURERCSv+He7FyDtWgU9xkLtdmmyxMUn8tiExYz7fQPn1SrLK9eeRfniUTlQWZG8S0GSiIiISF6wfyu83xl2r4PrPoGabdJkWbX9X+76cD7Lt+6nX9va3N2uDgUi9LwjkaxSkCQiIiKS221fAR9fB/s2wvXjoEarNFkmLdrEg58tIiqyAKNuPpfWdcrnQEVF8gcFSSIiIiK5lXPw+yiY+jBEFoYbPodTz0uV5VBCIk9NXsro2etocmppRlx3FpVLauY6kaOhIElEREQkNzqwC776LyybBDVaQ9c3oETlVFnW7zpA37Hz+WPDXm5rVYMHOtQjskBEDlVYJP9QkCQiIiKS26yeDl/eAbE7oP1QaH4XRKQOfr77ayv3fboQB7xxYxMuaVApR6oqkh8pSBIRERHJLRLi4YcnYNZwKHeaN0FD5UapsyQm8dw3y3ljxmoaVCnBa9c3oVrZIjlUYZH8SUGSiIiISG6wYyV8fgts/gOa3AyXPAWFUgc/W/cd5L9jF/Db2l1c36waAy8/nejIAjlUYZH8S0GSiIiISE5yDuaP9iZnKBjtPSC2/uVpsv28cgd3f7yAuMOJvHJtY7o0jsmByoqcHBQkiYiIiOSUMCZncM7xxszVPDt1GbXKF+O1G86mdoXiOVRhkZODgiQRERGRnBA4OcPFT0CLvmkmZ4iLT+SBzxcx8Y9NXN6wMs92a0i
RQrp9Ezne9C0TEREROZECJ2coWxt6fAxVGqfJtn7XAXp/8DtLt+zjwQ71uGYIsW4AACAASURBVKN1TcwsByoscvJRkCQiIiKSFc7Bym8hbnc21k2CX1+HzQvTnZwBYNaqHdz14XwSkhzv9TqHNnUrHIOKi0i4FCSJiIiIhOtwHIzvA0u+yH4ZhUunOzmDc45Rs9YydPJSapQryls3NaVGuaJHUWERyQ4FSSIiIiLh2L8VPu4BG+fDRYPg9C7ZK6dYRYgqlib54OFEHh2/mM9+38DFp1fkpe6NKRalWzWRnKBvnoiIiEhmtvwJY6+FuF3QfUzIVqCjKn7vQXqP+Z0/1u/hnnan0a/taUREaPyRSE5RkCQiIiKSkeVfw2e3QHRJ+M9UqNzomBb/+7pd9P5gPnHxCbxxYxMuaVDpmJYvIlmnIElEREQkFOdg9kj45lFv9rlrP0rzDKOj9dFv//DYhMXElCrM2NuaUaeinn8kkhsoSBIREREJlngYJt8P80d7Y4+ueD3kLHTZFZ+QxOMTl/Dhr//Quk55/u/asyhZJPKYlS8iR0dBkoiIiEigA7tgXE9YMxNa/Q8ufCTNQ16Pxvb9h+jz4e/MXbubO1rXov8ldSmg8UciuYqCJBEREZFkO1fB2Gtgzz/Q9Q1odO0xLX7+P7u568P57D4Qz//1OIvOjaoc0/JF5Nj4f/buOzrqKu/j+PtOeiO0hBISWmihQygiKjYsKIhIs5dV13XXZ9e2rrqPrn33Udfedl3bKiCIgoKiSBVFDEiHQBICSYAkBEgjZcp9/pi4IktJYJJJ+bzOmTPJb+698/2d45nw8d65VyFJREREBGDHcphxNTgC4Nq50PE0nw1dUuHi6QWpvPNdJu2jw5j16xH0iYv22fgi4lsKSSIiIiKr34F5d0KrRJg6HVp29tnQCzfn8uc5G9lbVM41wzty9wU9aBaq7x+J1GcKSSIiItJ0edyw8CH49kXoei5MfMu71bcP5BWV8/Cnm5i/YS892kTx0pWDGNyxhU/GFpHapZAkIiIiTVNFCcy+GVLnw9Bb4IInIeDU/2nk8Vim/bCLpz7fSoXLwz0X9ODmM7oQHOi7zR9EpHYpJImIiEjTcyATpl8FeVvg4qdh6M0+GXZ7bjF/mr2BlJ0HGNG1FY+P70vn1hE+GVtE6o5CkoiIiDQtGUth5vVg3XDVTEg895SHLHe6eWVxGq8uTSciJJCnJ/ZnwqA4jNHW3iINkUKSiIiINA3Wwqp/wBf3VW3QMA1adT3lYVdmFHD/7A1k7Ctl/MA4HhzTi1aRIT4oWET8RSFJREREGj9XBcy7C358D3pc7D0DKbTZKQ158FAlT87fyoyULOJbhvHujUM5s3uMjwoWEX9SSBIREZHGrTjXe/5R9io4814Y9SdwnPwmCtZaPl2/h0c+3cSBQ05uPasLvz+3O2HBAT4sWkT8qVohyRjTAfgjkAz0B8KAztbazMPaJAO3AGcCCcA+YDnwoLV2RzXeYwlw1lFe+oO19rnq1CkiIiLyCzmrYfrVUH4QJr4DvS87peH2FpbzwMcb+HprHv07RPPOjUPp3V6Hwoo0NtWdSUoEJgGr8Qaf0UdpMwXoDbwAbALigD8DKcaYAdbarGq8z3rg1iOuZVazRhEREZGfrZsBc38HUW3gpi+hbd+THspay/Qfsnhi3hacHg8PjunFDad3JsChjRlEGqPqhqRl1to2AMaYX3H0kPRXa23+4ReMMSuAHcDNwP9W432KrbUrq1mTiIiIyH87/IDYTmd4Z5AiWp30cLsKDnHf7PV8m17AaV1a8dSEvnRspW29RRqzaoUka62nGm3yj3JtpzEmH++skoiIiEjtKjsAs26E9EVVB8Q+AQFBJzWU22N5+9tMnl6QSoDD8MT4vkwZEo9Ds0cijV6tbtxgjOkFxAJbqtlloDGmEAiv6vO8tfbN2qpPREREGpG8rTB9KhzMgktfgMHXnfRQaXnF3DtrPWt2HeTsHjE
8Pr4v7ZuH+bBYEanPai0kGWMCgdeAfKA6QWcZ8D6wDWgOXAv80xjTzlr7WG3VKSIiIo1A6ufw0c0QFAbXz4OEYSc1jNPt4fWl6bzwdRrhIQE8N3kA4wa016GwIk1Mbc4kvQSMAMZYaw+cqLG19sjvLM0xxnwMPGCMec5aW3JkH2PMLXh31CMhIcEHJYuIiEiDYi0sfwYWPQbtB8Dk9yH65Fb5b8wp5N5Z69m8p4gxfdvx8NjexETpUFiRpqhWQpIx5km84eU6a+2XpzDUNOAyoC/w3ZEvWmvfAN4ASE5OtqfwPiIiItIQrf8QFj0KfSfB2Be8M0k1VO5088LX23l9WQYtI4J57erBXNinbS0UKyINhc9DkjHmAeA+4A5r7XunOlzVswKQiIiI/FLxXvj8XogfBuNfA0fND3NdvXM/985aT3p+KVcM7sCfxyQRHX5yGz2ISOPh05BkjLkDeAx4wFr7og+GvBIoAzb4YCwRERFpLKyFz/4ArnIY90qNA1JZpZu/LdjK299m0j46jHduHMpZ3WNqqVgRaWiqHZKMMVdU/Ti46vmiqu298621S40xU4DngC+ARcaY4Yd1L7LWbj5srDRgp7X23Krfz8A7+zQb7+Gx0cB1wFjgPmtt6cncnIiIiDRS6z+E1Pkw+nFonVijrj9k7ueemevILDjENcM78seLehIZUqsb/opIA1OTT4SZR/z+StXzUmAUcCHe5XEXVj0O91Obw9/38P/lswdwAI8ArQEnsB640lo7rQY1ioiISGN3+DK74bdVu1u5083TC1J5c8UO4pqH8cHNwxjRtXUtFioiDVW1Q5K19rh7X1prrweur+ZYnY74PQ24qLq1iIiISBN1ksvsVu88wD0z15Gxr5SrhiXwp4t7afZIRI5Jnw4iIiLScNRwmV25082zX23jn8szaBcdxr9vGsbIbpo9EpHjU0gSERGRhqGGy+x+3HWAu2euIz2/lKlDE7j/4p5EhWrnOhE5MYUkERERqf9qsMyu3OnmuYXbeWNZOm2bhfLujUM5UzvXiUgNKCSJiIhI/VfNZXbrsg5y98x1bM8rYcqQeO4f04tmmj0SkRpSSBIREZH6rRrL7Cpcbl74ejuvLc0gJjKEt28YwqgesXVcqIg0FgpJIiIiUn9VY5ndhuxC7p65jtTcYiYO7sCDlyQRHabZIxE5eQpJIiIiUn8dZ5md0+3hxUVpvLw4jdaRwbx1/RDO7qnZIxE5dQpJIiIiUj8dZ5ndttxi7vxwLRtzirh8YBwPXdqb6HDNHomIbygkiYiISP1zjGV2bo/lzW8yePrLbUSGBPLa1YO4sE87PxcrIo2NQpKIiIjUP0dZZrer4BB3z1zHqsz9nJ/UhifG9yUmKsTPhYpIY6SQJCIiIvXLEcvsrLVMW5XFY/M2E2AMT0/sz4RBcRhj/F2piDRSCkkiIiJSfxyxzC63xMkfP1rPktR8Tk9sxd+u6E9c8zB/VykijZxCkoiIiNQfVcvs7OjHmJsdxv/OWUaFy81fxvbmmuEdcTg0eyQitU8hSUREROqHqmV2zvZD+EPGcD7buJaBCc15ZmJ/usRE+rs6EWlCFJJERETE/6qW2bmdZUzOvZYN5fncc0EPbj2zC4EBDn9XJyJNjEKSiIiI+F35d28QmjqfJ5xXcSimE3NuGkBS+2b+LktEmiiFJBEREfGfimLyZ9xBTMZslnv6EjLyduac35OQwAB/VyYiTZhCkoiIiPhF5c7vKf3gBlqW7+atoMn0vfIx7u0S6++yREQUkkRERKSOedzkf/4kLX94hlLbkhmJL3HNpClEhOifJSJSP+jTSEREROqM50AWue9cS7uDa1hgTifksuf59YBu/i5LROQXFJJERESkThSsmkHI53cS5XHxr9g/Mu7aO2kVFervskRE/otCkoiIiNQqW1FM5r/voHPWbDbYrmSd8yI3nDkCY3QwrIjUTwpJIiIiUmsK01dRPv0GOlbmMDtyMkOu/z/6xkT
7uywRkeNSSBIRERHf83hIn/MEHdc9yyEbzacDXmfcuEkEODR7JCL1n0KSiIiI+FRZwS52v3U9XUtWszxwBDFXvsa4Lh39XZaISLUpJImIiIhvWEvG8um0WnQP7Wwln3X+E+ddeTehwfrnhog0LPrUEhERkVPj8bBjxUzsN3+nS8UWtpoulI19g0sGDfF3ZSIiJ0UhSURERE6KdVWSuvAtIlNeorNrF9nEsrDzvQy94vc0i4jwd3kiIidNIUlERERqxFVWzJb5L9Nm4z/pafPZbjqyqPcTDL3kJs4L07lHItLwKSSJiIhItZQX7SN17jN0THuPvhSzPiCJrYP+wvDRU+gWFODv8kREfEYhSURERI6rKHcnGXP/Svecj+hPOT8ED8Vz+u8ZcsbFOLSlt4g0QgpJIiIiclT7MjeS89lTJOXPpw+W7yPOJvLcu0geNAJjFI5EpPGqVkgyxnQA/ggkA/2BMKCztTbziHahwKPA1UBzYC3wR2vtsmq+z83AXUBnIBP4u7X2ter0FRERER+wltzNy8hf8AxJhcuIJJDvWlxK24vu4fQeffxdnYhInajuTFIiMAlYDSwHRh+j3ZvAGOAeIAO4HVhgjDnNWrv2eG9QFZBeB54EFgLnAq8YY4y19tVq1ikiIiIno6IYu24Ghd+8TpuibYTZcJa1vZbES+7mzPgEf1cnIlKnjLX2xI2McVhrPVU//wr4B0fMJBlj+uOdObrRWvtW1bVAYBOQaq0de5zxA4HdwOfW2usOu/4vYCzQzlrrPF6NycnJNiUl5YT3IiIiIofZuwFS/oVdNwPjLGWTpyMrWozjkqv+h/axrf1dnYiITxljVltrk0/UrlozST8FpBMYCziBGYf1cxljpgP3GWNCrLUVx+h7GhAD/PuI6+8BNwAjgcXVqVVEREROwFkOmz+BH96E7FW4A0L43I7gbdc5nH/exfzqzK4EaEMGEWnCfLlxQ29gh7X20BHXNwHBeJfsbTpOX4CNR+kLkIRCkoiIyKkpSIeUf8Ha96HsAJ6WiXzV4Q7uTetDbGxbnpsygN7to/1dpYiI3/kyJLUEDhzl+v7DXj9eX47S/7h9jTG3ALcAJCRovbSIiMh/cTsh9XNIeRMyloAjEHpeQmbnKdyyPIxtaaVcP6IT913Uk1CddSQiAvg2JBngaF9wqs58/U9tTvwFqcNYa98A3gDvd5Jq0ldERKTRcpZDTgqkL/bOGhXvgeh4OOdBPP2v5s11ZfzfJ6lEh7t458ahnNU9xt8Vi4jUK74MSfuBo03ntDjs9eP1Be+M0Z7Drrc84nURERE5UmUpZK2CnStg57eQnQLuCsBAt/Phkr9Dt9HsLqrkrhnr+C6jgNFJbXhqQj9aRgT7u3oRkXrHlyFpEzDeGBN+xPeSkoBKIO0EfcH73aTDQ1JS1fNmn1UpIiLS0JUXwq7vq0LRCtj9I3hcYAKgXX8Ydgt0PB0ShkOY9/9VfrpuNw98vAGXx/K3Cf2YmNxBB8KKiByDL0PSXOAvwETgHfjP1t6TgS+Ps7MdwHfAPuAqvGck/eRqvLNIK3xYp4iISMNyaL93hmjnt7DzG++23dYDjiCIGwwj7oBOp0P8MAiJ+kXXonInD83ZxMc/5jAwoTnPTR5Ax1YRfroREZGGodohyRhzRdWPg6ueLzLG5AP51tql1tq1xpgZwHPGmCBgB3Ab0Blv+Dl8rDRgp7X2XABrrdMY82e8h8fm4A1K5wA3Ar+z1lae/C2KiIg0UNbCggdg5cve3wNDocMQOPNe6DjC+3Nw+DG7f59RwJ0frmNvUTl/OK87t5/dlcAARx0VLyLScNVkJmnmEb+/UvW8FBhV9fMNwOPAY0BzYB1wobV2zVHe9xdb6FhrXzPGWOAu4B5gF/Bba+0riIiINEUrnvcGpAFXw8CrIW4QBIacsFuly8PfF27jtaXpdGwZzqxfn8bAhBYn7CciIl7G2saxKVxycrJNSUnxdxkiIiK+sfEjmHUj9L4
cJrwJjurNAKXlFfM/09eyaXcRU4bE8+dLkogI8eXqehGRhssYs9pam3yidvrUFBERqW92fgcf3wYJp8Flr1YrIFlreW/lTh6ft4WIkEDeuGYwo3u3rYNiRUQaH4UkERGR+mRfGkyfCs3jYcoHEBR6wi55xeXcO2s9S1LzGdUjhr9d0Y/YqBP3ExGRo1NIEhERqS9K98H7E7xbeV81E8JbnrDLgk17+dPsDZRWuHh0XG+uHt5RW3uLiJwihSQREZH6wFkG06ZA8V647jNo2eW4zUsrXDz62Wam/5BFn7hmPDd5AImxUcftIyIi1aOQJCIi4m8eD8y+BbJTYNK7ED/kuM3X7DrAH2asZdf+Q9w2qit/OK87wYHa2ltExFcUkkRERPztqz/DlrlwwROQNPaYzVxuDy8tTuPFRWm0bRbK9JuHM6xLqzosVESkaVBIEhER8afv34DvXoKht8Lw3xyzWea+Un4/Yy1rsw4yfmAcfxnXm2ahQXVYqIhI06GQJCIi4i+pn8MXf4QeF8OFT8JRNlyw1vJhShZ/+XQzgQ7DC1MHMrZ/ez8UKyLSdCgkiYiI+EPOGu9hse36w4R/giPgFy9ba1m6LZ9Xl6Tz/Y79nNalFc9M6k/75mF+KlhEpOlQSBIREalrB3bCB5MhojVc+SEER/znJZfbw7wNe3htaQZb9hTRtlkoj4zrzdXDOuJwaGtvEZG6oJAkIiJSl8oOwPsTwV0B138GkbEAlDvdzEzJ4o3lGWTtL6NrTAR/u6Iflw2I0851IiJ1TCFJRESkrrgqYMY1sD8Drv0EYnpQeMjJeyszeWtFJgWllQyIb86DY5I4v1cbzRyJiPiJQpKIiEhdsBbm/g4yl8Pl/2Bvi2TenLeZD77fRWmlm1E9Yvj1WV0Z1rkl5igbOIiISN1RSBIREakLi5+A9TMoGHovf93Wg49nLMLtsVzavz23ntmVpPbN/F2hiIhUUUgSERGpTdbCkqdg2d/4Jupirlnen+CA3UwdmsDNZ3QhvmW4vysUEZEjKCSJiIjUFo8H1/x7CUz5Bx+6zuLJ4mv47dlduW5EJ1pHhvi7OhEROQaFJBERkdrgdnLwg5tonj6HN1xj2D3kfpZf2JPIEP3pFRGp7/RJLSIi4mOu8hKyX59IpwPf8nLA1fS78iFu6R7r77JERKSaFJJERER8aFfObkrfnkCPyi1Ma3sXV1/3J6LDg/xdloiI1IBCkoiIiA9Ya5m9bDW9F91AoskhZeizTB1zo7/LEhGRk6CQJCIicoryist5etoCbs++mzYBRRSN/4Ch/S70d1kiInKSFJJEREROwRcb9/DmR5/xsucxmoVYgq/5jNCEIf4uS0REToFCkoiIyEkoKnfyl7mb2fHjIt4N/T9CIyMJvG4OxPbyd2kiInKKFJJERERqaGVGAXd9uI5uRSuZEfYcgc07YK75GFp09HdpIiLiAwpJIiIi1VThcvPsl9t4Y3kGNzRbzZ9DnsfE9oKrZ0OktvgWEWksFJJERESqYdPuQu6csY7U3GKe77qasTnPYjqOgKnTIDTa3+WJiIgPKSSJiIgch8vt4bWl6Ty3cDstwoNYlPw9XTY+D90vgolvQVCYv0sUEREfU0gSERE5hvT8Eu78cB3rsg4ytl9b/hY1g9DVr0O/KTDuJQjQIbEiIo2RQpKIiMgRPB7LO99l8tTnWwkLDuDlKX0Zk/kUrH4fht0GFzwBDoe/yxQRkVqikCQiInKY7AOHuGfmer7LKOCcnrE8NbYbsV/eDls/g1H3w1n3gjH+LlNERGqRQpKIiAhgrWXm6mwe+XQz1lr+OqEvk/o2x8y4GnYshQv/CsN/7e8yRUSkDvg0JBljlgBnHePlBdbaC4/T1x7jpYHW2rWnWpuIiMix5BWXc//sDSzcksewzi15emJ/4kPL4b3LYPdauOw1GDDV32WKiEgd8fVM0m+AZkdcOw14Fphbjf5vA68fcW3bqZclIiJydPM37OGBjzdwqNL
Nny9J4oYRnXCU7IW3xsP+dJj8HvQc4+8yRUSkDvk0JFlrNx95zRhzM1AJTK/GEDnW2pW+rElERORoDh6q5KG5m5izdjf9O0TzzKQBJMZGwv4d3hmk0n1w1SzocqwFEiIi0ljV6neSjDFhwETgU2vt/tp8LxERkeqw1rIkNZ/7Zq+noKSSu87vzm2juhIY4IDczfDeeHBXwLVzocNgf5crIiJ+UNsbN1wORAHvVLP9bcaYewA3sBJ4yFq7vLaKExGRpsPl9vDFpr38Y/kO1mUdpHubSN68bgh94qK9DbJT4N8TvIfD3vA5xPbyb8EiIuI3tR2SrgXygM+r0fbfwGfAbqAjcA+wyBhzvrV2ydE6GGNuAW4BSEhI8EW9IiLSyBSXO5nxQxZvrcgk52AZnVtH8Oi43kxMjic0KMDbKGMJTLsSImPg2jnQopM/SxYRET8z1h5rU7lTHNiY9kAW8Ly19s6T6B8FbASyrLUjT9Q+OTnZpqSk1LxQERFplHIOlvH2ih1MX5VFcYWLoZ1a8qszOnNerzY4HIedc7TlU5h1I7RKhGs+hqi2/itaRERqlTFmtbU2+UTtanMm6WrAQfWX2v2CtbbYGDMPuMmnVYmISKO2Pvsg/1i+g/kb9gBwcd92/GpkZ/rHN//vxms/gDm3Q9xguPJDCG9Zx9WKiEh9VJsh6VpgnbV23SmMYYDameoSEZFGw+OxLNySyz+X72BV5n4iQwK58fROXH96Z+Kahx2908pX4Yv7oMsomPw+hETWZckiIlKP1UpIMsYkA72BGi+zO2yMZsAY4Htf1SUiIo1LWaWbWauz+NeKTHbsKyWueRgPjunF5CHxRIUG/XcHa2F/Bqx5F1Y8B70uhQlvQmBI3RcvIiL1Vm3NJF0LuIAPjnzBGNMRSAcesdY+UnXtbqAHsJifN264G2gLXFVLNYqISAPl9ljeWrGDlxancfCQk/4donlx6kAu6tPWu5X3TypLIWcNZK+CrB8g+wc4tM/72oCr4dLnIaC29zASEZGGxud/GYwxQcBU4Atrbe7RmgABeL+v9JNUYHzVIxooAlYAN1lrV/m6RhERabi25xZzz6z1rM06yJndY/jt2YkM6dQCA3BgR1UYWgVZqyB3E1i3t2OrbtD9AugwBOKHebf4NuZ4byUiIk2Uz0OStdYJxBzn9Uy8Qenwa58Cn/q6FhERaTycbg9vLMvg+YXbiQgJ4IVJSVzaIhuT/TasrJolKs33Ng6OhLhBMPIP3kDUIVmbMoiISLVpjYGIiNR7m3cXcc+sdWzaXcQVSVE8HPc9kYvvgGLvDna07AqJ51XNEg2F2CRwBPi3aBERabAUkkREpN6qdHl4aXEaryxOo0fYQRb3WUnnXR9BRgl0Pgsu+ht0PB0iWvm7VBERaUQUkkREpF5an32Qe2auJyBvA9NjFjO4eBEmHehzOYz4HbTr7+8SRUSkkVJIEhGReqXc6ea5r7ax+ZtPeCRkPsNC1kN5JAz7NQy/DZrH+7tEERFp5BSSRESk3liTsZcFH77KZYdmc1/wLjwRbWD4wzD4Bghr7u/yRESkiVBIEhERvysrOsA3M56mT/YH/Mnsp7RFNxj1Co6+EyEw2N/liYhIE6OQJCIi/nNwFzlfPk/zze9zPmVkRA2i7OJXieh1gc4wEhERv1FIEhGRumUtzoxvyP/6BdrsXkgbC0sDTyf2wrvpO2SUv6sTERFRSBIRkTriLKcoZTrl37xCbGkq4TaCGcGXETDsZsaeOYywYJ1rJCIi9YNCkoiI1K7iXPYuepmI9e/SzH2APZ4OzGv9e7qceyNTeiXgcGhZnYiI1C8KSSIiUisqd6Ww98vnaJf9ObHWzTIGsqvbw4wcPYEbYqP8XZ6IiMgxKSSJiIjvuJ0UrplN6bIXaV+8gRY2jLnBF2KG3croM0YwKkR/dkREpP7TXysRETlltryQnK9eImLdW7Rw5XPAtuGDVr8h4dybGd+rs5bUiYhIg6KQJCIip2R
n3gHK/zmGHpWbWGn7kpl4D8MvmMqVsc38XZqIiMhJUUgSEZGTUuny8MaydFov+SNTHJtY1PtJho29heFaUiciIg2c/pKJiEiNfZ9RwAOfbGRYwSf8NuhrSof8jnPG/MbfZYmIiPiEQpKIiFTbgdJKnpi/hZmrs7m42Q4eDX4Xup5PxEV/8XdpIiIiPqOQJCIiJ2St5aM1OTwxfwtFZU7uHR7Bbdv+jonqBBP+CQ4dBCsiIo2HQpKIiBxXen4JD3y8gZUZ+xmU0JwnL02kx/yJ4K6EKdMgrLm/SxQREfEphSQRETmqcqebV5ak89qSdEKDHDwxvi9Tkjvg+OQW2LMepk6HmO7+LlNERMTnFJJEROS/rEjbx4OfbGTHvlLGDWjPg2OSiIkKgRUvwIaZcM6D0ONCf5cpIiJSKxSSRETkP/aVVPD4vC18/GMOHVuF895NQzmjW4z3xbSFsPAhSBoHZ9zt30JFRERqkUKSiIjg9lg+WLWLpxekcqjSxe/OSeT2sxMJDarakKEgHWbdCDG9YNwrYIx/CxYREalFCkkiIk3c6p0HeGjuRjbmFHFal1Y8ellvEmOjfm5QUQzTrwTjgKkfQEik/4oVERGpAwpJIiJN1L6SCv76+VZmrs6mbbNQXpw6kEv6tcMcPkvk8cDHv4Z92+Ga2dCik9/qFRERqSsKSSIiTYzL7eHfK3fyzFfbKKt0c+tZXbjjnG5EhBzlT8Kyv8HWz+CCJ6HLqLouVURExC8UkkREmpAfMvfzv3M2sWVPESMTW/Pw2N4kxh5j+dyWz2DJk9D/Shh+W90WKiIi4kcKSSIiTUBecTlPzd/K7B9zaB8dyitXDeKiPm1/ubTuFx22wMe3QvtBcMnftVGDiIg0KQpJIiKNmMvt4Z3vdvLcV9sod7n5zaiu/PacRMKDj/Pxf2g/TJsKQeEw+d8QFFp3BYuIiNQDCkkiIo3U9xkF/O+cTaTmFnNm9xgevjSJLjEn2JnO7YKP7nxCagAAIABJREFUboLCbLh+HkTH1U2xIiIi9YhCkohII7Or4BDPfJXKnLW7iWsexuvXDGZ0UptjL60DsBYO7oJvX4D0RXDp85AwrO6KFhERqUd8GpKMMaOAxUd5qdBa2/wEfUOBR4GrgebAWuCP1tplvqxRRKQxKqt088WmPXz4QzbfZRQQHOjgjnMSuW1UImHBAb9sbK13pmjPWtj9Y9VjLZTt974+5GYYfH2d34OIiEh9UVszSXcAPxz2u6safd4ExgD3ABnA7cACY8xp1tq1vi9RRKRhs9ayPruQGSlZfLp2N8UVLhJahnP36O5MGNyBdtFh3kBUtPvnIPRTKDq0zzuICYDYJOg5BtoP/PkhIiLShNVWSNpirV1Z3cbGmP7AlcCN1tq3qq4tBTYBjwBja6VKEZEGqKCkgo9/zGFmSjapucWEBjm4uG87JiXHM7RTSxxF2fDj8z8HopJcb0fjgJie0P2Cn8NQm94QFObfGxIREaln6st3ksYCTmDGTxestS5jzHTgPmNMiLW2wm/ViYj4mcvtYdn2fD78IZuFW3JxeSwD4pvzxPi+XNK/Hc1Cg7wN87bAu+OgJA9iekDXc6DdAG8gatsHgiP8eyMiIiINQG2FpPeNMa2Bg8AC4D5r7a7jtO8N7LDWHjri+iYgGEis+llEpEnJyC9h5upsPlqdTV5xBa0igrnh9E5MTI6ne5uoXzbe/SO8dzkEBMNvvoPYXv4pWkREpIHzdUgqBJ4BlgJFwEDgfuA7Y8xAa23eMfq1BA4c5fr+w17/L8aYW4BbABISEk6hbBGR+iUlcz9Pf5nKyoz9OAyc3SOWSUPiObtHLMGBjv/usPM7+GAShDaHaz+BVl3rvmgREZFGwqchyVr7I/DjYZeWGmOWAavwbubw4DG6GsAe4/rx3u8N4A2A5OTko/UXEWlQMvJL+OsXW1mwKZfYqBDuvbAHEwZ1oE2z4xzomr4Ypl8JzdrDtXMgukPdFSwiItII1fp
3kqy1a4wx24Ahx2m2HzjaVFCLw14XEWm0CkoqeOHr7bz//S5CAh3ceX53fnVGZ8KDT/AxvXU+zLwOWnXzziBFxtZNwSIiIo1YXW3ccKyZop9sAsYbY8KP+F5SElAJpNVmcSIi/lLudPPmNzt4bUk6h5xupgyJ5/fndScmKuTEnTfMgtm3QPsBcNUsCD/qymQRERGpoVoPScaYZKA78OFxms0F/gJMBN6p6hcITAa+1M52ItLYeDyW2T/m8MyXqewpLOe8Xm2476IeJMZGnbgzwJp3Ye4d0HEEXDkDQqrZT0RERE7IpyHJGPM+sANYg3dnu4HAn4Ac4MWqNh2BdOARa+0jANbatcaYGcBzxpigqjFuAzoDV/myRhERf1u+PZ8n5m9ly54i+neI5u+TBzC8S6vqD7DyVfjiPkg8Dya9B8HhtVesiIhIE+TrmaSNwFTgd0A4sBeYDTxkra063h0DBABHbs90A/A48BjQHFgHXGitXePjGkVE/GLr3iKenL+Vpdvy6dAijBemDuSSvu1wOI67R83PrIXlT8Oix6DnJXDFvyCwGsvyREREpEaMtY1jU7jk5GSbkpLi7zJERP7L3sJynv0qlVmrs4kKDeJ35yRyzWkdCQkMqP4g1sLCh2HFc9BvCox7GQLqy3ngIiIiDYMxZrW1NvlE7fQXVkSklhQecvL6snT+tWIHHg/cNLIzt5+dSPPw4JoN5PHA5/fCD/+A5Bvh4mfAcZSzkkRERMQnFJJERHystMLFWyt28PqyDIrLXYzt3557LuhBfMuT+O6Q2wWf3gFr34cRv4PzHwVTzeV5IiIiclIUkkREfKTc6eb973fxyuI0CkorOa9XG+4a3Z1e7Zqd3ICuSph9M2z+BEbdD2fdq4AkIiJSBxSSREROkdPtYdbqbF74ejt7CssZmdiau0Z3Z2BCixN3PhqPGzK/gW+ehYwlMPpxGPFbn9YsIiIix6aQJCJykjwey6frd/PsV9vYWXCIQQnNeWZSf0Z0bV3zwayFPWu9B8Ru/AiK90BwJFz6Agy+zvfFi4iIyDEpJImI1JC1lq825/LMl9tIzS2mV7tmvHldMuf0jMXUdDlcQbo3GG2YCQXbwREE3UZDv4nQ/UIICqudmxAREZFjUkgSEakmay3fpO3j6S+3sS7rIF1aR/Di1IGMqclZRwDFubBpNqz/EHavAQx0GundmCFpLISd5DI9ERER8QmFJBGRali9cz//tyCVlRn7iWsext8m9OPyQXEEBlRzK+7yQtjymXfGaMdSsB5o1x9GPwa9L4fouNq9AREREak2hSQRkeNYmVHAi4u2syKtgNaRITx8aRJThyVU7yBYZxls/9K7nG7bAnBXQIvOcMbd0HcixHSv/RsQERGRGlNIEhE5grWWb9MLeP7r7azasZ/WkSE8cHEvrhqeQHjwCT42XRWQ9rV3OV3q51BZAhGxkHwD9J0EcYO0jbeIiEg9p5AkIlLFWsuSbfm8+PV21uw6SJtmITx0aRJThyYQGnScmSNXpXer7k2zYes8qCiCsJbQ9wrvUrpOI8FRjZknERERqRcUkkSkybPWsnBLHi8u2s767ELimofx6GV9mDi4w7HDkdsFmctg42zY8imUH4TQaOg1FvqMh85nQUBQ3d6IiIiI+IRCkog0WR6PZcGmvby4KI3Ne4pIaBnOU5f35fJBHQgOPMqGDB437PzWO2O0eS4c2gfBUdDzYu+MUddzIDC47m9EREREfEohSUSaHLfHMm/DHl5atJ1tuSV0bh3BMxP7M25A+6PvVueqhMWPw7ppUJILQeHeM4z6XA6J5+ksIxERkUZGIUlEmgyX28Pcdbt5aXEaGfmldIuN5PkpA7ikX3sCjnXOkbMMPrzWu0tdz0ugzwTofgEER9Rt8SIiIlJnFJJEpNE7VOlixg9ZvPnNDrIPlNGzbRSvXDWIC3u3Pf4hsBXFMG0qZH4Dlzzn3aFOREREGj2FJBFptPKLK3j3u0ze/W4nhWV
Okju24KFLe3Nuz9jjhyOAQ/vh/Stg91q4/B/Qb2Kd1CwiIiL+p5AkIo1ORn4J/1i+g4/WZON0exid1IZbzuzK4I4tqjdASR68Nx72bYPJ70HPMbVbsIiIiNQrCkki0mis3nmAN5al8+XmXIICHEwY1IGbz+hMl5jI6g9SmA3vjoOi3XDlDO+OdSIiItKkKCSJSIPm8VgWbsnljWUZpOw8QHRYEL89O5FrT+tETFRIzQYrSId3L/OeeXTNx5AwvHaKFhERkXpNIUlEGqRyp5tPfszhjeUZZOSX0qFFGA9dmsSk5HgiQk7ioy13M7x3GbidcN2n0H6A74sWERGRBkEhSUQalP2llUxbtYu3VmSyr6SCPnHNeGHqQC7u0/boZxxVR84a+PflEBACN3wOsT19W7SIiIg0KApJItIgbMwp5O1vM5m7bjeVLg9ndo/h12d24bSurTDmBDvVHc/Ob+H9SRDeAq6dCy07+65oERERaZAUkkSk3nK6PXy+cS/vfJvJ6p0HCA8OYHJyPNeN6EhibNSpv0Ha1zD9KojuANfOgei4Ux9TREREGjyFJBGpd/KKy5n2fRbvf7+TvOIKOrUK58+XJDExuQPNQoN88yZbPoVZN0LrHt5NGiJjfDOuiIiINHgKSSJSb/y46wBvf5vJ/A17cLotZ3WP4a8TOnFW95gTH/5aE+tmwCe3QdwguGomhFXz/CQRERFpEhSSRMSvKlxu5q3fwzvfZrIuu5DIkECuGtaRa0/rWLPzjarrhzdh3l3Q+QyYMg1CauE9REREpEFTSBIRv9h9sIxpq3YxbdUu9pVU0jUmgkfG9ebyQR2IPJktvI/kqoD9O6AgreqxHfalQdZK6H4hTHwHgkJP/X1ERESk0VFIEpE643R7WLQ1j+mrdrF0Wz4WOLdnLNeN6MTIxNY136XO44GinMOC0GGPg7vAen5uGxELrRJh5J1w9v0Q4KPvNomIiEijo5AkIrVuZ0Ep03/IYtbqbPKLK2jTLITbz05kUnI88S3Dqz+QtZC1Cla/DXvXQ0E6uMp+fj0oAlp1hbjB0G+yNxS16up9Do32+X2JiIhI46SQJCK1otzpZsGmvUxflcV3GQU4DJzTM5YpQxIY1SOmZge/uipg08ew8lXYsxZCoqHjadBl1M8hqFU3iGoLp3JmkoiIiAg+DknGmCuAqUAyEAvsAmYDT1hri0/Q1x7jpYHW2rW+rFNEas/23GKmrcpi9o/ZHDzkpEOLMO46vzsTk+NpG13D7wAV50LKv7yP0jzvdt1jnoX+UyA4onZuQERERJo8X88k3Y03GN0PZAMDgYeBs40xI6w9/AsCR/U28PoR17b5uEYR8bFDlS4+W7+HGT9ksXrnAYICDKOT2jJlaDynd21d8+27c1bDyte8s0ceJ3S7AIb/GrqcrZkiERERqXW+DkmXWmvzD/t9qTFmP/AOMApYdIL+OdbalT6uSURqya6CQ/xjeQaf/JhDcYWLLjER3H9xTy4f1IHWkSE1G8zthM1z4PvXIXsVBEfBkJtg6C3eJXUiIiIidcSnIemIgPSTH6qe43z5XiLiP6l7i3l1SRpz1+0m0OHgkn7tmDI0gSGdWtR8h7rSfbD6Le/5RcV7oGUXuPCvMOBKCG1WOzcgIiIichx1sXHDWVXPW6rR9jZjzD2AG1gJPGStXV5rlYlIjfy46wCvLEnnq825hAcHcNPIzvzqjC60aXYS5w3t/hFW/RM2zAR3BXQ9By59HhLPB0cNNnUQERER8bFaDUnGmDjgEWChtTblBM3/DXwG7AY6AvcAi4wx51trlxxj/FuAWwASEhJ8VbaIHMZay7fpBby8OI1v0wuIDgvif87txvUjOtEiIrhmg1WUeEPR6rdgzzoICoeBV3uX1MX2rJ0bEBEREakhY+2xNpU7xYGNiQSWAO2Bodba7Br2jwI2AlnW2pEnap+cnGxTUk6Uw0Skujwey8Ituby8JJ11WQeJjQrh5jO6MHVYApEhNfz
/K3vWQcpb3oBUWQKxSTD4Bug3CcKa184NiIiIiBzBGLPaWpt8ona1MpNkjAkF5gJdgLNqGpAArLXFxph5wE2+rk9Ejs3l9vDZ+j28siSNbbklxLcM4/HxfZgwqAOhQQHVH6iyFDbO9s4a5ayGwFDoPd4bjuKHapc6ERERqbd8HpKMMUHAR8BQ4Dxr7YZTGQ6onakuEfmFcqebj9Zk89rSdLL2l9G9TSTPTxnAmL7tanbwa+4m76zR+hlQUeQ92+jCp6DfZAhvWXs3ICIiIuIjvj5M1gG8D5wLjDmV7byNMc2AMcD3PipPRI5grWVDTiFfbNzLrNXZ5BVX0D++Of97SW/O7Rlb/fONnGXeM41S3vJu3x0QAknjIPkGSDhNs0YiIiLSoPh6JullYCLwOFBqjBl+2GvZ1tpsY0xHIB14xFr7CIAx5m6gB7CYnzduuBtoC1zl4xpFmjS3x5KSuZ8vNu3ly0255BwsI8BhGJnYmucmd+G0rq2qt413aQHsXAEZS2DjLCgvhFbdYPTj3u27NWskIiIiDZSvQ9JFVc8PVD0O9xfgYbxL6AKAw9fvpALjqx7RQBGwArjJWrvKxzWKNDmVLg/fZRTwxca9fLV5L/tKKgkOdHBmt9b8/rxunNerzYl3qivJ94aizG+8z3mbvdeDwqHHRd7vGnUaqVkjERERafB8fZhsp2q0ycQblA6/9inwqS9rEWnqyirdLNuezxcb97JwSy7F5S7CgwM4u2csF/Vpy6gescffpa44F3Z+A5krvKEof6v3elAEJAyHvldApzOg3QAIrOFW4CIiIiL1WF0cJisidaS4rJJFW/eyYONelm7bR5nTTXRoIBcmteGCpLaM7Nb6lzvUedw//1yS9/NMUeY3ULDdez040vu9ov5TqkJRfwgIqtsbExEREalDCkkiDYGzDIr3eoNMSdVz8V4oycUW51K6Pwd30V4inAcYZzyMA++i1p/y0OaqR3WENPOGokHXeJfPte0PAfqoEBERkaZD//IRqU9K8mHLHNj53X9CECW53q20j2CNg9KglmQ7m7Hb1YyDAf1p1bYDie1b0S46DEdNvxsUEuVdRte2HzhqcB6SiIiISCOjkCTib6UFsGWudwvtzOVgPRAd73206Q2J50JkLES2pTKsNStyA5m51cmCTCeecgend23NpCHxXJzUpmaHvYqIiIjIUSkkifjDof2w9TNvMMpYCtYNLbvCGXdB7/EQm/SfXeKstWzMKeLDlCzmrM2hqLycuOZh/O7cLkwY1IH4luF+vhkRERGRxkUhSaSulB2ErfOqgtFi8LigRSc4/X+8waht319sn72/tJJPfszhw5Qstu4tJiTQwUV92jIpOZ7hXVpV/6BXEREREakRhSSR2lReBKnzvcEo7WvwOKF5Apx2uzcYtRvwi2BU7nSzdFs+c9bm8NXmXJxuS78O0Tx6WR/G9m9PdJh2lRMRERGpbQpJIrWhJB++uA+2fAruCmjWAYbdCr0vh7hBRw1G8zfs4esteZRUuGgRHsQ1wzsxMbkDvdo18+ONiIiIiDQ9Ckkivpb2NXz8aygvhOQboM8EiEsGh+M/TcqdbpZVBaOFVcGoeXgQY/q2Y0y/dpzWtRVBAY7jvImIiIiI1BaFJBFfcVXCokfg2xchphdc+4l3d7oqCkYiIiIiDYNCkogvFKTDrBthz1pIvgkueByCwih3ulm+fR/z1u/+r2B0cb92jFAwEhEREal3FJJEToW1sG4azLsbAoJg8vu4ul/MN2n7mLs2lS8351JS4SI6TMFIREREpKFQSBI5WeVFMO9O2DAT23EEG4c9zaxtls9mfU1BaSXNQgO5uG9bxvRrr2AkIiIi0oAoJImcjKwf4KObsIXZrIi/lQfzRpP5biYhgQ7O69WGcQPac1aPGEICA/xdqYiIiIjUkEKSSE143BR//TQR3/6VfNOK28r/zNq07pyeGMVvz+vJBb3bEBWqs4xEREREGjKFJJFqKCxzsiR
lHV2X30WfyrV86h7OtJg/cMk5PXitXztim4X6u0QRERER8RGFJJFj2F9aydJteXyxcS+kfsGTAa8SZpx8mfhneo++lQ9io/xdooiIiIjUAoUkkSrWWjbtLmLx1jwWpeaxNusg0baYP4V/wuTAzylrmUTo1HcYHdPd36WKiIiISC1SSJImrbjcyYq0fSzamseS1Hwqi/cxLGArNzZLZ3iLLcQc2g4eYPjthJ33EASG+LtkEREREallCknSpFhrSc8vZfHWPBan5rEtcxeD7GbOCE7ljpBU4kIzMFhwhkHCMOg0GRLPg/YD/V26iIiIiNQRhSRp9ArLnKzeuZ+lqfmkbM0grnANwx1beDQ4lS5BmRgsNjAM034odJoKnUZC3CDNGomIiIg0UQpJ0ujsKSxjTVo2O7ZtZl92KgGFO+lILpMDtvOQ2Ykj2GIDQjEJw6DTVdBpJEahSERERESqKCRJw+TxQPEePPt3kLcrlfxdqVTkZxBavJM2nr2MMUU/tw0EV1AkJm4Qjs5XQaczFIpERERE5JgUkqRhKM6FjR/hSV9EZX4GgcVZBHoqcQBtgRhr2GtiKA6LY3+Lc3G370ZMQk8CWnWGFp0IDGsBxvj7LkRERESkAVBIkvqrvAjXprkcSvmAyD3f4cBDuu3Adk97dtleHIroQFTbROK6JJHUszcdY6MxCkIiIiIicooUkqRecVWUsXPVHOy6D0nYt4xgnBzwxPKWZxybWo4mrlt/hnZqyYROLYmJ0nI5EREREfE9hSTxK7fHsjH7ALvWfEmztE8YULyMrqaUfbYZ80MuIL/TpcT3PZNru7SmRUSwv8sVERERkSZAIUnqVKXLw9a9RazKKCBny/d03D2PC/iW/mY/hwglteVZOHtdQZfhY7isWYS/yxURERGRJkghSWqN0+1he24JW3bmkJ25jQN7duDen0V7cjnPsYZujhzcJoC8tmdSOHgK0f3HMjA43N9li4iIiEgTp5Akp8bthOI9uA7sIi8rnX27MyjftxNHUTZRFbnEsY8kc+jn9oHgMQE42w+BgXcTkHQZ7cJb+q9+EREREZEjKCRJ9VkL+zOo3LaI8tSvCdq7hpDyfBx4CATaVz0O2CgOBsVS2aIjBS1G4m7XmeZtu+BoHg/RcTgi2xISoP/0RERERKR+8um/VI0x8cDfgfMBAywEfm+t3VWNvqHAo8DVQHNgLfBHa+0yX9Yo1XegtJLMXTsoS11MWPZy4g+sorU7j2Agz7Zmlacne8xITPN4otp0om18V7p07UnndjG0cGgrbhERERFpmHwWkowx4cAioAK4DrDAY8BiY0w/a23pCYZ4ExgD3ANkALcDC4wxp1lr1/qqTvklj8eyu7CMtLwS0vNL2bUnj9DdK4k/uIrB7nUMdGQBcNBGsDG4Pzmtr6Ksw5nEJPSkT9soxraOIDDA4ee7EBERERHxHV/OJN0MdAF6WGvTAIwx64HtwK3As8fqaIzpD1wJ3Gitfavq2lJgE/AIMNaHdTYZbo9lX0kFewrL2VtYzt7CMvYUeX/+6dq+ohJ6utMY6djI6QEbudaxnSDcOE0wea0Hkh4/mYie5xLbbSgjA7VETkREREQaP1/+q3cssPKngARgrd1hjFkBjOM4IamqrxOYcVhflzFmOnCfMSbEWlvhw1obJGstZU43hWVOCsucFJW5/vPzgdJK9lYFoLyDxVQU5mFL82hhC2lNIa1NIa1NEUmOQs4LLCbGUURLe5DIwEIcgR4sBneb/gR2uwO6jCIofhhxQWH+vmURERERkTrny5DUG5hzlOubgInV6LvDWnvoiOubgGAgsernei8/Zwd5mZuwrkqsuwLrcmLdTnBXVj2cWHclxl2JdTsxHie4nRh3JcbjxO1y4ax6uFwuXG43bpcLl9uF2+0G6yEADw4sjqrn1niIw8nZjiJiHIVE22JvMUG/rM0GhkFkDCYiFiJ7QURriIiFtn0xnc8kULvMiYiIiIj4NCS1BA4c5fp
+oMUp9P3p9QYh45sZDNvyZI36VNhAnATiIgA3AVhjsCYATAAYBzgCMEEOTEgAxuHA4QjAERCAwxH4/+3da6wcZRnA8f9zdk9buRgO4WKU3igmtgQk5AQbikQgQgUCH5BLvCEBUYkRgpeIVa4lIkQxQAwUUQlgaCAaUaNUUFCMaEiTEg5KhbSABkOxBYSWS+X1w8zqsF04253Zs3OW/y+ZzPbdefc8kz6ZmWcu7zDSGKHRaNIYfTuNnRfkBdAesOPu/5/nn2PGThAOqCBJkiS9maofMkkd2ro5Ko9e+kbEmcCZAHPmzOniz/TfvCUnMjF7f6I5g5HmKCONGcToDBrNmUQzm480R2nkbY3mKM1Gg0YjmBnBrNERwkJGkiRJGpgqi6RNdL7iM0bnq0RFG4FOVc5Y4fttpJRWACsAxsfHOxVZU27PvRaw514LBh2GJEmSpB5VOXbzBNmzRe0WAQ930Xd+Pox4e99XgEe37SJJkiRJ1auySLoDWBwRe7caImIesCT/brK+oxQGeIiIJnAysMqR7SRJkiRNlSqLpOuB9cBPI+L4iDiObLS7J4HrWgtFxNyI2BoR57fa8pfFrgS+ExFnRMQRwK3AfOCCCmOUJEmSpDdVWZGUUnoROBxYC9wE3AKsAw5PKb1QWDSARoe/fRrwA2A58AtgNrA0pbS6qhglSZIkaTKVjm6XUnoCOGGSZdbTYdS6lNIW4Nx8kiRJkqSBqPJ2O0mSJEma9iySJEmSJKnAIkmSJEmSCiySJEmSJKnAIkmSJEmSCiySJEmSJKnAIkmSJEmSCiySJEmSJKnAIkmSJEmSCiySJEmSJKkgUkqDjqESEbEBeHzQceR2A54ZdBCadswb9cK8US/MG/XCvFGv6pQ7c1NKu0+20NAUSXUSEQ+klMYHHYemF/NGvTBv1AvzRr0wb9Sr6Zg73m4nSZIkSQUWSZIkSZJUYJHUHysGHYCmJfNGvTBv1AvzRr0wb9SraZc7PpMkSZIkSQVeSZIkSZKkAoskSZIkSSqwSNoOETE7Im6PiOci4vmI+HFEzOmy76yIuCIinoqILRHxx4g4tN8xa/B6zZuIGI+IFRHx14jYHBFPRMQtETF/KuLWYJXZ3rT9znkRkSLivn7EqXopmzcRsTAibouIZ/J91SMRcXY/Y9bglTy+mRMRN+b7qM0RsTYilkfEjv2OW4MVEXtFxNX5Me3mfF8zr8u+I/n+aX1EvBQRayLihP5GvH0skroUETsAvwHeA5wKfBx4N/DbLjcENwCfAs4HjgWeAu6MiAP6E7HqoGTenALsC1wFfAj4CnAg8EBEzO5b0Bq4CrY3rd/ZG1gGPN2POFUvZfMmIsaBPwEzgTOAo4FvAY1+xazBK5M3+fd3AYcCXweOAb4HfAH4fh/DVj3sA5wEbAJ+v519LwEuBK4hO8a5H7gtIo6uMsBSUkpOXUzA2cB/gH0KbfOBrcC5k/R9L5CA0wptTeAR4I5Br5tTbfNm9w5tc4HXgIsHvW5O9cybtt+5E7gOuAe4b9Dr5dTfqeT2ZgSYAH4y6PVwmtqpZN4cmR/fHNnWflnef4dBr59TX3NnpPD5jDwX5nXRbw/gZeCitva7gQcHvV6tyStJ3TsOuD+l9GirIaW0DvgDcHwXfV8FVhb6bgVuBY6KiJnVh6ua6DlvUkobOrQ9DmwA3lVxnKqXMtsbACLiI2RXHs/rS4SqozJ58wFgEfDtvkWnuiqTNzPy+fNt7c+SFd5RVZCqn5TSaz12PYosd25ua78Z2K8ujxVYJHVvX+ChDu0TZDuWyfquSylt7tB3BtnlSg2nMnmzjYhYSHYG5i8l41K9lcqbiBgDrgS+nFLaWHFsqq8yeXNIPp8VEfdHxKsR8XREXBURb6s0StVNmby5C/gb8M2IWBQRO0XE4WRXp65NKb1YbagaEvuSXUl6tK0hDBvuAAAD4UlEQVR9Ip9v9/FRP1gkdW9Xsnsu220Exkr0bX2v4VQmb14nIprAtWR
Xkm4oH5pqrGzeXAGsBX5YYUyqvzJ58858vhJYBXwQuJzsFpofVRWgaqnnvEkpvURWYLdu1/w32S1TPwc+V22YGiK7As+m/B67glodFzcHHcA00+nNu91cSo4SfTX9VfV/fw1wMHBMSqnTDk3Dpae8iYj3A58ADuywA9Lw63V70zppenNK6fz88z0R0QAui4hFKaWHK4lQddTr9mYWWWG9B9mAD08AB5ENUrUV+GyFMWp4TIvjYouk7m2ic2U7RuczMEUbgU5DaY4VvtdwKpM3/xMR3wDOBE5NKa2qKDbVV5m8uY7sSuPfI2KXvK0JNPJ/b0kpvVxZpKqTMnnzr3z+67b2VWQP4R8AWCQNpzJ5czrZ82z7pJQey9t+FxHPASsi4tqU0prKItWw2AiMRUS0ncyr1XGxt9t1b4LsHsp2i5h8xzEBzM+H2Wzv+wrb3pOp4VEmbwCIiGVkw3+fnVK6qcLYVF9l8mYh8Bmyg5vWtARYnH/2zO7wKrufgm3P7rbO7Pb6gLbqr0ze7AdsKhRILX/O5wtLxqbhNEH2qoEFbe2tZ5FqcULGIql7dwCL8/eOAJC/MGtJ/t1kfUeBEwt9m8DJwCrP6g61MnlDRHweWA4sSyld3acYVT9l8uawDtMasgezDwNurz5c1USZvPkl2YPUS9vaj8rnD1QTomqoTN78k+yKQPsAVO/L5/+oKEYNl1+RXST4aFv7x4CH8tEVBy68Zb07+QvT1gBbgK+RnW27BNgZ2D+l9EK+3FzgMbL32Fxc6H8r2c7mS8A6srO5xwIHp5RWT+GqaAqVyZuIOIXsgek7gYvafvp5nw8YXmW3Nx1+7x6gmVI65I2W0fRXwX7qArIXgl5O9nLRceACYGVK6ZNTtyaaSiX3U/OAB8mKpUvJnkkaJ8ujtcBBJYaJ1jQQER/OPx5BdhfDWWQDTG1IKd2bL7MVuDGldHqh32XAOcBXgdVkFw4+DRyfUvrZ1K3BG/OZpC6llF7Mh7W8EriJ7BaEu4FzWhuQXJC9nbz9Kt1pZBuQ5cAuZBukpRZIw61k3izN25ey7dnde8nuA9cQqmB7o7egCvLmYrLRyc4Cvgg8RTZS4iV9Dl0DVCZvUkrrI2IxcCHZ8c1uwJPACuBSC6S3hNva/v3dfF48TmnkU9Ey4AWy4eLfATwCnFSXAgm8kiRJkiRJr+PZR0mSJEkqsEiSJEmSpAKLJEmSJEkqsEiSJEmSpAKLJEmSJEkqsEiSJEmSpAKLJEmSJEkqsEiSJEmSpIL/AgCFgfiOq7R7AAAAAElFTkSuQmCC\n", + "text/plain": [ + "<Figure size 1008x504 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Mindless comparison as X is continuous (we should integrate).\n", + "\n", + "thresholds = np.linspace(.0, 1.0)\n", + "\n", + "x_values = np.linspace(-10, 10, 1000)\n", + "\n", + "rates_logistic = np.zeros(0)\n", + "rates_forest = np.zeros(0)\n", + "\n", + "for leniency in thresholds:\n", + " rates_logistic = np.append(rates_logistic, gp(leniency, x_values, logreg, lambda x: scs.norm.pdf(x), 0))\n", + " rates_forest = np.append(rates_forest, gp(leniency, x_values, forest, lambda x: scs.norm.pdf(x), 0))\n", + "\n", + 
"plt.plot(thresholds, rates_logistic, label=\"Logistic model\")\n", + "plt.plot(thresholds, rates_forest, label=\"Random forest\")\n", + "plt.title(\"Generalized performance over synthetic data\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### On COMPAS data\n", + "\n", + "\n", + "#### Predictive models\n", + "\n", + "Let's build the predictive models (first here random forest and logistic regression). Some of our variables are string so they will first have to be transformed to be dummy / indicator variables." + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "# convert string values to dummies, drop first so full rank\n", + "compas_dummy = pd.get_dummies(compas, columns=['c_charge_degree', 'race', 'age_cat', 'score_text', 'sex'], drop_first=True)\n", + "\n", + "########\n", + "\n", + "predict_columns = ['priors_count', 'days_b_screening_arrest', 'length_of_stay',\n", + " 'c_charge_degree_M', 'race_Asian', 'race_Caucasian', 'race_Hispanic',\n", + " 'race_Native American', 'race_Other', 'age_cat_Greater than 45',\n", + " 'age_cat_Less than 25', 'score_text_Low', 'score_text_Medium', 'sex_Male']\n", + "\n", + "response_column = 'two_year_recid'\n", + "\n", + "# instantiate the model (using the default parameters)\n", + "logreg_c = LogisticRegression(solver='lbfgs', max_iter=1000)\n", + "\n", + "# fit, reshape X to be of shape (n_samples, n_features)\n", + "logreg_c = logreg_c.fit(compas_dummy[predict_columns], compas_dummy[response_column])\n", + "\n", + "########\n", + "\n", + "# instantiate the model\n", + "forest_c = RandomForestClassifier(n_estimators=300, max_depth=5, random_state=0)\n", + "\n", + "# fit, reshape X to be of shape (n_samples, n_features)\n", + "forest_c = forest_c.fit(compas_dummy[predict_columns], compas_dummy[response_column])" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + 
"outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAH/CAYAAABdO+weAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzs3XmcTnX/x/HXZ2YMxjTZl2TGFiLq7lYRhaSSkhbKrigtSinlDvFrsaS0aFPdliyVtEqJEpVUN0XJUtmzZonss3x/f5wz0zXXXLNpzMXM+/l4nAfX93zPOZ9zXefifK7vcsw5h4iIiIiIiORcRLgDEBEREREROdEokRIREREREcklJVIiIiIiIiK5pERKREREREQkl5RIiYiIiIiI5JISKRERERERkVxSIiUiuWJmj5qZM7OmAWUX+2WDwhmbiIiISH5RIiVSAJlZVT+xyWz5M9wxFhSFOYk0s0/8c18S7ljyUmH+TPOCmX1lZknhjiMrZnaFmU03s9/N7LCZ/WVmP5jZ42ZWM5Nt6pjZWDP7zcwOmtleM/vezIaaWalMtpkc8O9u5yziWRJQ74yA8qgQ/34nmtkmM3vDzM7MYp/xZpbib9Mnm/ejppm9YmZr/Pdjr3+eb5vZnVltK1KYRYU7ABE5plYBb4QoP/QP9vk0MBlY/w/2ISc4MzsVuBhwwJlm9i/n3A9hDkskS2YWC0wB2gJ/AbOB34BooD5wN3C3mTV0zv0YsN3twLP+yznA20BRoBkwBLjNzK50zn2XyaGTgBv9YwfH9C/gTL9OZvdlW4CX/b+XABoB1wPtzKyFc25hiG16AIb3Hb0ReC7Ujs3sHGAuEAvMAz4AUoAaQFPgKmBMJnGJFGpKpEQKtpXOuaF5uUPn3A5gR17uU05IPfB6NTwJ3AvcBOiXazneTcJLoj4AbnTO7QpcaWZVgCeAuICyq4Hnga3AFc65xUHb3Ay8CHxkZmc5534PcdxZQBszi3fObQhadyOQiJfMXJpJ3JuD/y03s8eAB4FH8H7UCFxneN/R7cAnQFczO9M5tzTEvp/BS6Kuc869HbSfCKBVJjGJFHrq2idSyJlZUTPra2af+t1FjpjZZr9LSo0Q9TOMkcpkvzX9eq/mdJ3fzeY3MytjZi+b2Ra/a0rgeKzTzGxiQKwbzew5Myubw/NN7SrzqZkl+N1j/vDLTvXrXGtm0/xuLofMbLeZzTGzi4LfC7xfpwEeCeh6kxRUr5If41q/28xWM3vNzBJyEK+Z2Xo/xiKZ1Fntxxjtvy5uZgPMbJmZ7TOzPWa20szGmVmlnLxPOdAD2A0MBNYAncysaBbncbGZzfTP45CZrTOzSWZWL6heSTN7zMyWm9d9apeZfWtm/TLZ58d+nUP+NvebWVRQvV7+59LFzG4wrxvVQf8aGmVmMQF1s/1MzewcM3vBP95fZrbfzBab2a0hYgy83ir536udZnbAzOaa2VmZvF+1zGy8mW3wr5ktZjbLzNoE1Ysws1vN7Dv/s95nZgvM7KrMPosQx0r7TptZbzP70X8/X/XXn2pmj5jZ/8xshx/Pr+Z1g4sNPlegCRBp6bujDQo6Zgczm+9fmwfN6yLXK6cxHw0zuwxoBywD2gcnUQDOuY3OueuB//nbFMFLNACuD06i/G1eAR4HyuAlNaGMx2sd6h4UUzTQCfiQ3P9A9bz/Z8MQ65oD1fB6JEzwy24MrmRmBpwLbAtOogCccynOuU9yGZdIoaEWKREph9eqMB/vV9q9QB3gBqC1mZ3tnMvPbnzF8LqXRABvAUXwuuBgXkL1EV6XmveBdcDpwO3AJWZ2rnMup+O/ygELgd/xfqUuhferMMAIYB/ee7INqIR3AzbHzK5xzr3v15sLxANdgc+BL/zylNSDmFlt/3zKAzOB6f42HYFLzayRc25tZkE655yZTQUGAJf4+0h
jZo2A6sArzrkjfvFUP94v8X6NBqgKXIvXPWhLdm9OVsysGV63n7HOucNmNhl4CK8L0LQQ9e8HRgJ7gPf841fB+xX9W+Bnv14lP+YafvkYoDhel6sBwOiAfd7tv97u73M3cKF/nHOA9iFC7wi0xLu5nIX36/99wFlmdqlzLoUcfKZAb3/bL4AZeK0XlwIvmllN59x9IY5dGljgxzsR7/O4GphrZnWcc9sDzq2Fv99ieDfYK4CyQGO8m+GZfr0IvPf7WrzkYALe96YN8J6Z3emcC9mdKxMDgfP9Y38MbPbLmwN9gc/8c3B4Xcv6AxeY2QXOuST/Pfo/vNbJU4GHA/ad+j5iZk/hdaFbi/dZHMJr9XjFzGo75/rnIubcSE0kngj4roTknDvs//VivGv1S+fcF1ls8gRey2xHM7vNORfcffoX4Gugh5k96pxzfnlbvARsPF5XvdywLNbd5P85Cfge79+5zmZ2f+C5+/++7AZONrOyfo8DEckp55wWLVoK2IJ3k+aAlcDQEEudgLrFgEoh9nERkAy8FFT+qL/vpgFlqWNlBgWU1fTLXg2x75Dr8P6zd3g3xtFB64oCG4GdQO2gdR397Z7OwXsT5dd1eGMGLESdaiHKKuHdWK4MKs9w7kHrv8O7UWwSVN4Ub0zEezmI+Qz/GFNDrBvjr7vQf13Gfz0tRN3iQIk8uL4m+Mdo4r8+zX/9cYi6DfFusH8DKgatKwKUD3j9gb+f/iH2c2rA3xv4790CIC6g3PC6WDmgXUB5L78sJei6jcBLGhzQLRefaQIQEeK6muPHdWpQeer1NjrwesNrvUh3vv5ntBU4EnzN+OsrB/z9Dn/7ZwLjweumtci/7iqGOoegfaZ+p3cDtUKsLw/EhCgf6m93Q1D5V0BSJsdq428zHSgaUB4d8Fn8659eo5kce6O///hcbJP6GQ3JQd1v/bqNA8om+2VnBFyHFwasn+l/3lGBdUNcP4tCHO8xf92coPI4YD+wIqBshF/32hD7Geuv+w3oB/yboH9/tWjREnoJewBatGjJ+4W/E6nMlnY53M9y4LegsvxIpGqH2KaDv+7uTGJdAmzNwTml3pgcAErm8n19wd828EY505tuvC4zmSZ4eAljEhCbg2MvxWslKxF0LtvxJv4wv6y0f8yJx+jaOsmPY01Q+Td4iXfloPJX/Hiuy2a/p/r1fiQoScniczgrxLpS/ro3AspSb2A/DFG/gb9udk4+02ziSr1Gu4S43vYSlIzgtbw54M2Ass5+2XM5ON5y4A+gSIh17fz93JqD/aR+p4fn8nzLZ/I9ziqR+si/TsqEWHeWv78Rx+jaPeLvPyoX26Revz1zUHe6X/eagLLAROokvARnvL+ukv/9fyK4bojrZzN//xA2Cq/l1gEHgUZBcdzirxsYUFYvi+/ASXgtm4H/RxzGa0G7Cyh2LD4PLVoKwqKufSIF2/vOuXbZVTJv1qb+eGMbypO+2++BYxRbZvY551aFKD/P/7OBmQ0Nsb4oUMHMSrqcde9bnVk9v4vZg3jdtargtdoFqoSX9GUnNeb4TGKuAETiJZbZTSE+GW8cRjv+nvmrFV4XxRHOOQfgnNtlZp8C3cwbg/U+3k3XD8655BzEnJ0OeLOGTQ4qn4R3vt2BYQHl5/h/zs5mv6njPD51Xhe7rJyHdzN+tZmFur4P4XVPDfZVcIFz7kcz24s3a1qOmFkxvK5uHYBaeC1AgUKNQ1vlnAv+Lm3y/ywZUJaj98vM4vC6ta4FBnpDXdKp4P8Z6n3IzKIsjnc93g36WX68gWOsczPu7jy8Lp53hog5dYxdljGbWXWgW1DxGufca7mI41hIPSEXaqVz7i8zexu4zrzpyLvhff8n5GDflfBmBwQv+doGvI6X/P4UVPdGP4a0GQKdcz+b95iCy8ysknNuS8C6v4AO/vvaGu8HoMYBS08za5bDf1dFChUlUiKFnD/eJbVL0id43TsO4P1HfBNwSj6HtD2T8tL+nxkGTAc
pAeTkP/yQxzGzcngDzU/BG9fxMd6NXwped8cL+PuGLzupMV/tL5kpkYN9vY7XPacTf98gpT6XJjipuRpvzFJH/h5X9IeZjQZGpiZdRyn1/Q8+5pvAU3iTUAQmUicD+51ze7PZ78n+n5uzrOUpjXcD+lAWdUK9p39kUnc7XotYTr2Hl2QvxxuP9gfe96c63tiqUNfHnhBlqRNYRAaU5fR9SH1uUTX+vsEOJSfXVqrMvhP/wftMt+G1KP2O12IRAQwm598H8OI2/lnM1UNs/xmQXSK1De9zPgUInjkvM1v9P3NyfVQO2iaU8XjXSHu878oi59yyHOx7sXMu1KQS6ZhZHbzxa18659YFrZ6ENx62G95YwnScc2vwJrB43t/X2XjXdwO8H5buz0GcIoWKEikReRBvrEpTF/QMFMviAZI5kNqqEBliXVyIslSZ3eSn3oi3cM7NO9qgcnCcXng3RPc750YFrjCzyniJVE6lxnyjc25CriMM4Jz73cy+wJtUoyxestsOWOqc+zmo7j68m577/ckuWuJ10RmO17XoqJ4JY2a18FotAVaFaFEAOM2ffOBL//WfQFUzi8smmUpNfnOSuO/FmxikhHMuMbvKAcplUl6evz+rLJlZY7wk6kPgqsDWM//70jUX8YSS0/chNd75zrnm//CYqTJ8J/xZ5R7EG190lguY6c7/PgzO5TH+AnY756oedZDOfUrWEy1k5mu8VsQWeBN+5ETq85la4k2kEZJ5D+Q9Cy/BzOp5avPwWhEfxft35o4cxpFTqZNMXODPoBjKjYRIpII55743s3vwkucWeRSfSIGi6c9FpAbe1LfBSdQpeGOtjlbqDWHlEOv+dRT7S42v0dGFk2OpU77PCCz0pwluHKJ+ane5UAljXsc8Be8HsA54SVSoLnbpOOdWOedeAC7zi9r+g+OntkbNBf4bYpkRVA/8aaTxZhzMyiK8G/mW/mx0WfkOL/n/d87CTpNhyn4za4CX2Ac+XyerzzT1+pgZogtik+DKRyFH75dzbjde63F9C5i+/Rgoj9d18WuXcbrwzM43GX/m/hDrvsPr6prfLd3gtQYB3OsniJkKWD8HrwvmBZb1Ix/uxWuZe91lnLEvjd8aPBHv38XDeC3NecLMIvES+URCfz//izd7YG0zOz+Hu92Xuvu8ilOkIFEiJSIbgHJ+awPgPVsKb0a7o2619m+61gHNLOB5SWZWAe8X7tx6B6+704NmlqGLi5nFmNl5GTfLtdQuP8E3iffhDdgOlnpzGSph/Bpv6uFeZnZ58EozK5LNzVmwt/Buvjr7SwpBN2JmVsHMQiUYqWNmDgbUjTGzOuY9hDRL/k1aN7zuaB2dc72CF7zuSruB9gHPFxrrxznCzCoG7TPKzMoDOOc24SViDfBmDgs+fuD7+4K/zxeC9+nXreh3cQp2uaV/JlkE3sxnkD4hzeozDXl9mFkToGeI+rn1Ll4XtFtC3ewGJSBj8Lo5PmchnuFlZmdYDp+vloXUGQT/7Y8NC4zjsUy22YV3fxFq7NQYvJvy//rjvIJjrm45eL7a0XDOzcLrllkfeMNvRQo+/in+4wbO9bdJBO7xV08zsww/ApnZTXjT8+8kZy10z+F1v73UT4jzyuVARWBGqO+n/x1NbVVLbbnCzIZYiOfLmfcMrdSp/DOMLxQRde0TEe8/9RbAQjN7E+8G9WK8G6GfgLr/YN9P+8s3ZjYdb2rntniTH1TPzY6ccwfNrD1eN5NvzWw23hiVInjjRJrhjWm64h/EC96vxf3xngnUEi95OxdvEoCP8G5WAi3HG1vS2cwO+fVTnHOPO+ecmd2A9zyimX7XvCV473ECXjfBbXgzemXLObfHzGbi3YSlAPP8BCRQFeB//sDyJXjPbDrF3yYJeDag7vl4v7h/hveZZ+VSfz/vu4BnHgXFd9jMXsd7rld7vNnJFpvZg3jju1aa2Tt4N+eV/WMOx7sGAW7Fey9Gmdm1eNdJUbwEtj5+Muic+8H
M+uJN+/2LmX2El7SXxpuK/QK8G9uVQSF+jPcssDf9GC7Bax39lPSJVKafKV5y/APQxU8mFuO1UrXFm779mmzexyz513lHvK6DX5jZh348ZfBaNn8FrvOrP4f3Gd4IXGRmn+NdT6fgJaRn4l23R/1sIOdckpmNBe4EfvDf69LAlXjd1GqG2OxzvBbTt8zsE7xE7Avn3NfOuQ/MbBTed+w3/3v8O17L1+l4k1F0wJuJ8ljoiteyezVe6+cnwGq86dfr4T0zy/CuVwCcc2/519tovO/WbLzZJaP9+v/Cu16udM5lOwmNc24nXkKX11JbgsdnUeddvPF6HczsLn8ClP8AD5nZN3jX9h6871prvGvpN9KPexSRVOGeNlCLFi15v/D39OfZPqPIr98R76b7AN4N5ji8G5sM0xiTw+nPA9bdC6zBu5n6BW+sTupzh0JNf/5bNrEm4LVIrMFrndmFd1PzDNAwB+eaOp3wp1nU+TdecvGnv3zsl2U4d79+U/+92u+vD37PyuGNSViB1yK01//7q3hjvnLz2V7D31MU3xhifSm8KZK/9D/Lw3itKNOBc4Lqpn5umb4XAXXfIgdT5+PNvufwBrsHll+GN5nJbrxZ9dbiTQ5welC90v579asf+068qdX7hjhWE+Bt/m412Yo3pmUQ6aeoT53+vAveg6aX+DFsxnuQaqhnJGX6meL96v8aXpJ6AK9b4g2hvgdZXW/ZrDsdb3KALf65bcZL5FsH1TO85GCef60ewktCZuE9ODjDuYU4VsjrOmB9Uf+a+s3f/294E30UDxU/XoLxFN73OSn4PfHrtMH7Xu3wz2+Tfw79CDE1el4veD+4vM3fE2fsx/t35AmgRibb1MWbDn21/z78hZd4/B9QKpNtMkxpnkVMuXqOVNC25QK+A1lO7w685O+zu/+6Bd6U6t/611siXjL1P7wWtrjsYteipbAuqc8dERERKZDMrBfeDXBX51yWY8pERERySmOkREREREREckmJlIiIiIiISC4pkRIREREREckljZESERERERHJJbVIiYiIiIiI5FKheo5U2bJlXdWqVcMdhoiIiIiIHKcWL168wzlXLrt6hSqRqlq1KosWLQp3GCIiIiIicpwysxw9FFxd+0RERERERHJJiZSIiIiIiEguKZESERERERHJJSVSIiIiIiIiuaRESkREREREJJeUSImIiIiIiORSoZr+PCf27t3L9u3bSUxMDHcoIse1IkWKUL58eeLi4sIdioiIiEi+UyIVYO/evWzbto3KlStTvHhxzCzcIYkcl5xzHDx4kE2bNgEomRIREZFCR137Amzfvp3KlSsTExOjJEokC2ZGTEwMlStXZvv27eEOR0RERCTfKZEKkJiYSPHixcMdhsgJo3jx4uoGKyIiIoWSEqkgaokSyTl9X0RERKSwUiIlIiIiIiKSS0qkREREREREckmJVCGwcOFCOnTowCmnnEJ0dDRlypShVatWTJw4keTk5HCHl6V169ZhZkyYMCHcoWRqwoQJmBnr1q3L9bZmxtChQ3O93bx58zAz5s2bl+ttRUREROSfUyJVwD399NM0adKEXbt2MXLkSD799FPGjRtHrVq1uO222/jwww/DHaKIiIiIyAlHz5EqwL744gv69etHnz59ePbZZ9Otu+qqq+jXrx/79+8PU3QiIiIiIicutUgVYCNGjKB06dI8/vjjIdfXqFGDBg0aAPDHH3/Qu3dvatWqRUxMDFWqVKFTp05pD1xN1aNHD6pWrZphX82bN6d58+Zpr/ft28edd95JfHw8RYsWpUKFClx88cWsXLkyrc5zzz1H48aNKV26NCVLlqRRo0bMnDnzqM516NChmBkrV67k0ksvpUSJEsTHxzN+/HgAJk2aRJ06dYiNjaVFixasXr063faJiYkMGjSIqlWrEh0dTdWqVRk0aFCGqb3XrFlDmzZtiImJoVy5cvTt25fDhw+HjOmVV17hzDPPpFixYpQtW5aePXuya9e
uozq/nHDO8dRTT1G7dm2io6OpVKkSffr0Ye/evenq/fHHH3Ts2JG4uDhKlSrFjTfeyAcffKCugiIiIhIWv/+2LNwhHBW1SBVQycnJzJs3j3bt2lGsWLFs6+/atYtixYoxfPhwypUrx+bNm3nyySdp0qQJK1euzNE+At1zzz188MEHDBs2jNNOO42dO3eyYMEC/vzzz7Q669ato1evXlStWpWkpCRmzJjBFVdcwUcffUTr1q1zfc4A7du35+abb+a+++7jhRde4KabbuLXX39l3rx5jBgxgsTERPr27UunTp349ttv07br3r0706ZN48EHH6Rp06YsXLiQRx99lDVr1jB16lQAjhw5QqtWrTh48CDPP/885cuXZ+zYsbzzzjsZ4hgwYABPPvkkd911F6NGjWLTpk0MGjSIZcuW8fXXXxMZGXlU55eVgQMHMnz4cO644w6uvPJKli9fzuDBg1m6dCnz588nIsL73eSaa67hp59+Yvjw4dSsWZO3336bO++8M8/jEREREclKcnIy375yFw23vM5v18yg5plNwh1SriiRysb/zfiZ5Zv3Zl/xGKp7ShxDrqyXq2127NjBwYMHSUhIyFH92rVr88wzz6S9Tk5OpkmTJsTHx/Pxxx9z9dVX5+r4CxcupHPnzvTs2TOtLHgfTzzxRNrfU1JSaNmyJb/88gsvvfTSUSdS/fv3p1u3bgA0bNiQGTNmMHbsWNauXUtcXBwAW7ZsoW/fvqxfv56EhASWLVvG66+/zpAhQ9ImfrjkkkuIjIxk8ODBDBgwgAYNGjBx4kTWrFnDwoULadSoEQCtW7emfv366WJYt24do0aNYsiQITz00ENp5bVq1aJp06bMmDGDdu3aHdX5ZWbXrl2MHj2a7t2789xzzwFw6aWXUq5cObp27cqHH35I27ZtmT17Nl999RVvvvkmHTp0SKvXtm1bNmzYkKcxiYiIiGRm3/79/PxCZ87f/znflbuas+ueE+6Qck1d+yTNiy++yJlnnklsbCxRUVHEx8cDsGrVqlzv65xzzmHChAkMGzaMRYsWhZwdcPHixVxxxRVUqFCBqKgoihQpwpw5c47qeKkCE7BSpUpRvnx5GjVqlJZEAdSpUweAjRs3At5YMoAuXbqk21fq6/nz5wNeclilSpW0JAogIiIiLSFJNWfOHFJSUujcuTNJSUlpy3nnnUdcXFza8fLSN998w+HDhzOcww033EBUVFTaOXzzzTdERkZmSGqvu+66PI9JREREJJRNWzaxZnQrztv/OYtr3c25d4wnqkh0uMPKNbVIZSO3LUHHizJlylC8eHHWr1+fo/pjxozhrrvuol+/fowaNYpSpUqRkpJCo0aNOHToUK6PP2bMGCpWrMi4ceMYOHAgpUuXplu3bjz22GPExMSwceNGWrZsSd26dRkzZgzx8fFERUUxePBgVqxYkevjpSpVqlS619HR0SHLgLTzSh23VKlSpXT1KlasmG79li1bqFChQoZjBpdt374dgJo1a4aMcefOndmfSC5ldg5RUVGUKVMm3TmUKlWKIkWKpKsX6rxERERE8tqyn3+kxFs3UIdtrDj/Kf59yU3hDumoKZEqoKKiomjevDlz5szh8OHDFC1aNMv6b7zxBi1btuTJJ59MK1u7dm2GesWKFePIkSMZynfu3EmZMmXSXsfGxjJ8+HCGDx/O+vXrmT59OgMGDCA6OpqRI0cya9Ys9uzZw7Rp0zj11FPTtjtw4MDRnO4/Urp0aQC2bt1KjRo10sq3bt0KkHZelSpV4ueff86w/bZt29K9Tq0/e/bsDElc4Pq8FHgO9er9nfwnJSWl+2wqVarE7t27SUxMTJdMBZ+DiIiISF6b//kn1J13M8Usie3t3uD0s1qFO6R/RF37CrABAwawc+dO+vfvH3L92rVr+fHHHwEvgQlupUid8S5QQkIC27ZtY8eOHWllq1evzrI7XkJCAvfeey/169dn2bJlaccD0h3zl19+YcGCBTk8u7z
TrFkzwEsmA02ZMgWACy+8EIDGjRuzceNGvvnmm7Q6KSkpTJs2Ld12rVq1IiIigg0bNtCwYcMMS7Vq1fL8HBo1akTRokUznMObb75JUlJS2jk2atSI5ORk3n333XT13nrrrTyPSURERAQgJcXx7huvcs68rrjIoiTfNJtTT/AkCtQiVaBdeOGFjB49mn79+rFixQp69OhBfHw8u3fv5rPPPuPVV19l6tSpNGjQgMsuu4yRI0cybNgwzj33XObOncv06dMz7LN9+/YMHjyYzp07069fP3bs2MHw4cMpW7ZsunqNGzembdu21K9fn9jYWObPn8/SpUvp3r07ABdffDFRUVF069aNe++9ly1btjBkyBDi4+NJSUnJl/cnVb169ejYsSNDhw4lKSmJ888/n4ULF/LII4/QsWPHtCniu3fvzogRI7jmmmsYNmwY5cuX56WXXsowvXiNGjV44IEH6NOnD6tWraJZs2YUK1aMjRs3MmfOHHr16kWLFi1CxrJu3TqqVauWbuKLnChdujT9+vVj+PDhlChRgssvv5wVK1YwaNAgmjZtSps2bQBvEo2mTZtyyy23sGPHDmrWrMn06dNZunQpQNrMfgAPP/wwDz/8MKtXr87xpCUiIiIigQ4lJvPuK4/QYdvTbCl+GuVufZ+iJU8Jd1h5wzlXaJZ///vfLivLly/Pcv2JasGCBe66665zFStWdFFRUa5UqVKuVatWbtKkSS45Odk559yBAwfcrbfe6sqWLetiY2NdmzZt3Jo1axzghgwZkm5/7777rqtXr54rVqyYa9Cggfvkk09cs2bNXLNmzdLq3H///e6ss85ycXFxLiYmxp1xxhnumWeeSbefN99809WuXdsVLVrU1a1b173++uuue/fuLiEhIa3O2rVrHeDGjx+f5TkOGTLEAS4xMTFdeUJCguvcuXO6ss8//9wBbs6cOWllR44ccQMHDnTx8fEuKirKxcfHu4EDB7ojR46k23b16tWudevWrnjx4q5s2bLurrvuci+99JID3Nq1a9PVfe2119x5553nYmJiXIkSJVydOnXcHXfc4TZu3JhWJ/j9XbZsmQPciy++mOX5pp7D559/nlaWkpLiRo8e7WrVquWKFCniKlas6G6//Xa3Z8+edNtu377dXX/99S42NtadfPKIimcnAAAgAElEQVTJrmvXrm7ChAkOcEuWLMnwngafV7CC+r0RERGRf2bbn/vdWyN6Ojckzq179nKXcmhvuEPKEWCRy0FuYV7dwqFhw4Zu0aJFma5fsWIFp59+ej5GJJLeyy+/zMCBA1m/fj0xMTH5dtw77riDCRMmsGvXrmzH0wXT90ZERESCLd+4nd/H38QlKV+ysfr1VOn8AkSeGJ3hzGyxc65hdvVOjLMRKSTmz5/PPffcc0yTqAkTJrBnzx7q1avHkSNHmDVrFi+99BL9+/fPdRIlIiIiEmzuD6uIfa8Hl9hytp7zAFUu/w+YhTusPKdESuQ4kjrBxbFUokQJnn76aVavXs3hw4epVq0aw4YNy3RSEhEREZGccM4x9ZOvOPfr3lSN2Mae1i9Q8bzO4Q7rmFEiJVLItG/fnvbt24c7DBERESlAjiSl8MLU6XRa3Z/YqCRSOr3LyTUvDHdYx5QSKREREREROWq79h9h7Ksvcteux0gqVppiN71LRIWCP35aiZSIiIiIiByV37b/xbuvPsb9h1/ir5KnU7LXu3BSxXCHlS+USImIiIiISK59sWobq16/n/68x54qLSjZdTIUjQ13WPlGiZSIiIiIiOTKlAW/EDvrbm6OXMC+M7pw8tXPnDDTm+eVwnW2IiIiIiJy1JKSUxj13je0WNqPRpErONxsELHN7yuQ05tnR4mUiIiIiIhka++hRIa8Novbfn+AGpHbSGn3MkXPvD7cYYWNEikREREREcnShp0HGDbuDR7e93+UjE4msvO7UK1gT2+enYhwByDH3sKFC+nQoQOnnHIK0dHRlClThlatWjFx4kS
Sk5PDHV6W1q1bh5kxYcKEcIeSqQkTJmBmrFu3LtfbmhlDhw49quMuXLiQ8847jxIlSmBmLFmy5Kj2IyIiIpKV79bu4vHnxjB6/wBOjo0h+pY5hT6JArVIFXhPP/00/fr146KLLmLkyJEkJCSwe/duZs+ezW233UbJkiW56qqrwh2mHIWePXtSvHhxZsyYQUxMDLVq1Qp3SCIiIlLATF/8Oz+8+xTPRI0jqWxdinabDnGVwh3WcUGJVAH2xRdf0K9fP/r06cOzzz6bbt1VV11Fv3792L9/f5iik38iJSWFVatWMXDgQC666KJ/vD/nHImJiURHR+dBdCIiInKiS0lxjPpkJSUWDOexqPdJrNaSojdMhKInhTu044a69hVgI0aMoHTp0jz++OMh19eoUYMGDRoA8Mcff9C7d29q1apFTEwMVapUoVOnTmzatCndNj169KBq1aoZ9tW8eXOaN2+e9nrfvn3ceeedxMfHU7RoUSpUqMDFF1/MypUr0+o899xzNG7cmNKlS1OyZEkaNWrEzJkzj+pchw4dipmxcuVKLr30UkqUKEF8fDzjx48HYNKkSdSpU4fY2FhatGjB6tWr022fmJjIoEGDqFq1KtHR0VStWpVBgwaRmJiYrt6aNWto06YNMTExlCtXjr59+3L48OGQMb3yyiuceeaZFCtWjLJly9KzZ0927dp1VOcXaMKECURGRpKSksIjjzyCmaX7TCZPnpzuuF27dmXLli3p9lG1alW6dOnCuHHjqFOnDtHR0Uf93ouIiEjBcuBIEn0mf0Otr++lT9T7pPyrG0W6TFMSFUQtUgVUcnIy8+bNo127dhQrVizb+rt27aJYsWIMHz6ccuXKsXnzZp588kmaNGnCypUrc7SPQPfccw8ffPABw4YN47TTTmPnzp0sWLCAP//8M63OunXr6NWrF1WrViUpKYkZM2ZwxRVX8NFHH9G6detcnzNA+/btufnmm7nvvvt44YUXuOmmm/j111+ZN28eI0aMIDExkb59+9KpUye+/fbbtO26d+/OtGnTePDBB2natCkLFy7k0UcfZc2aNUydOhWAI0eO0KpVKw4ePMjzzz9P+fLlGTt2LO+8806GOAYMGMCTTz7JXXfdxahRo9i0aRODBg1i2bJlfP3110RGRh7V+QG0adOGr776iqZNm9KzZ0969epF0aJFAXj55Zfp3bs3119/PcOHD2fz5s08+OCDfPvtt3z//ffExv79kLzPP/+cJUuWMGTIEMqXLx8yQRYREZHCZcueg/QdP4+7dz3M+ZHLcRcNJuKCewvl9ObZUSKVnY8HwNafwhtDxfrQekSuNtmxYwcHDx4kISEhR/Vr167NM888k/Y6OTmZJk2aEB8fz8cff8zVV1+dq+MvXLiQzp0707Nnz7Sy4H088cQTaX9PSUmhZcuW/PLLL7z00ktHnUj179+fbt26AdCwYUNmzJjB2LFjWbt2LXFxcQBs2bKFvn37sn79ehISEli2bBmvv/46Q4YMSZv44ZJLLiEyMpLBgwczYMAAGjRowMSJE1mzZg0LFy6kUaNGALRu3Zr69euni2HdunWMGjWKIUOG8NBDD6WV16pVi6ZNmzJjxgzatWt3VOcHUK5cOUqVKgXAqaeemhZLcnIygwcPpnnz5rzxxhtp9evUqcMFF1zAuHHjuOuuu9LKd+/ezeLFi6lYseJRxyIiIiIFx4+//8nACR8zOvExakRthatexgrx9ObZUdc+SfPiiy9y5plnEhsbS1RUFPHx8QCsWrUq1/s655xzmDBhAsOGDWPRokUhZwdcvHgxV1xxBRUqVCAqKooiRYowZ86cozpeqsAErFSpUpQvX55GjRqlJVHgJRYAGzduBLyxZABdunRJt6/U1/Pnzwe85LBKlSppiQtAREQEHTp0SLfdnDlzSElJoXPnziQlJaUt5513HnFxcWnHy2urVq1i+/btdO7cOV1506ZNSUhISDuPVI0aNVISJSIiIgB89NMWhoy
dyvik/1C96B4iur4DSqKypBap7OSyJeh4UaZMGYoXL8769etzVH/MmDHcdddd9OvXj1GjRlGqVClSUlJo1KgRhw4dyvXxx4wZQ8WKFRk3bhwDBw6kdOnSdOvWjccee4yYmBg2btxIy5YtqVu3LmPGjCE+Pp6oqCgGDx7MihUrcn28VKktNamio6NDlgFp55U6bqlSpfQz0KQmGanrt2zZQoUKFTIcM7hs+/btANSsWTNkjDt37sz+RI5CZucB3rkEj88KVU9EREQKF+ccz839jUWfvcXUos8SHVuayC7ToULdcId23FMiVUBFRUXRvHlz5syZw+HDh9PG0GTmjTfeoGXLljz55JNpZWvXrs1Qr1ixYhw5ciRD+c6dOylTpkza69jYWIYPH87w4cNZv34906dPZ8CAAURHRzNy5EhmzZrFnj17mDZtGqeeemradgcOHDia0/1HSpcuDcDWrVupUaNGWvnWrVsB0s6rUqVK/Pzzzxm237ZtW7rXqfVnz56dIYkLXJ/XAs8j2NatW2nYsGG6MlNfZxERkULtUGIyA97+kaI/TWFc9H+x8vWI6PyWpjfPoXzv2mdmVcxsupntMbO9ZvaOmcXncFuXyXLWsY77RDRgwAB27txJ//79Q65fu3YtP/74I+AlMEWKFEm3PnXGu0AJCQls27aNHTt2pJWtXr06y+54CQkJ3HvvvdSvX59ly5alHQ9Id8xffvmFBQsW5PDs8k6zZs0A0o0rApgyZQoAF17oPXCucePGbNy4kW+++SatTkpKCtOmTUu3XatWrYiIiGDDhg00bNgww1KtWrVjch61a9emQoUKGc7j66+/Zv369WnnKSIiIrJj32E6v/IN1Zc9zcgirxBRowURN32sJCoX8rVFysxigLnAYaA74IBHgc/NrIFzLicPNZoAjA0q+yUv4ywoLrzwQkaPHk2/fv1YsWIFPXr0ID4+nt27d/PZZ5/x6quvMnXqVBo0aMBll13GyJEjGTZsGOeeey5z585l+vTpGfbZvn17Bg8eTOfOnenXrx87duxg+PDhlC1bNl29xo0b07ZtW+rXr09sbCzz589n6dKldO/eHYCLL76YqKgounXrxr333suWLVsYMmQI8fHxpKSk5Mv7k6pevXp07NiRoUOHkpSUxPnnn8/ChQt55JFH6NixY9oU8d27d2fEiBFcc801DBs2jPLly/PSSy+xd+/edPurUaMGDzzwAH369GHVqlU0a9aMYsWKsXHjRubMmUOvXr1o0aJFyFjWrVtHtWrV0k18kVORkZE8/PDD9O7dmy5dutClSxc2bdrEwIEDOe2007jxxhtztJ+qVatSu3ZtPvnkk1wdX0RERE4Mq7b+xS3jv+aeg2NoF/UlnN0NazMaIotkv7Gkye+ufTcD1YHazrnfAMzsR+BXoDcwOgf72OSc+yb7agJw9913c+655/LUU09x3333sWPHDk466SQaNmzI2LFjufLKKwF46KGH+PPPP3nqqac4dOgQzZo145NPPqF69erp9lezZk2mT5/OoEGDaNeuHbVq1WL06NEMGzYsXb0LL7yQadOmMWLECJKSkqhevTpPPfVU2qxx9erVY8qUKTz00EO0bduWGjVqMGLECGbNmsW8efPy5b0JNHHiRKpXr864ceN49NFHOeWUU3jggQcYMmRIWp3o6GjmzJlDnz59uP322ylRogSdOnWiTZs23Hrrren2N2zYME4//XSef/55nn/+ecyMKlWq0LJlS0477bRM40h9QPLRTgJxyy23EBMTw6hRo7jqqquIjY3l8ssv5/HHH0839XlWkpKSQk4OIiIiIie+z1du58HXv+LZiNGcE/ETXDQILrhP05sfBXPO5d/BzD4DijnnmgSVzwdwzmXZ98jMHPCYc27Q0Ry/YcOGbtGiRZmuX7FiBaeffvrR7FokT7z88ssMHDiQ9evXExMTE+5wckTfGxERkeOfc45xC9YxbuaXTIkZRYLbjF31PJx5Q7hDO+6
Y2WLnXMPs6uX3GKl6wLIQ5T8DOZ0a5DYzO2xmB8xsrpldkHfhiYTX/Pnzueeee06YJEpERESOf4nJKTz47jLenvkRH8YMJSFqN9blbSVR/1B+d+0rDewOUb4LyDi9WUaTgQ+BzUAC0B+Ya2atnHPzQm1gZrcAtwBpz0USOV6lTnAhIiIikhf2HEjktimLiVo7l3eLjyG6RCms84ea3jwPhGP681B9CXPUKdM51zXg5Zdm9j5eC9ejQNNMtnkZeBm8rn25C1VERERE5MS0dsd+ek74H+f9OZPHiv6XiHJ1ofM0iDsl3KEVCPndtW83XqtUsFKEbqnKknPuL2AmcM4/jEtEREREpMD4evUO2j33FTfse43hUS8TUb053PiRkqg8lN8tUj/jjZMKVhdYfpT7NEK3comIiIiIFDpvfLeB/3tvCc+WGEerxM/hX13giqc1vXkey+8WqQ+ARmaWNqe2mVUFmvjrcsXM4oA2wLd5FB/5OYuhyIlO3xcREZHjR3KK49EPl/PYO9/y1kmjvSSqxUBo+5ySqGMgv1ukXgH6AO+b2SC8lqRHgI0EPGTXzBKA1cDDzrmH/bL7gNrA5/w92cR9QEWgc14EV6RIEQ4ePKgZ00Ry6ODBgxQpon+YRUREwm3f4ST6vv4Dy1eu4NOST1H+yAZo9yKc1SncoRVY+ZpIOef2m9lFwFPAJLxueZ8Bdzvn9gVUNSCS9C1mq4Cr/eVkYC+wAOjpnPsuL+IrX748mzZtonLlyhQvXhzTg8lEQnLOcfDgQTZt2kSFChXCHY6IiEih9vvuA/SauIjIP37ms5OfJMYdhi5vQ/Xm4Q6tQMv3WfuccxuAa7Ops46gmfycczOAGccuMoiLiwNg8+bNJCYmHstDiZzwihQpQoUKFdK+NyIiIpL/vt+wm1teW8TZiT/wQvGniYo+Gbq8DxVCTUsgeSkc058f1+Li4nRjKCIiIiLHvfeXbKL/9B+5MWYBAyJfxMrUgU7T4OTK4Q6tUFAiJSIiIiJyAklJcTz92a88+9kvjCrzEe33T4HqLaDDa1BMDQL5RYmUiIiIiMgJ4lBiMve+tZTZP27krYpTOefPWXBWF7hS05vnNyVSIiIiIiIngO17D3Hza4tYu2kLn1d6hVN3fwvNH4Rm94MmSct3SqRERERERI5zP2/eQ6+Jiyh6YCsLyj/NSXvWaHrzMFMiJSIiIiJyHJv981bufnMJZxfdxPi4xylycD90ng41WoQ7tEItIvsqIiIiIiKS35xzvDR/Nb0nL+a6kr/ymj1EkYgIuGmWkqjjgFqkRERERESOM0eSUhj47k+8tfh3HklYSpc/nsTK1obOb2l68+OEEikRERERkePIrv1HuHXyYr5bu5Opp83n/I0vQ/Xm/vTmJ4c7PPEpkRIREREROU78tv0vbpqwiB179/FFrenEb3gPzuoMVz6j6c2PM0qkRERERESOA1/88gd3TP2e0pGH+Cb+FeI2fAnNBkDzAZre/DikREpEREREJMwmLVzH0BnLaVT2MOOjHyd66y9w1fPwry7hDk0yoURKRERERCRMkpJTeHTmCiZ8vY6u1ffxf38NJWLvX9BpGtRsGe7wJAtKpEREREREwmDvoUT6TP2BL375g8ca/EGn9YOx6BJw08dQsX64w5NsKJESEREREclnG3YeoOfE/7F2x37eOHcNjZYNhbK1ofM0OPnUcIcnOaBESkREREQkH323dhe3Tl5McnIKc8/5lvilT0O1ZnD9JE1vfgJRIiUiIiIikk+mL/6d/7zzI1VLRvN2/DvELX0DzuwIVz4LUdHhDk9yQYmUiIiIiMgxlpLiGDV7FS/OW03LasV4qegYiqz4HJo9AM3/o+nNT0BKpEREREREjqEDR5K4580lfPLzNnr/qzgDdg3G1q+Ats/B2V3DHZ4cJSVSIiIiIiLHyLod++k9aTG/bP+L0c2LcPXyPtihPd6kEjUvDnd48g8okRIREREROQY+X7mdvm/8QES
E8X7rJBosuBWiS8CNH0OlBuEOT/6hiHAHICIiIiJSkKSkOJ797Fdumvg/Ti0Vw9zm62kw7yZvWvNenyqJKiDUIiUiIiIikkf2Hkqk35tL+HTFdq47qzwjik8hau54qHERXDceipcMd4iSR5RIiYiIiIjkgV+2/UXvSYvZuOsAIy8pR4e1A7GV30KTu6HlQxARGe4QJQ8pkRIRERER+Ydm/riF/tOXEhMdxQdXRVP3y65weK/XCnXGNeEOT44BJVIiIiIiIkcpKTmFUbNXMXb+Gv4VX5IJDZZz8if/gZMrQ5e3oeIZ4Q5RjhElUiIiIiIiR2HX/iPc9foPfPXbDrqdU4khRSYS+ekEqNESrn0VYkqHO0Q5hpRIiYiIiIjk0rJNe+g9aTF/7DvMs20q0faXAbDxW2h6D1w0WOOhCgElUiIiIiIiuTB98e8MfPcnypSIZma7aE6b19EbD9V+AtS7OtzhST5RIiUiIiIikgNHklJ4dOZyXlu4nsbVy/ByvZ846SN/PFTXd6BCvXCHKPlIiZSIiIiISDa27z3E7VO+Z9H63dzapDL3u3FEzJmo8VCFmBIpEREREZEsLF6/i9smf89fh5IYe3VlLv2pP/z+HTTtBxcN0nioQkqJlIiIiIhICM45Jn+znoc/XM4pJYszrU0kVT+9Hg7/Be0nQr124Q5RwkiJlIiIiIhIkEOJyQx6bxnTF/9Oi9rleL7Oj8R8MABOPhW6vgsV6oY7RAkzJVIiIiIiIgF+332A2yZ/z0+b9tCvRQJ3Hn4Z+8QfD3Xdf6F4qXCHKMcBJVIiIiIiIr4Fv+2gz9TvSUp2vNa+Chf+cI/GQ0lISqREREREpNBzzvHyF2sYOWslNcrFMuHiFCrPvg4O79N4KAlJiZSIiIiIFGr7Dydx//QfmfnTFi6vX5Gnaiyh6HsP+OOh3tN4KAlJiZSIiIiIFFprd+znltcWsfqPfQy8tDq9/noRmzURal7sPR9K46EkE0qkRERERKRQ+nT5Nu55cwlRkcYbN1Tl3P/dCb//T+OhJEeUSImIiIhIoZKS4nj6s1959rNfOaNyHP+9KIUKH1+t8VCSK0qkRERERKTQ2HMwkXveXMLcldu59uxTGZGwiCJvazyU5J4SKREREREpFFZt/YtbJi1i0+6DPHblaXTa+Rz28USo2QqufUXjoSRXlEiJiIiISIE3Y+lm7p/+I7HFoni7SzXO/Pp2bzzUBfdCi4EaDyW5pkRKRERERAqspOQURs5ayStfruXfCaV4tXkSpWZe5Y2H6vAa1L0q3CHKCUqJlIiIiIgUSDv3HebO13/g69U76dYonocqfUvUWwOgZBXo9j6UPz3cIcoJTImUiIiIiBQ4P/7+J7dOWsyO/UcYfc3pXLP1Kfj4NY2HkjyjREpERERECpRpizYy6L1llIstyvtdq3P6FzfDpkVwwX3Q4kGNh5I8oURKRERERAqEI0kp/N+Mn5ny7Qaa1CzDixckEjfjSn881CSo2zbcIUoBokRKRERERE542/Ye4rbJi/l+w5/0vrAa95f9msg3NR5Kjh0lUiIiIiJyQvvful3cNvl7DhxJ4sXrz6D1xifgo9fgtEvgmlegeMlwhygFkBIpERERETkhOed4beF6HvlwOVVKxzCtYzzV596k8VCSL5RIiYiIiMgJ5+CRZAa++xPv/LCJi08vz9PnHyL2nTaQeEDjoSRfKJESERERkRPKxl0H6D1pMSu27uWelqdx58lfEPH6A1AyAbrPgPJ1wh2iFAJKpERERETkhPHlr39w5+s/kJziGNelAS1+GwkLJmk8lOQ7JVIiIiIictxzzvHi/NU88ckqTit/Eq9efQpV5vTwxkNd2B+aPwgREeEOUwoRJVIiIiIiclzbdziJ/m8t5eNlW7miQSVGnXuA4m+11ngoCSslUiIiIiJy3Fr9xz56T1rM2h37GXR5HXoWm4tNHaDxUBJ2SqRERERE5Lg0++et3DttKUWiIpjc40warxg
GP0yG0y6Fa17WeCgJq3zvSGpmVcxsupntMbO9ZvaOmcUfxX7+Y2bOzL46FnGKiIiISHgkpzienL2KWyYtplq5Esy8sQaN53f1kqgL74eObyiJkrDL1xYpM4sB5gKHge6AAx4FPjezBs65/TncT3VgILD9WMUqIiIiIvlvz4FE+r75A/NW/UGHhqfyyNn7KPr6pd54qOsnw+lXhjtEESD/u/bdDFQHajvnfgMwsx+BX4HewOgc7udFYApQG3VPFBERESkQVmzZS+9Ji9my5yCPtatHp4g52GSNh5LjU3537WsLfJOaRAE459YCC4CrcrIDM+sEnA3855hEKCIiIiL57v0lm7jmha85nJTMmzf9i85bR2Ef3Qc1WsLNc5VEyXEnv1tz6gHvhyj/GWif3cZmVgp4CrjfObfLzPI4PBERERHJT0nJKQz/eCX//Wot51YtzQttK1B2ZmfYtNgbD9X8P3o+lByX8juRKg3sDlG+CyiVg+1HAb8AE/IwJhEREREJgx37DtNn6vd8s2YXPc6vyqD6u4macgkkHoTrp8DpV4Q7RJFMhWN8kQtRlm3TkpldAHQDznbOhdpHZtvdAtwCEB+f68kBRUREROQYWLLxT26bvJhd+48wun0DrkmeBZMGQKmq0GMmlKsd7hBFspTf7aS78VqlgpUidEtVoLHAf4HfzaykmZXESwQj/ddFQ23knHvZOdfQOdewXLly/yR2EREREckDb/5vAx1eWkhkhPHOzWdzzcYR8NF9UPNibzyUkig5AeR3i9TPeOOkgtUFlmez7en+cmuIdbuBe4Cn/1F0IiIiInLMHE5KZugHy3n9uw1ccFpZxrSpQMkPboDN32s8lJxw8juR+gB4wsyqO+fWAJhZVaAJMCCbbVuEKHsaiATuBH4LsV5EREREjgNb9hzktsnfs2Tjn9zevAb31t5J5KSLIfGQxkPJCSm/E6lXgD7A+2Y2CG+81CPARryuewCYWQKwGnjYOfcwgHNuXvDOzOxPICrUOhERERE5PnyzZid9pn7PwSPJvNT5X1x2cKbGQ8kJL1/bTp1z+4GL8Gbem4T3UN21wEXOuX0BVQ2vpUltuyIiIiInKOcc475aS+dXvyWueBE+uPXfXLb6MY2HkgIh32ftc85tAK7Nps46cjCTn3Oued5EJSIiIiJ56eCRZP7zzo+8t2QzrepW4KnLyhH7XgdvPFSzB6DZAI2HkhNaOKY/FxEREZECbMPOA/SevJiVW/dy3yW1uL3aNiImttR4KClQlEiJiIiISJ6Zt2o7fd9Y4nXr696QFnveh0n/gVLVoMcUdeWTAkOJlIiIiIj8Y845Xpi3midmr6J2hZMY27EeCQsHw5IpUKs1XDMWip0c7jBF8owSKRERERH5R3bvP8J9by3ls5XbueqsUxhxcWmKv3MtbP7BGwvV7AGNh5ICR4mUiIiIiBy1xet3c+fU79mx7whDr6xL98qbsPHtvfFQN0yFOm3CHaLIMaGfBkREREQk15xzvPLFGq4fu5DISGP6rY3oETUHe+0qKFbSm9pcSZQUYGqREhEREZFcCezK1/qMioy4ohonf3ofLJuu8VBSaCiREhEREZEcC+zK939t69Gt6m7stZawex1cNAia3qvxUFIoKJESERERkWylpDhe/WoNj89aRaWSxZh+ayMabHoT/jsYSpSDHh9BQuNwhymSb5RIiYiIiEiWgrvyjby8CnGz+8DKD6HWZdDuRYgpHe4wRfKVEikRERERyVSGrnynbsUmXgR/bYVLh0Oj28As3GGK5Dt1YBURERGRDFJSHC9/sTpgVr7z6J78Djb+coiIhJ6zofHtSqKk0FKLlIiIiIikk6Er32UVifu4J6yeC/Wuhiuf0ax8UugpkRIRERGRNBm68lVch03oCIf2wBVPw797qBVKBHXtExERERFCdOXrfQ7dD03BXmvntT7dPBca3qgkSsSnFikRERGRQi5DV75WZYmb2RU2fA1ndYHLH4foEuE
OU+S4okRKREREpBDL0JWvzEpswnWQdBiufhnOvD7cIYocl5RIiYiIiBRCwQ/YffuWf1N/5TMw+zmoWB+umwBla4Y7TJHjlhIpERERkT4lRocAACAASURBVEImQ1e+lnHEzbgeNn8P59wMlzwKRYqFO0yR45oSKREREZFCJENXvrgfsAl3AQYdJkHdtuEOUeSEoERKREREpBAI7Mp3SsnivHPz2ZyxbATMHgeVG8J146BUQrjDFDlhKJESERERKeCCu/KNalGc2A+ugW3LoElfuGgwRBYJd5giJxQlUiIiIiIFWIaufDFfYxPuhSLFofN0OK1VuEMUOSEpkRIREREpgFJSHK98uYZRn3hd+d7tdSb1ljwMS1+HhKZw7asQVyncYYqcsJRIiYiIiBQwu/cf4d63ljI3tSvfhZHEvt8Wdv4/e/cdV2XZ+HH8c7FBBHHgQHHv3OYuszRXapm2937qaT3V07Jlu0xbNrRpPU21tNxbc6a4BwoqCg5QEARknuv3x7FfDtRTATfj+369egW317nP9/yh+PW+Rgz0fAJ6/he8vJ2OKVKqqUiJiIiIlCEnTeUb1IKb/OZjvnwSAsPg5qlQ/0KnI4qUCSpSIiIiImXAaVP5bmtJyzUjYMsUaNQbLv8Igqs5HVOkzFCREhERESnlTpvK1y2P4KkDIW0f9BkJXe8HLy+nY4qUKSpSIiIiIqXYmrhk7v9mLYfScxg5uDk32l8xX78AFWvBrTOhzvlORxQpk1SkREREREqhP6byvTErmohKgfx8azNarHwcdsyCZpfBkPfd66JEpEioSImIiIiUMqdO5RvVKZ0KUwZA5iEYMArOvwOMcTqmSJmmIiUiIiJSipw4le/Fwc24IWci5tvXIKw+3DEXarZxOqJIuaAiJSIiIlIKnDqVb8rNDWm+/CHYtRhaXQWXjQb/ik7HFCk3VKRERERESrjTpvK1P0SFn/tDbiYMGQttr9dUPpFipiIlIiIiUoKtiUvm39+s5XB6Di8OasINmf/D/DAGwlvAsM8hvJnTEUXKJRUpERERkRLo1Kl8U2+IpNnSeyF+FXS4Bfq9Br6BTscUKbdUpERERERKmBOn8g1oVYNRreIJmnIzuFww7DM470qnI4qUeypSIiIiIiXIiVP5XhrUmOtTx2Mmj4OabWH451C5gdMRRQQVKREREZES4dSpfL9cV5Omv90B+9dDl3uh9/Pg4+90TBE5TkVKRERExGGnTeVrtp2gKTeAty9c+x007e90RBE5hYqUiIiIiINOnMr38mUNuO7wWMwvX0GdLjDsUwit7XREESmAl6cDjTGtjTE/GGMOGGNyjDHtj19/yRhzadFFFBERESl7XC7Lx4tiuerjFfh6e/HrNVW4fv0tmLVfwwWPwi3TVKJESjCPipQxphuwEmgDTAa8T7nHPYUfTURERKRsSsnI4Y4Jq3l1xjb6tghnVs9dNJkyGDKT4caf4JJnwFsTh0RKMk9/h74OzAMGc3pxWg1cX8i5RERERMqkE6fyvTqwLtccHI2ZMQkaXARXjIOK1Z2OKCIe8LRIdQCutNa6jDHmlF87BOh3vIiIiMhZuFyWcUt28ubxXfmmDQ+m8aIb4MgeuPgZ6PEf8PJ41YWIOMzTIpUNnOno7BpAauHEERERESl7TtqV77zqjK63koCpz0FwuHstVN2uTkcUkb/I0yL1G/CAMebnE67Z4/+/DVhQqKlEREREyogTp/K91r82V+97DTN3OjTpD5d/AEGVnY4oIn+Dp0XqWdxlai3wI+4SdYMx5g2gC9CpaOKJiIiIlE6nTuWbMdSPhouug/SD0PdV6PIvOG3FhIiUFh5NxLXWrgUuAo4AzwMGeAgIAHpZa7cWUT4RERGRUic5I4fbv/yd12Zso1+Lasw+fzUNfx3u3onv9tnQ9V6VKJFSzuN9Na21vwM9jTFBQFUgxVp7tMiSiYiIiJRCq3cnc/+37ql8b/SrwfC9L2EWzYeWQ2HQ2xAQ6nR
EESkEnp4jNc4YUw/AWptprd3zR4kyxkQaY8YVXUQRERGRks/lsny0KJarx7kP2J01xMVVq6/FxC2DQe/AsM9UokTKEE/32LwDCD/Dr1UDbi+cOCIiIiKlz4lT+fq3qMqctkuoP/16CKgEd86HDrdoKp9IGVMYR2ZXB44Vwn1ERERESp0Tp/KN6luVK3c9i1m2HNrdAP3fAL8KTkcUkSJwxiJljBkCDDnh0jPGmKRThgUCPYGoIsgmIiIiUmKduivfnIGZ1F1yL+TnwNDx0PoqpyOKSBE62xOpBkCf419b3Fuc55wyJhtYDTxe+NFERERESqbkjBwe+WEdC6KTGHReFUaF/Yz/7A+hRisY9gVUbeR0RBEpYmcsUtbaMcAYAGPMXuAya+364gomIiIiUhKdOJVvdJ9Qroh9EhMTBZ3ugj4vgm+A0xFFpBh4tEbKWlunqIOIiIiIlGR5+S4+WhTLmLk7iKgUyNy+h4lcehdg4KqvoMVgpyOKSDH6S5tNGGNCgEa4D+I9ibV2WWGFEhERESlJ9hzO5OEf1rEmLoUrzqvM68Hf4jf/S4jo6N7WPKyu0xFFpJh5VKSMMf7AeOBazrxlundhhRIREREpCay1TFwTz/NTN+NlDJ8MqMglm/6DidkC3R+Ei58Bb1+nY4qIAzx9IjUC98YTdwCfAw/g3mjiFtznSP2nKMKJiIiIOCUlI4cnJ29k5uYDdK4XxkfN1hK2eKR7O/PrJ0LjPue+iYiUWZ4eyDscGAl8ffz7Zdba8dba7sAm4BJP39AYU8cYM9EYk2qMSTPGTDbGRHrwurrGmCnGmDhjzDFjzCFjzEJjTH9P31tERETEE4u2J9H37cXM23aQFy6pxnfBYwhb+BTUuwD+tVwlSkQ8fiIVCWy21uYbY3KBE0+W+wT4DHj4XDcxxgQB83E/zboZ97bqLwELjDGtrbUZZ3l5MHAI99OxeCAEuBOYboy50lo72cPPIiIiIlKgrNx8XpuxjS+W7aZxeDA/9jpC3d+uhZx0GDAKzr8DjHE6poiUAJ4WqcO4iwy4S0xrYMnx78NwH8zriTtxn0/V1FobA2CM2QDsAO4GRp/phdbazcDtJ14zxkwDdgG3AipSIiIi8rdtSkjl4e/XsSMxnTs7V+dxr6/wmf0FVG8FV34C4c2cjigiJYinRWol0AaYjruwvHj86VIe8F9gqYf3GQys+KNEAVhrdxljlgJDOEuRKoi1Ns8Ykwrk/pXXiYiIiPwh32UZt3gno+dEExbkx+TLg2j/+z1wOAa63e/eUMLH3+mYIlLCeFqk3gD+2NfzJaAJ8CruNVargXs9vE9LYEoB1zfjXod1TsYYr+PvWxX3E64mwIMevr+IiIjI/4tPyeQ/P6xn1a5k+reoxlu1FxE0+zWoEA43TYEGPZ2OKCIllKcH8q4CVh3/OhUYYowJBAKstSl/4f0qAwWNT8Y9RdATbwCPHP86HbjGWjvvTIONMXcBdwFERp5zTwsREREpB6y1TFm3j2d+3oTLWsYOqMqA2Ocwi5dByytg4GgIqux0TBEpwc65a58xxs8Ys8oYc9L2NNbaY3+xRP3/Swt6m7/w+reB84FBwAzgG2PMZWd8M2vHWWs7Wms7VqtW7a8lFRERkTInNTOX+79dy0Pfr6NpjYos6neIgUuHY/ZvhMs/gmGfq0SJyDmd84mUtTbHGNMEyC+E90vB/VTqVGEU/KSqoDzxuDe8APjVGLMQGAX8Wgj5REREpAxbFnOIR35cT9LRbJ6+pBa3p47Fa/aPUKczDB0HYfWcjigipYSna6TmAr1xb13+T2zGvU7qVC2ALX/znquBh/52IhERESnzsnLzGTUrmk9+20WDqhWYdYU3DX+7EdL2Qa+nocd/wNvTvxaJiHhepEbjnkLnBfwM7OeUKXrW2j0e3GcqMMoY08BauxPAGFMP6A484WGW/3c8Tw8g9q++VkRERMqHbQfSeOi7dWw7cJSbO9VkRPAUfKe94376dPt
sqN3R6YgiUgoZawtasnTKIGNcJ3xb4Austd4e3KcCsB44hvtgXQu8CFQEWltr04+Pq4u7HI201o48fu153NMClwIHgBq4z5XqDVxnrf3uXO/fsWNHu3r16nMNExERkTLA5bJ8tnQXb8yMJiTQh/cvDabL2idg/zpodyP0ew38g899IxEpV4wxa6y15/wXFk+fSN3FGQrUX2GtzTDGXAyMAb7CvcnEPOChP0rUcQbw5uTNMKJwT+G7BgjFXabWAxdYaz09x0pERETKgf2px3jkh/Usiz1Mn+bhjGkYRfDs58A3AK7+GpoPcjqiiJRyHj2RKiv0REpERKTs+2X9Pp7+aSO5+ZZXLq3B5XtfxWyfCQ16weUfQkhNpyOKSAlW2E+kREREREq0tKxcnpuymZ/WJtC2TiU+6pJMjfnDISsV+r4Kne8Br3Oe/CIi4hEVKRERESn1Vuw8zCM/rOdAWhaP9KrDfXkT8PplPIS3gJt+huoFbRosIvL3qUiJiIhIqZWT52L0nO18vDiWyMpB/Do8hOZL74BD0dDlPrjkWfe6KBGRQqYiJSIiIqXSjoNHefC7dWzZn8a1HSN4odoC/H55GYKqwI0/QcOLnY4oImWYipSIiIiUKi6XZcLy3bw6YxsV/H348spa9Nw8AjYtce/GN+hdCKrsdEwRKeP+cpEyxgTiPs/poLU2r/AjiYiIiBTsYFoWj03cwOLtSfRqWo23W+0idO7tkJ8Hg9+HdjeAMU7HFJFywOOta4wx/Y0xq4CjQBzQ+vj1j40x1xRRPhEREREAZm7aT9+3F7Nq12Feu6wen1X6lNBf74IqjeGeJdD+RpUoESk2HhUpY8wg4FfcJWrEKa/bC9xS6MlEREREgKNZuTz643ru+TqKOmFBzB0WwDWrr8Vs+AF6Pg63zYQqDZ2OKSLljKdT+54HJlhrbzXG+ACvnPBrG4F7CjuYiIiIyOrdyTz8wzoSUo7xQM96POj7E94/vwWhdeDWmRDZ2emIIlJOeVqkWgBPHP/anvJrKUDVQkskIiIi5V5uvot35+1g7IIYIsICmXJtTVqtvB8S1kCb66D/6xAQ4nRMESnHPC1SR4EqZ/i1ukBS4cQRERGR8i42KZ2Hv1/HhvhUhrWP4KW6awn45Qbw9oXhX0DLK5yOKCLicZGaBzxhjJkOZBy/Zo0xfsB9wKyiCCciIiLlh7WWr1fu4eVpWwjw9Wb8sPr0iXkZZvwK9S+Eyz+C0AinY4qIAJ4XqaeAVcA2YBru6X2PAW1wP6kaViTpREREpFxIOprN45M2MH9bIhc0rsq756cQNvsKOJYMl74EXe4DL483GxYRKXIe/Ylkrd0FdATmAIOOX+4DRAGdrbXxRRNPREREyro5Ww7S7+3F/BZziJEDGjKh1mTCJl8NgZXgzvnQ7X6VKBEpcTx6ImWMqQDss9beXMR5REREpJzIyM7jpWlb+HbVXprXDGFSH3/qLbwVErdAp7uhzwvgG+h0TBGRAp2zSBljfIFUYCgwtcgTiYiISJm3dk8KD3+/jrjkTO6+sB6Phc7HZ9JICKgE10+Exn2cjigiclbnLFLW2lxjTCKQVwx5REREpAzLy3fx/oIY3psfQ42QACZeX58OUU/BqgXQdAAMfg8q6FQVESn5PN1s4hvgVmB6EWYRERGRMmz3oQwe+n4d6/Ye4fK2tXi52W4qTLsN8rLhsrehwy1gjNMxRUQ84mmR2g5cbYxZDkwB9nPKwbzW2gmFnE1ERETKAGst3/++l5G/bsHHyzB2eBMGxr8NP38NtdrB0E+gaiOnY4qI/CWeFqmPjv8/AuhcwK9bQEVKRERETnI4PZsnJm9kzpaDdG1QhXcvyKfa7OGQshsueAQuetJ90K6ISCnjaZFqXKQpREREpMxZsC2RxyZuIO1YLiP6NeY2OxmvH96AkAi4dTrU7eZ0RBGRv82jImWtjS3qICIiIlI2HMvJ55XpW/lqRRxNq1fku+HVabTkPohfBa2vhgF
vQkCo0zFFRP4RT59IiYiIiJzTxvhUHvx+LTuTMri9ez2eqBmF76RrwXjDlZ9Cq2FORxQRKRSeHsi7g1M2lziVtbZJoSQSERGRUiffZfloUSxj5mynarA/393QlC5bRsKvU6BuD7jiI6hUx+mYIiKFxtMnUis5vUhVAboAacDiwgwlIiIipcfe5Ewe/n4dq+NSGNi6Jq+3SyF4+iDISITez0O3B8DL2+mYIiKFytM1UjcUdN0YUxmYCUwrzFAiIiJS8llrmRSVwPNTN2OAd4Y1Z/DhTzHfvw9VGsO1c93bm4uIlEH/aI2UtTbZGPMGMBL4vnAiiYiISEmXkpHDUz9tZMamA3SqV5l3Lgmg5tyb4OBG6Hg7XPoS+AU5HVNEpMgUxmYTmUBkIdxHRERESoHF25N49Mf1pGTm8N++TbgncB5e3z8HfsFw7ffQtJ/TEUVEitzfLlLGGC+gBfAssLXQEomIiEiJlJWbz2sztvHFst00Cg9mwlV1abbyCYiZC40vhSFjITjc6ZgiIsXC0137cjl9swkvwADpwMBCziUiIiIlyOZ9qTz03Tp2JKZzS7d6PNVwF34/9YWcDBgwCs6/A4xxOqaISLHx9InU65xepLKAOGCatTalUFOJiIhIiZDvsoxfspO3ZkdTKciPCTe25MKdY+DHL6BGa7jyE6jW1OmYIiLFztNd+0YUdRAREREpWRKOHOM/369j5a5k+raszhtd8wmdPhSSd0L3B6HXCPDxczqmiIgjCmOzCRERESljpqxLYMTPm3C5LG9e2ZJhmT9ivnkNgqvDzb9A/Qucjigi4qgzFiljzLi/cB9rrb27EPKIiIiIg7YfPMor07eyMDqJDnXDeLdfFSIW3AN7lkPLoXDZaAgMczqmiIjjzvZEagCnr4s6E0/HiYiISAmUdDSb0XO28/3ve6jg78PTA5pzW8gqvL+7GqyFKz6G1ldrQwkRkePOWKSstbWLM4iIiIgUv2M5+Xz6204+XBhLdp6Lm7rW48Hu1Qhb8CTMnwh1usDQjyGsntNRRURKFK2REhERKYdcLsvP6xJ4c1Y0+1OzuLRFdZ7o15QG+36BT5+BrCNw8Qjo/jB4668LIiKnOtsaqVpAorU27/jXZ2Wt3VeoyURERKRILI89zMvTt7ApIY1WEaGMubotXYITYdrVELcUandyr4Wq0crpqCIiJdbZ/olpL9AVWAXEc+51UN6FFUpEREQKX2xSOq9O38bcrQepFRrA21e3ZXDzULyWvAnL3wf/ijDoXWh3I3h5OR1XRKREO1uRuguIPeFrbSghIiJSCh1Oz+adeTv438o9BPp681jfptzeoz4BsTPhw8chdS+0vQH6vAAVqjodV0SkVDjbZhOfnvD1J8UTR0RERApLVm4+Xyzbzdj5MWTk5HFtp0ge6t2EankH4MfrYfsMCG8Bt86Eul2djisiUqpo9aiIiEgZY63llw37eX3GNhKOHKNX02o8NaA5jav4u6fwLXoDjBf0eRG6/Au8fZ2OLCJS6nhcpIwxVYGrgaZAwCm/rAN5RURESoDVu5N5adpW1u09QvOaIbx+ZWt6NK4Ku5bAj4/AoWhodhn0fx1CddKJiMjf5VGRMsY0AZbjLlABQApQCfACUoGjRRVQREREzi3ucAavzdjGjE0HCK/ozxvDWnNl+9p4Zx6CyXfDhu+gUiRc9wM06et0XBGRUs/TJ1JvAlHAECAduBTYDNwMjAAGFkk6EREROasjmTm8Nz+GCct34+PlxcO9m3DnhfUJ8jGw5jOY9wLkZMIFj8IFj4BfkNORRUTKBE+L1PnAvUDW8e+9rLXZwDhjTGXgbeCSIsgnIiIiBcjJc/HVijjenbeDtKxcrupQh0cubUJ4SADsWwfT/gMJa6DeBTBwNFRr4nRkEZEyxdMiFQIctta6jDFpwIl7o67C/VRKREREipi1lpmbDvDazG3EHc7kgsZVeWpAc5rXDIGsVJj+LPw+HoKqwtDx0Go4GON0bBG
RMsfTIrUbqH7862jgSmDm8e/7A0cKN5aIiIicat3eI7w8bQu/706hSfVgvrj1fHo2qYYB2DgRZj0F6Ylw/h1w8QgIrOR0ZBGRMsvTIjUX6A1MBMYA3xhjugF5wHnAq0UTT0RERPYmZ/LGrGh+Wb+PqsF+vHJFK67qWBsfby84FAPTH4GdC6FmW7j2O4ho73RkEZEyz9Mi9QQQCGCt/c4Yk417K/Qg4GPgo6KJJyIiUn6lZeUydkEMny/djQH+3asR91zUkGB/H8g9BotGw9K3wScQBoyCjreBl7fTsUVEygWPipS1Nos/N5rAWvsT8FNRhRIRESnPcvNdfLtqD2/P3UFyRg5D20XwaN+m1KoU6B6wYw5MfxRSdkOrq+DSl6Bi9bPeU0RECtcZi5Qx5mJglbU2vRjziIiIlFvWWuZtTeSVGVvZmZRBlwaVGTGwBedFhLoHpCbAzCdg61So0hhumgoNejobWkSknDrbE6k5QFfcu/JhjPECFgK3W2t3FH00ERGR8mNTQiovT9vK8p2HaVC1AuNv6kjv5uEYYyA/D1Z+BAtfBVceXPwMdLsffPydji0iUm6drUiduleqAXoAFYsujoiISPmyP/UYb86K5qe1CVQK9OWFwS25rnMkvt5e7gF7VrrPhDq4CRr3hQFvQFg9RzOLiIjnm02IiIhIIUrPzuPjRbGMX7ITlwvuurAB9/VqREiAr3tAZjLMeRbWfgUhEXD119DsMp0JJSJSQqhIiYiIFKO8fBc/rI5n9JztHErPZlCbWvy3b1PqVA5yD3C5YN3/3CUqOw26PQA9Hwf/YGeDi4jISc5VpCKMMQ2Of+19wrXTDuC11u4s1GQiIiJlzMLoRF6ZvpXtB9PpWDeM8Td1oF1k2J8DDm6GX/8De1dAZFcYOBqqt3AusIiInNG5itTEAq79fIaxOrhCRESkAFv3p/HK9K0s2XGIulWC+PD69vQ7r4Z7IwmA7HT3RhIrPoSAUBgyFtpcB15ezgYXEZEzOluRurXYUoiIiJRBiWlZvDV7Oz+u2UvFAF9GDGzOjV3r4u9z/N8erYWtv7i3NE9LgPY3Q+/nIaiyk7FFRMQDZyxS1tovi+INjTF1gDFAH9w7Ac4FHrLW7jnH6zoCdwEXApHAIWAJMMJau6sosoqIiPwdmTl5jF+8i48Xx5Kb7+KWbvV54JJGVAry+3NQ8i6Y8V/YMRuqt4LhX0CdTo5lFhGRv6ZYN5swxgQB84Fs4GbAAi8BC4wxra21GWd5+TVAS+BdYDMQATwDrDbGtLXW7i3S8CIiIueQ77JMjopn1OxoDqZl0/+8Gjzerxn1qlb4c1BeNix9F5aMAi8f6PsKdLobvLX/k4hIaVLcf2rfCTQAmlprYwCMMRuAHcDdwOizvPZ1a23SiReMMUuBXcfv+2yRJBYREfHA0phDvDxtK1v2p9GmTiXev64959c7ZYrezoUw7VE4vANaXA79XoWQWo7kFRGRf6a4i9RgYMUfJQrAWrvreCEawlmK1Kkl6vi1OGNMEu6nUyIiIsUuJvEor0zfxvxtiURUCuSda9oyqHUtvLxOOO/p6EGY9RRsmghh9eH6SdC4t3OhRUTkHyvuItUSmFLA9c3A8L96M2NMcyAc2PoPc4mIiPwlh9KzeXvudr5dtZcgX2+e6N+MW7rVI8D3hE1sXfnw+6cw/0XIy3KfB9XjYfANdC64iIgUiuIuUpWBlAKuJwNhBVw/I2OMD/ARkAR8+s+jiYiInFtWbj6f/raLDxfGciw3n+s7R/LgJY2pEux/8sCENe4zofavgwa9YOBbUKWhM6FFRKTQObGy1RZwzRRw7VzeB7oBA621BZUz942NuQv3bn9ERkb+jbcREREBl8sydf0+3pwVTcKRY/RuXp0nBzSjYbXgkwceOwLzRsLqzyC4Ogz7DFoOBfN3ftSJiEhJVdxFKgX3U6lThVHwk6oCGWNexV2ObrbWzj7bWGvtOGAcQMeOHQsqcSIiIme1alc
yL03bwob4VFrWCuHN4a3p1rDqyYOshQ0/wOynIfMwdL4Hej0FASHOhBYRkSJV3EVqM+51UqdqAWzx5AbGmKeBJ4AHrLVfFWI2ERGRk+w6lMFrM7Yya/NBaoQE8NbwNlzRLuLkjSQAkqJh2iOwewlEdIQbJkHNNs6EFhGRYlHcRWoqMMoY08BauxPAGFMP6I67HJ2VMeYB3OdOPW2tfa8Ic4qISDmWkpHDO/N28PWKOPx8vHikTxPuuKABgX7eJw/MyYTFb8Ky98AvCC4bA+1vAS8vR3KLiEjxKe4iNR74NzDFGDMC93qpF4G9wMd/DDLG1AVigZHW2pHHr10DvA3MBOYbY7qccN80a61HT7RERETOJDsvnwnL4nhv/g7Ss/O4+vxIHu7TmPCKAacPjp4JMx6DI3ugzXXQZyQEVyv+0CIi4ohiLVLW2gxjzMXAGOAr3JtMzAMestamnzDUAN7Aif+k1+/49X7H/zvRIuCiIootIiJlnLWWaRv38/rMbexNPkbPJtV4akBzmtaoePrgI3th5hOw7Veo1gxumQ71uhd/aBERcVSx79pnrd0DXHmOMbs5ZSc/a+0twC1FlUtERMqnNXEpvDxtC1F7jtCsRkUm3NaJC5sU8GQpPxeWj4VFr7u/7/08dLkPfPyKM66IiJQQTmx/LiIi4rj4lExenbGNaRv2U62iP69f2YphHergfepGEgBxy9xnQiVthaYDof9rUElHaoiIlGcqUiIiUq5k5eYzbvFOxi6IwRh44JLG3H1hAyr4F/AjMeMQzH4G1n8DoZFw7XfQtH/xhxYRkRJHRUpERMqN+dsO8sIvW4g7nMnAVjV5emBzalUKPH2gywVRX8Lc5yEnHXo8DBc+Bn4Vij2ziIiUTCpSIiJS5u05nMnIXzczd2siDapV4OvbO9OjcdWCB+/fANP+A/G/Q90eMPAtCG9WvIFFRKTEU5ESEZEyKys3nw8XxvLholh8vAxP9m/Grd3r4+dTwDlPWWmw4BVY9TEEVoYrPobWV4MpYM2UiIiUeypSIiJSflQrgAAAIABJREFU5lhrmbs1kZG/bmZv8jEGtanF0wOaUyO0gPOgrIXNP8Gsp+DoAeh4K1zyLASGFX9wEREpNVSkRESkTNl9KIPnf9nMwugkGocH882dnenW8AzT+A7HwvRHIXY+1GwDV/8Panco3sAiIlIqqUiJiEiZcCwnnw8WxvDxop34+XgxYmBzbu5WD1/vAqbxZafDb2Ng2Xvg4w/934Dz7wAv7+IPLiIipZKKlIiIlGrWWmZtPsiLv24h4cgxLm9bi6cGNCc8pIBpfC4XrP8W5r0A6Qeh1XC49CWoWKP4g4uISKmmIiUiIqXWzqR0npu6mSU7DtGsRkW+v6sLnRtUKXjw7qUw60nYvx4iOrqn8dU5v3gDi4hImaEiJSIipU5mTh7vzY/hkyU7CfDx5tnLWnBT17r4FDSNL2U3zHkWtkyBkAgY+gm0Gqbd+ERE5B9RkRIRkVLDWsv0jQd4adoW9qdmMbR9BE/0b0Z4xQKm8WWlwZK3YMUH4OUDvZ6Grv8Gv6DiDy4iImWOipSIiJQKMYnpPD91M7/FHKJ5zRDeu7YdHetVPn2gKx/Wfg3zX4SMJGhzrXs785BaxR9aRETKLBUpEREp0dKz83hv3g4+/W0XgX7evDC4Jdd3jix4Gt+uxTDzKTi4Eep0huu+hwhtZy4iIoVPRUpEREokay2/bNjPy9O2cDAtm+EdavN4/2ZUDfY/ffDhWPc6qG2/QmgkDPscWl6hdVAiIlJkVKRERKTE2X7wKM9N2czynYc5LyKED67vQIe6YacPzEqFxW/Cio/A2w8ufga63ge+gcUfWkREyhUVKRERKTGOZuXyztwdfLFsNxX8fXjx8vO4rlMk3l6nPFnKz4OoL2HBK5B5GNpd7y5ROg9KRESKiYqUiIg4zlrLlHX7eGX6VpLSs7m6Yx3+268ZlSv4nT44dgHMegoSt0Dd7tD3Faj
VtvhDi4hIuaYiJSIijtp2II1np2xm1a5kWtcOZdxNHWlbp9LpAw/FwOwRsH0GVKoLV02A5oO1DkpERByhIiUiIo5Iy8plzJztTFgeR8UAH165ohVXn1/n9Gl8x1Jg0Zuw6mPwCYTeL0Dne8C3gLOjREREiomKlIiIFCtrLZOjEnh1xjYOZ2RzXadIHr20KWGnTuPLz4M1n7vXQR1LgfY3wcUjIDjcmeAiIiInUJESEZFis2VfGs9O2cTquBTa1qnEZ7d0pHXtAqbxxcyFWU9D0jaodwH0exVqtCr+wCIiImegIiUiIkUu9Vguo2dH89WKOCoF+fHGla0Z1qE2XqdO40uKdheomDkQVh+u+QaaDtA6KBERKXFUpEREpMi4XJaJUfG8PmMbKZk53NClLo/0aUpokO/JAzOTYeFr8Psn4BcMl74Ene4CnwIO3xURESkBVKRERKRIbEpI5Zkpm1i75wjtIyvx5W2dOC8i9ORB+bnu8rTwNchOgw63Qq+noEJVZ0KLiIh4SEVKREQK1ZHMHEbNjuZ/K/dQpYIfo4a3YWi7iJOn8VkLO2a7p/Ed3gENernPg6rewrngIiIif4GKlIiIFAqXy/LD6r28PnMbqcdyublrPR7u04TQwFOm8SVudR+oGzsfqjSC636AxpdqHZSIiJQqKlIiIvKPbYg/wjNTNrN+7xHOrxfGyCHn0bxmyMmDMg65tzJf8zn4V4R+r0HH28HHr+CbioiIlGAqUiIi8relZOTwxqxovvt9D1WD/RlzdRsubxuBOfHpUl4OrBoHi96AnHQ4/w646EkIquxccBERkX9IRUpERP6yfJfl21V7GDU7mqNZedzWvT4P9W5MxYATpvFZC9HTYfYISN4Jjfq4d+MLb+ZccBERkUKiIiUiIn/J2j0pPDtlMxsTUulcvzIjh5xH0xoVTx50YBPMehJ2LYaqTeH6idC4jzOBRUREioCKlIiIeORwejZvzIzm+9V7Ca/ozzvXtGVwm1onT+NLT4IFL0HUBAgIhQGjoMMt4O17xvuKiIiURipSIiJyVvkuy/9WxjFqVjSZOfncdWEDHrikMcH+J/wIycuGlR/Bojch7xh0vgd6/hcCw5wLLiIiUoRUpERE5IzWxCXzzM+b2bI/jW4Nq/DC4JY0rn7CND5rYesvMOcZSNkNTfq510FVbexYZhERkeKgIiUiIqdJOprNazO2MSkqnhohAbx/XTsGtqp58jS+/eth5lMQ9xuEt4Abf4KGFzsXWkREpBipSImIyP/Ly3fx1Yo4Rs/ZTlZuPvf0bMj9FzeiwonT+I4ehPkjYe3/3FuYDxwN7W8Gb/1IERGR8kM/9UREBIBVu5J5dsomth04ygWNq/L84JY0rBb854DcLFgxFpaMdq+J6nofXPgYBFZyLrSIiIhDVKRERMq5xLQsXp2xjZ/WJlArNIAPr29Pv/Nq/DmNz1rY8jPMfhZS90Czy6DPSKjS0NngIiIiDlKREhEpp3LzXXy5bDdvz91BTp6L+3o15L5ejQjyO+FHQ0IUzHoK9iyH6ufBkKnQoKdzoUVEREoIFSkRkXJoxc7DPDdlM9EHj9KzSTWeH9yS+lUr/DkgbT/MGwnrv4EK1WDQu9DuBvDydi60iIhICaIiJSJSjhxMy+LlaVuZun4fEZUCGXdjB/q0qP7nNL6cTFj+Pvw2Blx50P0huOARCAhxNriIiEgJoyIlIlIO5Oa7+HzpLt6Zu4Ncl+WBSxrzr54NCfQ7/oTJWtg0CeY8B2nx0GII9H4BKtd3NriIiEgJpSIlIlLGLYs5xLNTNxOTmM4lzcJ5dlAL6lY5YRpf/GqY+STEr4IarWHoOKjX3bnAIiIipYCKlIhIGbV6dzLvL4hhYXQSdSoH8slNHendovqfA1LjYe4LsPEHCK4OQ8ZCm2u1DkpERMQDKlIiImWItZbFOw4xdkEMq3YlU7mCH4/1bcrtPeoT4Hu8IOVkwNJ3Yek7YF1wwaPQ4yHwr+hseBE
RkVJERUpEpAxwuSyztxxg7IJYNiakUiMkgGcva8E1ner8uZ25y+V++jT3BTi6D1oOhd7PQ1hdJ6OLiIiUSipSIiKlWF6+i6nr9/HBwlhiEtOpWyWI14a24or2Efj7nDBFb89KmPkE7IuCWu1h+OcQ2cW54CIiIqWcipSISCmUlZvPxDXxfLw4lr3Jx2havSLvXNOWga1q4uPt9efAw7Gw4GX3jnwVa8IVH0Orq8DL68w3FxERkXNSkRIRKUUysvP4ZuUexi/ZSeLRbNrUqcSzl7XkkmbheHkdPwvKWohb5j4PKnoG+PhDz8eh+4PgV+HsbyAiIiIeUZESESkFUjNz+WLZbj5ftosjmbl0a1iFMVe3pVvDKn8eppufB1t+dheofWshsDJc+Bh0uhOCw539ACIiImWMipSISAmWdDSbT37bydfL48jIyad383Du7dWI9pFhfw7KSoOoCbDyI0jdC1UawWVjoPU14BfkXHgREZEyTEVKRKQEik/JZNzinXz/+15y810MbF2Ley9qSPOaIX8OOrLXXZ7WfAk5R6FuDxjwJjTuqzVQIiIiRUxFSkSkBIlNSufDhbH8vDYBY2Bou9rcc1FD6lc9YW1TQpR7+t7mn93ft7wCut4HEe2dCS0iIlIOqUiJiJQAm/el8sGCWKZv2o+/jxc3dKnLXRc2oFalQPcAlwu2z3QXqLil4B8CXe+FTndDpTrOhhcRESmHVKRERBy0Ji6Z9+fHsCA6iYr+PvyrZ0Nu61GfqsH+7gE5mbD+G1j+ASTHQmgd6PsKtLsRAkLOfnMREREpMipSIiLFzFrLkh2HGLsghpW7kqlcwY9HL23CjV3rERro6x509CD8Ph5+/xSOJbsP0R32GTQfAt76o1tERMRp+mksIlJMXC7L7C0H+WBhDBviU6kREsAzl7Xg2k51CPI7/sfxwS2wYixs+AHyc6HZQOj6b4jsAn9scy4iIiKOU5ESESliefkuftmwjw8WxLIjMZ3IykG8OrQVQ9tH4O/j7T5AN3Y+LHsfYueBTyC0vwm63AtVGjodX0RERAqgIiUiUkSy8/KZuCaejxbFsjf5GE2qB/PONW0Z2KomPt5ekJcD676H5WPh4CYIrg4Xj4COt0NQZafji4iIyFmoSImIFLLMnDy+WbmH8Ut2cjAtmza1Q3lmYAt6N6+Ol5eBzGRY8zmsHAfpByC8BQz5AFoNAx9/p+OLiIiIB4q9SBlj6gBjgD6AAeYCD1lr93jw2leAjkAHoDJwq7X2i6JLKyLiudTMXL5cvpvPl+4iJTOXrg2q8NbwtnRvVAVjDByOhRUfwrr/QW4mNLwYLv/A/X+tfxIRESlVirVIGWOCgPlANnAzYIGXgAXGmNbW2oxz3OJ+YB3wK3BTUWYVEfFU0tFsPv1tF1+viCM9O49LmoVzb69GdKgb5l7/tHclLHsPtk0DLx9ofZX7AN3qLZ2OLiIiIn9TcT+RuhNoADS11sYAGGM2ADuAu4HR53h9qLXWZYxphIqUiDgs4cgxxi2K5bvf95KT72Jgq5rce1EjWtQKgfw82DTZvf4pYTUEhsEFj0CnO6FiDaeji4iIyD9U3EVqMLDijxIFYK3dZYxZCgzhHEXKWusq4nwiIue0MymdDxfG8tPaBACGto/gnp4NaVAtGLKPug/PXfkhHNkDlRvAgFHQ9jrwq+BwchERESksxV2kWgJTCri+GRhezFlERP6SLfvSGLswhukb9+Pn7cUNXepy54UNiKgUCKkJMPt1WPMlZKdCZDfo+yo07Q9e3k5HFxERkUJW3EWqMpBSwPVkIKwo3tAYcxdwF0BkZGRRvIWIlHFr4pIZuyCW+dsSCfb34Z6eDbm9R32qBvvDvnUw733Y/JN7PVSLIe4DdGt3cDq2iIiIFCEntj+3BVwrsu2qrLXjgHEAHTt2LOi9RUROY63lt5hDjF0Qw4qdyYQF+fJInybc1K0eof7esGM2LH8fdi8Bv4rQ6W7ofDeE1XU6uoi
IiBSD4i5SKbifSp0qjIKfVImIFCuXyzJn60E+WBDD+vhUqof488xlLbi2Ux2CTC6s/9q9gcThHRASAX1ehA43Q0Co09FFRESkGBV3kdqMe53UqVoAW4o5i4jI/8vLd/Hrhv18sDCG7QfTiawcxKtDWzG0fQT+Wcmw9A34/RPIPAw128KVn7qn8Xn7Oh1dREREHFDcRWoqMMoY08BauxPAGFMP6A48UcxZRETIzstn0poEPloUy57kTJpUD+ada9oysFVNfJJ3wPSHYP33kJ8NTfpDt39D3e46QFdERKScK+4iNR74NzDFGDMC93qpF4G9wMd/DDLG1AVigZHW2pEnXO8JVAP+OISlozEmHcBaO7FYPoGIlAmZOXl8s3IP45fs5GBaNm1qhzJiYAd6NwvHK24xfPsgxMwBnwBodz10uReqNnY6toiIiJQQxVqkrLUZxpiLgTHAV7g3mZgHPGStTT9hqAG8Aa9TbvEC0POE7+87/t8frxEROavUY7lMWLabz5buIiUzly4NKvPW8LZ0r18Rs/knGP8+HNgIFapBr6eh4+1QoYrTsUVERKSEKfZd+6y1e4ArzzFmNwUUI2vtRUWTSkTKukPp2Xz62y6+Wh5HenYeFzcL575eDekQbmDNFzD1Yzi6H6o1g8HvQaurwDfA6dgiIiJSQjmx/bmISLHZd+QY4xbv5NtVe8jJdzGwVU3+dVFDWgYkw4rX4OuvITcDGlzkLlCNemv9k4iIiJyTipSIlEm7DmXw4cIYflqbgLUwtH0E9/RsSIOsLbDk37DtVzDe0GoYdL0ParRyOrKIiIiUIipSIlKmbNmXxgcLY5i+cT++3l5c37kud/aoS8SBeTDlUYhf5T7zqfuD0OkuCKnldGQREREphVSkRKRMWBOXwgcLYpi3LZFgfx/u7tmQ286vRrUdP8KEa+FIHITVg/5vQtvrwD/Y6cgiIiJSiqlIiUiplZyRw9R1CUxem8CG+FTCgnx5pE8Tbj7Pn5ANn8L4zyErFep0hktfgmYDwcvb6dgiIiJSBqhIiUipkpPnYv62RCZFxbNgWyJ5LkvLWiE8N6gF19Q5QuDqUfDxRLAuaD4Iut4Pdc53OraIiIiUMSpSIlLiWWtZH5/K5Kh4pq7fx5HMXKpV9Oe2HvUZ2iacZmnLYdUbMGcx+FaA8++ALv9yT+UTERERKQIqUiJSYu07coyf1iYwOSqe2KQM/H28uLRlDa5sH0GPsFR81n8F33wLGYlQsRb0fgE63AyBYU5HFxERkTJORUpESpSM7DxmbT7ApKh4lsUexlroVK8yd17QgAHNKxGycwYsGwFxv7m3L2/SD9rf5D7/yVt/pImIiEjx0N86RMRxLpdlxc7DTIpKYMam/WTm5BNZOYgHL2nM0Ha1iczZAVHvwrwfIDsVwurDJc+5d9+rWMPp+CIiIlIOqUiJiGNik9KZHBXPT1EJ7EvNoqK/D4Pb1OLKDrXpWN0Ls2kSTPwS9q8Hb39oMcT99Klud/Dycjq+iIiIlGMqUiJSrI5k5vDLhv1MWhPPur1H8DJwYZNqPDGgOZc2Dydg/+8Q9TRs/gnyjkH189xnP7UerrVPIiIiUmKoSIlIkcvNd7EwOolJa+KZvy2RnHwXzWpU5OkBzRnSthbhXkdh/bfw8QQ4vAP8KkKba9xPn2q1A2Oc/ggiIiIiJ1GREpEiYa1lU0Iak45vWZ6ckUPVYD9u7FqXoe0jaFG9AmbnQpjxHERPB1ce1OkCPR6GlpeDXwWnP4KIiIjIGalIiUihOpiW9f9blm8/mI6ftxd9WlRnaPsILmxSDd+j8bB2LHz3P0iLh6Aq0Pke99Onak2dji8iIiLiERUpEfnHjuXkM3vLASauiWdpzCFcFjrUDePlK87jsla1CPWzED0NvpkAsQvcL2p4MfR9GZoOAB8/Zz+AiIiIyF+kIiUif4vLZVm1O5nJUfFM33iA9Ow8IioFcl+vRgxtX5v6VStAUjQsed69/inzMIT
Uhp6PQ7vroVKk0x9BRERE5G9TkRKRv2T3oQwmR8UzeW0C8SnHqODnzYBWNRnavjad61fGKy/TvePelK9g7wrw8nE/dWp/MzTsBV7eTn8EERERkX9MRUpEzin1WC7TNuxnUlQ8a+JSMAZ6NKrKo5c2pW/LGgT6esG+KJg2EjZOgpyjUKUx9HkR2lwLwdWc/ggiIiIihUpFSkQKlJfvYvGOJCZFJTBny0Fy8lw0Dg/mif7NuLxtBDVCA+BYCqz9FKImwMGN4BMILa9wbxwR2UXblouIiEiZpSIlIifZss+9ZfmUdfs4lJ5NWJAv13WKZGj7CFpFhGKshbjfYO5XsGUK5GdDzbYwcDS0GgYBoU5/BBEREZEipyIlIiQezWLqun1MXBPPtgNH8fU2XNLMvWX5RU3D8fPxgqMH4LdPIeorSNkF/qHuJ0/tb4SabZz+CCIiIiLFSkVKpJzKys1nzpaDTI6KZ/GOQ+S7LG3qVOLFIS25rHUtwir4QX4exMxyT93bPgtsPtTtARc9CS0Gg2+g0x9DRERExBEqUiLliLWWNXEpTIqK59cN+zmalUfN0ADuvrABQ9vXplF4sHtg8k5Y8TWs/R+kH4AK4dDtfmh3I1Rt5OyHEBERESkBVKREyoG9yZlMjkpg8tp44g5nEuTnTb/zanBl+9p0aVAFby8DuVmwcSJEfQm7FoPxgkZ9oP1b0KQvePs6/TFERERESgwVKZEy6mhWLtM37mdSVAKrdiVjDHRtUIUHLm5Mv/NqUMH/+G//g5vdU/c2fO/eha9SJPQaAW2vg9AIZz+EiIiISAmlIiVShuS7LEt2JDE5KoFZmw+QneeiQdUKPNa3KZe3iyCi0vE1TdlHYc0kd4FKWAPeftDsMvfmEfV7gpeXsx9EREREpIRTkRIpA6IPHGVSVDw/r00g8Wg2oYG+XNWxDkPbR9C2TiWMMWAt7P3dPXVv02TIzYBqzaDvq9D6aqhQxemPISIiIlJqqEiJlFKH0rOZum4fk9fGsykhDR8vw0VNwxnWIYJezcLx9/F2D8w4DBu+c29bnrQVfCvAeUOh/c1Qu6MOzRURERH5G1SkREqR7Lx85m1NZHJUPAujk8hzWVpFhPLcoBYMblOLKsH+7oEuF8TOd0/d2zYN8nMgoiMMetddovwrOvtBREREREo5FSmREi41M5fFO5JYEJ3IvK2JpB7LpXqIP7dfUJ+h7WrTtMYJpSg1AdZ9A2snwJE9EBgGHW93H5pbvaVzH0JERESkjFGREilhrLVs2Z/GwugkFmxLJGpPCi4LlYJ8ubhZOFe0i6B7o6ruLcsB8nNh+0z31L2YOWBd7g0jLnnOvYGEb4CzH0hERESkDFKREikB0rJyWbrjEAuiE1kYnUTi0WwAWkWE8u9ejejZNJy2dSr9WZ4ADse6N45Y9y1kJELFmtDjP9DuBqhc36FPIiIiIlI+qEiJOMBay/aD6SyITmTBtkTWxKWQ57JUDPDhwibVuKhJNXo2rUZ4xVOeJuVkwtap7rVPcUvBeEOTfu5tyxv1Bm/9lhYREREpDvpbl0gxycjOY2nMIRZEJ7EwOpH9qVkANK8Zwl0XNuCipuG0j6zE/7V37+FxlfeBx78/SZaNb/gm22B8l5GxA7hZhzvYzpUCDySbkAulS9JcyKbdJU2bTQhtkhK6zTZbaJu2m7Bhs9lAFzaXNslmtxCoZQcWQigFisGWsWV8RzLy/S7p3T/Oka0IgT2akWYkfz/Pc56R3znvmd+B95mZ37zv+Z2a6h73cNq1EdavgOYV0PQQHN4N42dnS/cW3QBjppbhbCRJkk5tJlJSP0kpsa51P41rWli+poVfNu/kSEcno4fXcFn9JG55Wx1LGyYz9fQes04H2rKkqSt5aluftY+qg/lXwaLfgJmXetNcSZKkMjKRkkro4JEOHl+/g+WrW2lsamFT20EAzp4ymg9fOoulDXUsnjmB2ppuSdCR/bDx8SxxWt8I2/8FSFA7BmZdCm/
5OMxZCpPP8Z5PkiRJFcJESirShh37jxWJeHz9qxxp7+S0YdVcWj+Rm6+Yy9KGOs4aP/J4h4522PRUljStXwGbfgGdR6FqGEy/EJZ9Iau6N+3NUD2sbOclSZKk12ciJRXo0NEOftHcxvLVLaxoaqV5x34A5tSN4sYLZ7Jsfh1vmTWBEcOqsw4pwSsv5Mv1GmHDY3BkLxAw9Vy46JPZjNOMi6F2VJnOSpIkSYUwkZJOwqa2AzTms06PrdvBoaOdDK+p4uK5E/nwJdmSvZkTuyVB3QtErF+RlSeHrEjEue+DOUtg1hUwamJ5TkiSJElFMZGSenGkvZNfbshmnRqbWnmpZR8AMyaM5AOLp7N0/mQunjPx+KzTgTZY9bNsxqlngYjZS7IZpzlLYNyMcpyOJEmSSsxESspt232QxjWtLF/dwmMv7WD/kQ5qq6u4cM4EPnTBDJY21DFn0igiIruf08vLeykQMRpmXWaBCEmSpCHOREqnrKMdnfzTyztpzO/rtHr7XgCmjTuNd//aNJY1TObiuRMZNbwmKxCx9WlY2ZglT5ufhI4jeYGICywQIUmSdIoxkdIppWXPIRqbssTp50072Hu4nZqq4C2zJvCFq+aztGEy8yaPJgBaXoR//mHvBSIuvNkCEZIkSacwEykNae0dnTyzaVe2ZG9NC6u27gFgytjhXH3eGSxtqOPS+kmMGTEMdm2C9X8Pj66A5pWw75XsIBaIkCRJUg8mUhpyduw7zMqmVpavaWVlUyu7Dx6luir4VzPG89l3NbCsYTLnnDGGOLgzS5h+1miBCEmSJBXEREqDXmdn4rktu7MKe2taeG7LblKCSaOH844FU1jaUMfl9XWcXnMUNv4/eP478JMVsO05XlsgYglMXmCBCEmSJL0hEykNSjv3H2Hl2lYa17SyoqmVtv1HiIBF08fxu28/m2UNk1k4dSRV2/4Z1t8HDzRaIEKSJEklYyKlQaGzM/HCtj0sX93C8jUtPLNpF50Jxo8cxpKz61g2fzKX109iwv510PwwrFwBGx7NC0QAU8+zQIQkSZJKxkRKFWv3waM8unYHy9e0sKKplda9hwE476zT+Z23zmNZQx3njdlL9YaVsO6v4WELREiSJGlgmEipYuw73M6L2/bwyw1tNK5u5Z827qSjMzF2RA1XnF3HsobJLJlezaTWX0DzD+DvGy0QIUmSpLIwkVJZvLrvMKu27sm33azauocNr+4npez5BWeM5ZNL5vC2uWM4v/NFqjf8HTy1An5igQhJkiSVn4mU+lVKiS27Dh5PmrZkSdP2PYeO7TNt3GksPHMs7140jXOnDGfRsJeZ8MoT2Y1wn+xRIGLprdmskwUiJEmSVEYmUiqZjs5E8459rNq6h+fzhOmFbXvYdeAoAFUBc+pGc+GcCfza5GrePLKF+tjCyD3roLUJVq2Gx16G1JkdsKtAxOylMNMCEZIkSaocJlLqk8PtHTRt38fzW3cfW5q3etteDh7tAKC2uor5U0dzfUMtbxm9h/k1WznzyEZq2ppgSxOs3nb8YFXDYGI9nHEenHs9TFkIsy63QIQkSZIqlomUTmjvoaO8cOx6puyappda9tHemV3QNHZ4FZdPPsT7GnbwptpXmNm5ibH71lO1owle3HX8QLWjYdK8bGnepLOhrgEmNcD4WVDtUJQkSdLg4bdX/YrWvYePzTC9kCdNG149AMAw2lk0qo2rx7exaM4rzIkt1B1sZtiu9UTrQWjNDzJyUpYkLXxPnizlSdPYaRaEkCRJ0pBgInWKSimxeefBY0lT10zTK3sOcxqHmBtbuXBMK9eMbOXsaVuZemQjI/dvJDraYUd+kNOnZ0lS/VKoOzubXaprgJETynlqkiRJUr8zkToFtHd0sn7H/ixp2nI8aao+1EZ9bGVe1RauGdXKZ4dt56xxGxlzaHvW8QjQXgMT5sC0BVD3njxZOhsmzoPho8t6XpIkSVK5DHgiFRHTgbuKn9ZyAAAPHElEQVSAdwA
BPAx8OqW08ST6jgC+AtwIjAOeAT6XUlrZfxEPLoeOdrBm+95jydLzW3aza3szMzo3Ux9baKjexnuGb2NW9RZGjeh2/VLnaTBmHtRdfnxmqa4Bxs+GmtrynZAkSZJUgQY0kYqIkcA/AoeBm4AE3AEsj4jzUkr7T3CIe4Crgc8C64HfBh6MiItTSs/0X+SVaU+3IhAvbn6Vts1rGbZzLXPZzNyqLXyoehtzYyun1Rw81ieNGEfUzYe6a48nTJPOzpbpVVWV8WwkSZKkwWOgZ6Q+DswBGlJKLwFExHPAWuBm4M7X6xgR5wM3AL+VUvp23rYCWAXcDlzbv6GXV8veQ6zasofVm16hbeMLtL+ymnEHmqmPLVwWW/nNqu3U0g75PWrbR51B9ZQGYtLbf+X6pRhVZ8EHSZIkqUgDnUhdCzzRlUQBpJSaI+Ix4DreIJHK+x4FHujWtz0i7gc+HxHDU0qH+ynuAZNSYmPbAdZs2Exr83Mc2vYitTvXcubRTcyLzSyJHVRFVna8s6aKQ6OnUz1lIbVT33e8nPikedSMGFvmM5EkSZKGroFOpBYCP+qlfRVw/Un0bU4pHeilby1Qn/89aLS3d7Dh5XVsXfsc+7esourVJsYdaGZ22sI74/j1S0ejlr2nzyJNupAjZy1gxNRzoK6BqglzGTlsRBnPQJIkSTo1DXQiNQHY2Ut7GzC+iL5dzw8KT/7gLsa+eD9nHt1IfRygPm/fHyNpGzmLfeOX0HHmAibOOpfaM85h2LiZTKiqLmvMkiRJko4rR/nz1EvbyVy0E33pGxGfAD4BMGPGjJN4mf6Xjh6io2o4TVOuZNiUc5g461zOqD+fUWPPYJTXL0mSJEkVb6ATqZ30PnM0nt5nm7prA3rLhMZ3e/41Ukp3A3cDLF68uLdEbMBd+MFbgVvLHYYkSZKkPhroeteryK516mkB8MJJ9J2dl1Dv2fcI8NJru0iSJElS6Q10IvVj4KKImNPVEBGzgEvz507UdxjdilJERA3wAeChoVCxT5IkSdLgMNCJ1H8FNgA/iojrIuJasip+m4Bvdu0UETMjoj0ivtjVlt9w9wHgzyPiYxHxNuB+YDbwpQE8B0mSJEmnuAFNpFJK+4G3Ak3Ad4H7gGbgrSmlfd12DaC6l/g+AnwbuAP4KTAduDKl9HQ/hy5JkiRJxwx41b6U0kbgvSfYZwO9VONLKR0EPpNvkiRJklQWA720T5IkSZIGPRMpSZIkSSqQiZQkSZIkFchESpIkSZIKZCIlSZIkSQUykZIkSZKkAplISZIkSVKBTKQkSZIkqUAmUpIkSZJUIBMpSZIkSSqQiZQkSZIkFchESpIkSZIKZCIlSZIkSQWKlFK5YxgwEdEKvFzuOHKTgB3lDkKDjuNGfeG4UV84btQXjhv1RaWNm5kppboT7XRKJVKVJCKeSiktLnccGlwcN+oLx436wnGjvnDcqC8G67hxaZ8kSZIkFchESpIkSZIKZCJVPneXOwANSo4b9YXjRn3huFFfOG7UF4Ny3HiNlCRJkiQVyBkpSZIkSSqQiVSJRcT0iPh+ROyOiD0R8cOImHGSfUdExNciYltEHIyIxyPiiv6OWeXX13ETEYsj4u6IWB0RByJiY0TcFxGzByJulVcx7zc9jnNrRKSIeLQ/4lRlKXbcRMQ5EfG9iNiRf1atiYhb+jNmlV+R329mRMR38s+oAxHRFBF3RMSo/o5b5RMRZ0XE1/Pvswfyz5lZJ9m3Kv9s2hARhyLi2Yh4b/9GXDgTqRKKiJHAPwLzgZuA3wTmActP8s3iHuDjwBeBa4BtwIMRsah/IlYlKHLcfBBYCPwl8OvA54E3A09FxPR+C1plV4L3m67jzAFuA1r6I05VlmLHTUQsBn4BDAc+BlwF/BlQ3V8xq/yKGTf58w8DVwB/CFwNfAv4PeC/9WPYKr964P3ATuDnBfb9CvBl4K/Ivt88AXwvIq4qZYBFSym5lWgDbgE6gPpubbOBduAzJ+h7PpC
Aj3RrqwHWAD8u97m5Vey4qeulbSbQCdxe7nNzq8xx0+M4DwLfBBqBR8t9Xm79uxX5flMFrAL+rtzn4TawW5Hj5p3595t39mj/at5/ZLnPz63fxk1Vt78/lo+DWSfRbzJwGPijHu2PAM+V+7y6b85Ilda1wBMppZe6GlJKzcBjwHUn0fco8EC3vu3A/cC7ImJ46cNVhejzuEkptfbS9jLQCkwrcZyqLMW83wAQETeQzWDe2i8RqhIVM26WAguAO/stOlWqYsZNbf64p0f7LrLkPEoVpCpLSqmzj13fRTZu7u3Rfi9wbiVdvmAiVVoLged7aV9F9uFzor7NKaUDvfStJZse1dBUzLh5jYg4h+zXnBeLjEuVrahxExHjgbuA/5BSaitxbKpcxYyby/LHERHxREQcjYiWiPjLiDitpFGq0hQzbh4G1gL/KSIWRMToiHgr2SzXN1JK+0sbqoaAhWQzUi/1aF+VPxb83ai/mEiV1gSydaA9tQHji+jb9byGpmLGza+IiBrgG2QzUvcUH5oqWLHj5mtAE/DfSxiTKl8x4+bM/PEB4CHgHcCfki3Z+dtSBaiK1Odxk1I6RJaEdy0N3Uu2ROt/A79T2jA1REwAdqV8PV83FfeduKbcAQxBvd2Y62SmraOIvhr8SvX//q+AS4CrU0q9fehpaOnTuImIy4F/A7y5lw8qDX19fb/p+vH13pTSF/O/GyOiGvhqRCxIKb1QkghVifr6fjOCLPmeTFakYiNwAVlhrXbg35YwRg0Ng+Y7sYlUae2k9yx5PL3/ktNdG9BbGdHx3Z7X0FTMuDkmIv4E+ARwU0rpoRLFpspVzLj5JtmM5eaIGJe31QDV+b8PppQOlyxSVZJixs2r+ePPerQ/RFY4YBFgIjU0FTNuPkp2fV19Smld3rYyInYDd0fEN1JKz5YsUg0FbcD4iIgeP/ZV3Hdil/aV1iqydZ09LeDEHy6rgNl5idGefY/w2nWiGjqKGTcARMRtZKXPb0kpfbeEsalyFTNuzgE+SfYFqGu7FLgo/9tfiIeuYj+n4LW/FHf9StzXC8tV+YoZN+cCO7slUV2ezB/PKTI2DT2ryG6xMLdHe9e1URXzg42JVGn9GLgovy8LAPmNxy7NnztR32HA9d361gAfAB7y1+EhrZhxQ0T8e+AO4LaU0tf7KUZVnmLGzbJetmfJLiZfBny/9OGqQhQzbv4v2QXgV/Zof1f++FRpQlQFKmbcbCebXehZNOvC/HFLiWLU0PEPZJMIv9Gj/Ubg+bxiZEUIl8eXTn7TuWeBg8AfkP1q9xVgDHBeSmlfvt9MYB3ZfX5u79b/frIPpM8CzWS/Cl8DXJJSenoAT0UDqJhxExEfJLvI+0Hgj3oceo/XKwxdxb7f9HK8RqAmpXTZ6+2jwa8En1NfIrup6p+S3aB1MfAl4IGU0ocH7kw0kIr8nJoFPEeWUP0x2TVSi8nGURNwQRFlslXhIuJ9+Z9vI1sJ8SmyglitKaUV+T7twHdSSh/t1u+rwKeBLwBPk00s3Axcl1L6ycCdwRvzGqkSSintz0t63gV8l2y5wyPAp7veZHJBdhf4njOCHyF7k7kDGEf2pnWlSdTQVuS4uTJvv5LX/kq8gmxduoagErzf6BRUgnFzO1nVtU8Bvw9sI6sA+ZV+Dl1lVMy4SSltiIiLgC+Tfb+ZBGwC7gb+2CRqyPtej3//Tf7Y/TtKdb51dxuwj6xM/lRgDfD+SkqiwBkpSZIkSSqYv1BKkiRJUoFMpCRJkiSpQCZSkiRJklQgEylJkiRJKpCJlCRJkiQVyERKkiRJkgpkIiVJGnAR8eGISBFRP0Cv9+WI8H4fkqSSMZGSJJ0KvgVcXO4gJElDR025A5Akqb+llDYDm8sdhyRp6HBGSpJUkSJiSUQ8EhF7I2J/RDwYEW/qsU9jRDwaEW+PiKcj4kBEPB8R7+6x32uW9kVETUTcGhGrI+JwRGyNiD+LiBHd9pmVL0G8OSJuj4htEbErIn4
SEWf1EvPH8zgORsTOiFgREZdExPCIaI2Iu3rp07XMcX7x/9UkSQPFREqSVHEi4mrgEWAfcCNwAzAG+HlETO+x+1zgL4A7gX8NbAO+fxLXX90L/AHwt8DVwJ8AHwXu62XfW4F64LeAW8iWCf7KfhHxn4G7gaeB9+dxrwRmpJQOA98GbuqeqOVuBlaklFafIF5JUgVxaZ8kqRL9BVlycV1XQ0QsB9YDvwd8utu+k4ArUkpr8/2eJkum3g/8x94OHhGXAx8Abkop/Y+8+eGIaAPujYhFKaVnunV5OaV0Q7f+dcDXIuLMlNLWPGn7XeCulNJnuvX7abe//0se+/XAd/PjnAdcBHzoZP6jSJIqhzNSkqSKEhHzyGaZ7suX39VERA1wAHgcuKJHl7VdSRRASqkFaAFmvMHLXAkcAX7Q4zUeyp/v+Ro/7fHvf8kfu17j7WSfqXe/3gumlJqBB8lmoLrcDLQCP3yDWCVJFchESpJUaSbnj/cAR3ts1wATe+zf1ssxDgM9l9D1fI1asqWD3Y/fkj9/otc4nD92vUbX/icqaPE3wKUR8aaIGEW2/O/bKaUjJ+gnSaowLu2TJFWaV/PHW4GHe3m+FEnHq8Ah4PLXeX5rgcfbkT9OA9a8wX7/B9hANhP1LNl1X687iyVJqlwmUpKkSrOGLNlYmFL6aj+9xj8AnwNOTyk9UoLjPQx0Ap8guw6qVymlzoj4JvB5siTu4ZTSuhK8viRpgJlISZLK6cqI2N6jbTfw28CPIqIW+F9kMz5TgEuAjSmlO4t50ZRSY0T8T7LqfncCT5IlQrOAq4DPpZSaCjjeury0+WciYgzwY6ADuABYnVJ6oNvu9wBfBs4H3lvMeUiSysdESpJUTl/vpW1VSulNEXEFcBvwLeA0YDvwBPBAL3364kbg35GVNL+N7LqnDWQFIV4p9GAppd+PiJeATwE3AfuB5zhewKJrv9aIWAGcS5ZwSZIGoUgpnXgvSZJUEhExHtgI/HlK6Q/LHY8kqW+ckZIkaQDk955qILuhbxVZBT9J0iBl+XNJkgbG1cDPya6buimltK3M8UiSiuDSPkmSJEkqkDNSkiRJklQgEylJkiRJKpCJlCRJkiQVyERKkiRJkgpkIiVJkiRJBTKRkiRJkqQC/X9Yv7si18O9EAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 1008x576 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "failures_compas = np.zeros((11, 2))\n", + "\n", + "for r in np.arange(0, 11):\n", + " ## Causal model with logistic regression\n", + " failures_compas[r, 0] = ep([r / 10], compas_dummy, response_column, predict_columns, logreg_c, 0)\n", + " \n", + " ## Causal model with random forest classifier\n", + " failures_compas[r, 1] = ep([r / 10], compas_dummy, response_column, predict_columns, forest_c, 0)\n", + "\n", + "# klassifikaatioille scipy.stats semin kautta error barit xerr ja yerr argumenttien kautta\n", + "\n", + "plt.figure(figsize=(14, 8))\n", + "plt.plot(np.arange(0, 11) / 10, failures_compas[:, 0], label='Causal model, 
log.')\n", + "plt.plot(np.arange(0, 11) / 10, failures_compas[:, 1], label='Causal model, for.')\n", + "\n", + "plt.title('Failure rate vs. Acceptance rate - COMPAS')\n", + "plt.xlabel('Leniency')\n", + "plt.ylabel('Failure rate')\n", + "plt.legend()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course if leniency is one, then the empirical performance should always converge to the proportion of false positives in the data." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "toc": { + "base_numbering": 1, + "nav_menu": { + "height": "265.233px", + "width": "283.233px" + }, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "251.4px" + }, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "position": { + "height": "465.65px", + "left": "843.6px", + "right": "16.2px", + "top": "159px", + "width": "676.2px" + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 
4, + "nbformat_minor": 2 +} diff --git a/analysis_and_scripts/Compas Analysis.ipynb b/analysis_and_scripts/Compas Analysis.ipynb index 43596e7b77ba390700a1d737e29eada1b3696cda..cce5687f22aaa7cd9aedb253b426425ab111eb45 100644 --- a/analysis_and_scripts/Compas Analysis.ipynb +++ b/analysis_and_scripts/Compas Analysis.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "toc": true + }, + "source": [ + "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n", + "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Loading-the-Data\" data-toc-modified-id=\"Loading-the-Data-1\"><span class=\"toc-item-num\">1 </span>Loading the Data</a></span></li><li><span><a href=\"#Racial-Bias-in-Compas\" data-toc-modified-id=\"Racial-Bias-in-Compas-2\"><span class=\"toc-item-num\">2 </span>Racial Bias in Compas</a></span><ul class=\"toc-item\"><li><span><a href=\"#Risk-of-Violent-Recidivism\" data-toc-modified-id=\"Risk-of-Violent-Recidivism-2.1\"><span class=\"toc-item-num\">2.1 </span>Risk of Violent Recidivism</a></span></li></ul></li><li><span><a href=\"#Predictive-Accuracy-of-COMPAS\" data-toc-modified-id=\"Predictive-Accuracy-of-COMPAS-3\"><span class=\"toc-item-num\">3 </span>Predictive Accuracy of COMPAS</a></span></li><li><span><a href=\"#Directions-of-the-Racial-Bias\" data-toc-modified-id=\"Directions-of-the-Racial-Bias-4\"><span class=\"toc-item-num\">4 </span>Directions of the Racial Bias</a></span></li><li><span><a href=\"#Risk-of-Violent-Recidivism\" data-toc-modified-id=\"Risk-of-Violent-Recidivism-5\"><span class=\"toc-item-num\">5 </span>Risk of Violent Recidivism</a></span></li><li><span><a href=\"#Gender-differences-in-Compas-scores\" data-toc-modified-id=\"Gender-differences-in-Compas-scores-6\"><span class=\"toc-item-num\">6 </span>Gender differences in Compas scores</a></span></li></ul></div>" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -2383,6 +2393,48 @@ "nbconvert_exporter": "python", 
"pygments_lexer": "ipython3", "version": "3.6.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false } }, "nbformat": 4, diff --git a/analysis_and_scripts/tree.dot b/analysis_and_scripts/tree.dot new file mode 100644 index 0000000000000000000000000000000000000000..4b473230d4b6e4d43a9a24c1ce13de64589c289f --- /dev/null +++ b/analysis_and_scripts/tree.dot @@ -0,0 +1,609 @@ +digraph Tree { +node [shape=box, style="filled, rounded", color="black", fontname=helvetica] ; +edge [fontname=helvetica] ; +0 [label="X <= 0.06\ngini = 0.42\nsamples = 22638\nvalue = [10938, 24937]\nclass = 1", fillcolor="#399de58f"] ; +1 [label="X <= -0.58\ngini = 0.26\nsamples = 14836\nvalue = [3590, 19957]\nclass = 1", fillcolor="#399de5d1"] ; +0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; +2 [label="X <= -1.07\ngini = 0.14\nsamples = 8795\nvalue = [1085, 12887]\nclass = 1", fillcolor="#399de5ea"] ; +1 -> 2 ; +3 [label="X <= -1.54\ngini = 0.08\nsamples = 4657\nvalue = [294, 7122]\nclass = 1", fillcolor="#399de5f4"] ; +2 -> 3 ; +4 [label="X <= -1.73\ngini = 0.03\nsamples = 2168\nvalue = [53, 3392]\nclass = 1", fillcolor="#399de5fb"] ; +3 -> 4 ; +5 [label="X <= -2.12\ngini = 0.02\nsamples = 
1502\nvalue = [21, 2321]\nclass = 1", fillcolor="#399de5fd"] ; +4 -> 5 ; +6 [label="X <= -2.26\ngini = 0.0\nsamples = 647\nvalue = [2, 998]\nclass = 1", fillcolor="#399de5fe"] ; +5 -> 6 ; +7 [label="gini = 0.0\nsamples = 457\nvalue = [0, 696]\nclass = 1", fillcolor="#399de5ff"] ; +6 -> 7 ; +8 [label="X <= -2.26\ngini = 0.01\nsamples = 190\nvalue = [2, 302]\nclass = 1", fillcolor="#399de5fd"] ; +6 -> 8 ; +9 [label="gini = 0.0\nsamples = 1\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +8 -> 9 ; +10 [label="gini = 0.0\nsamples = 189\nvalue = [0, 302]\nclass = 1", fillcolor="#399de5ff"] ; +8 -> 10 ; +11 [label="X <= -2.12\ngini = 0.03\nsamples = 855\nvalue = [19, 1323]\nclass = 1", fillcolor="#399de5fb"] ; +5 -> 11 ; +12 [label="gini = 0.0\nsamples = 1\nvalue = [1, 0]\nclass = 0", fillcolor="#e58139ff"] ; +11 -> 12 ; +13 [label="X <= -2.08\ngini = 0.03\nsamples = 854\nvalue = [18, 1323]\nclass = 1", fillcolor="#399de5fc"] ; +11 -> 13 ; +14 [label="gini = 0.09\nsamples = 54\nvalue = [4, 83]\nclass = 1", fillcolor="#399de5f3"] ; +13 -> 14 ; +15 [label="gini = 0.02\nsamples = 800\nvalue = [14, 1240]\nclass = 1", fillcolor="#399de5fc"] ; +13 -> 15 ; +16 [label="X <= -1.73\ngini = 0.06\nsamples = 666\nvalue = [32, 1071]\nclass = 1", fillcolor="#399de5f7"] ; +4 -> 16 ; +17 [label="gini = 0.0\nsamples = 1\nvalue = [3, 0]\nclass = 0", fillcolor="#e58139ff"] ; +16 -> 17 ; +18 [label="X <= -1.57\ngini = 0.05\nsamples = 665\nvalue = [29, 1071]\nclass = 1", fillcolor="#399de5f8"] ; +16 -> 18 ; +19 [label="X <= -1.57\ngini = 0.06\nsamples = 580\nvalue = [29, 934]\nclass = 1", fillcolor="#399de5f7"] ; +18 -> 19 ; +20 [label="gini = 0.06\nsamples = 579\nvalue = [28, 934]\nclass = 1", fillcolor="#399de5f7"] ; +19 -> 20 ; +21 [label="gini = 0.0\nsamples = 1\nvalue = [1, 0]\nclass = 0", fillcolor="#e58139ff"] ; +19 -> 21 ; +22 [label="gini = 0.0\nsamples = 85\nvalue = [0, 137]\nclass = 1", fillcolor="#399de5ff"] ; +18 -> 22 ; +23 [label="X <= -1.54\ngini = 0.11\nsamples = 
2489\nvalue = [241, 3730]\nclass = 1", fillcolor="#399de5ef"] ; +3 -> 23 ; +24 [label="gini = 0.0\nsamples = 1\nvalue = [3, 0]\nclass = 0", fillcolor="#e58139ff"] ; +23 -> 24 ; +25 [label="X <= -1.27\ngini = 0.11\nsamples = 2488\nvalue = [238, 3730]\nclass = 1", fillcolor="#399de5ef"] ; +23 -> 25 ; +26 [label="X <= -1.29\ngini = 0.1\nsamples = 1321\nvalue = [107, 1999]\nclass = 1", fillcolor="#399de5f1"] ; +25 -> 26 ; +27 [label="X <= -1.29\ngini = 0.1\nsamples = 1196\nvalue = [106, 1805]\nclass = 1", fillcolor="#399de5f0"] ; +26 -> 27 ; +28 [label="gini = 0.1\nsamples = 1195\nvalue = [104, 1805]\nclass = 1", fillcolor="#399de5f0"] ; +27 -> 28 ; +29 [label="gini = 0.0\nsamples = 1\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +27 -> 29 ; +30 [label="X <= -1.29\ngini = 0.01\nsamples = 125\nvalue = [1, 194]\nclass = 1", fillcolor="#399de5fe"] ; +26 -> 30 ; +31 [label="gini = 0.0\nsamples = 65\nvalue = [0, 104]\nclass = 1", fillcolor="#399de5ff"] ; +30 -> 31 ; +32 [label="gini = 0.02\nsamples = 60\nvalue = [1, 90]\nclass = 1", fillcolor="#399de5fc"] ; +30 -> 32 ; +33 [label="X <= -1.27\ngini = 0.13\nsamples = 1167\nvalue = [131, 1731]\nclass = 1", fillcolor="#399de5ec"] ; +25 -> 33 ; +34 [label="X <= -1.27\ngini = 0.46\nsamples = 10\nvalue = [5, 9]\nclass = 1", fillcolor="#399de571"] ; +33 -> 34 ; +35 [label="gini = 0.18\nsamples = 8\nvalue = [1, 9]\nclass = 1", fillcolor="#399de5e3"] ; +34 -> 35 ; +36 [label="gini = 0.0\nsamples = 2\nvalue = [4, 0]\nclass = 0", fillcolor="#e58139ff"] ; +34 -> 36 ; +37 [label="X <= -1.1\ngini = 0.13\nsamples = 1157\nvalue = [126, 1722]\nclass = 1", fillcolor="#399de5ec"] ; +33 -> 37 ; +38 [label="gini = 0.14\nsamples = 956\nvalue = [117, 1408]\nclass = 1", fillcolor="#399de5ea"] ; +37 -> 38 ; +39 [label="gini = 0.05\nsamples = 201\nvalue = [9, 314]\nclass = 1", fillcolor="#399de5f8"] ; +37 -> 39 ; +40 [label="X <= -0.86\ngini = 0.21\nsamples = 4138\nvalue = [791, 5765]\nclass = 1", fillcolor="#399de5dc"] ; +2 -> 40 ; +41 
[label="X <= -1.07\ngini = 0.17\nsamples = 1657\nvalue = [244, 2387]\nclass = 1", fillcolor="#399de5e5"] ; +40 -> 41 ; +42 [label="gini = 0.0\nsamples = 1\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +41 -> 42 ; +43 [label="X <= -1.04\ngini = 0.17\nsamples = 1656\nvalue = [242, 2387]\nclass = 1", fillcolor="#399de5e5"] ; +41 -> 43 ; +44 [label="X <= -1.05\ngini = 0.23\nsamples = 204\nvalue = [43, 289]\nclass = 1", fillcolor="#399de5d9"] ; +43 -> 44 ; +45 [label="X <= -1.07\ngini = 0.21\nsamples = 200\nvalue = [40, 287]\nclass = 1", fillcolor="#399de5db"] ; +44 -> 45 ; +46 [label="gini = 0.12\nsamples = 48\nvalue = [5, 70]\nclass = 1", fillcolor="#399de5ed"] ; +45 -> 46 ; +47 [label="gini = 0.24\nsamples = 152\nvalue = [35, 217]\nclass = 1", fillcolor="#399de5d6"] ; +45 -> 47 ; +48 [label="X <= -1.04\ngini = 0.48\nsamples = 4\nvalue = [3, 2]\nclass = 0", fillcolor="#e5813955"] ; +44 -> 48 ; +49 [label="gini = 0.0\nsamples = 1\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +48 -> 49 ; +50 [label="gini = 0.44\nsamples = 3\nvalue = [1, 2]\nclass = 1", fillcolor="#399de57f"] ; +48 -> 50 ; +51 [label="X <= -1.04\ngini = 0.16\nsamples = 1452\nvalue = [199, 2098]\nclass = 1", fillcolor="#399de5e7"] ; +43 -> 51 ; +52 [label="gini = 0.0\nsamples = 43\nvalue = [0, 69]\nclass = 1", fillcolor="#399de5ff"] ; +51 -> 52 ; +53 [label="X <= -1.04\ngini = 0.16\nsamples = 1409\nvalue = [199, 2029]\nclass = 1", fillcolor="#399de5e6"] ; +51 -> 53 ; +54 [label="gini = 0.0\nsamples = 1\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +53 -> 54 ; +55 [label="gini = 0.16\nsamples = 1408\nvalue = [197, 2029]\nclass = 1", fillcolor="#399de5e6"] ; +53 -> 55 ; +56 [label="X <= -0.85\ngini = 0.24\nsamples = 2481\nvalue = [547, 3378]\nclass = 1", fillcolor="#399de5d6"] ; +40 -> 56 ; +57 [label="X <= -0.86\ngini = 0.43\nsamples = 52\nvalue = [26, 58]\nclass = 1", fillcolor="#399de58d"] ; +56 -> 57 ; +58 [label="X <= -0.86\ngini = 0.39\nsamples = 49\nvalue = [21, 58]\nclass = 
1", fillcolor="#399de5a3"] ; +57 -> 58 ; +59 [label="X <= -0.86\ngini = 0.47\nsamples = 6\nvalue = [5, 3]\nclass = 0", fillcolor="#e5813966"] ; +58 -> 59 ; +60 [label="gini = 0.38\nsamples = 4\nvalue = [1, 3]\nclass = 1", fillcolor="#399de5aa"] ; +59 -> 60 ; +61 [label="gini = 0.0\nsamples = 2\nvalue = [4, 0]\nclass = 0", fillcolor="#e58139ff"] ; +59 -> 61 ; +62 [label="X <= -0.86\ngini = 0.35\nsamples = 43\nvalue = [16, 55]\nclass = 1", fillcolor="#399de5b5"] ; +58 -> 62 ; +63 [label="gini = 0.09\nsamples = 13\nvalue = [1, 21]\nclass = 1", fillcolor="#399de5f3"] ; +62 -> 63 ; +64 [label="gini = 0.42\nsamples = 30\nvalue = [15, 34]\nclass = 1", fillcolor="#399de58e"] ; +62 -> 64 ; +65 [label="gini = 0.0\nsamples = 3\nvalue = [5, 0]\nclass = 0", fillcolor="#e58139ff"] ; +57 -> 65 ; +66 [label="X <= -0.6\ngini = 0.23\nsamples = 2429\nvalue = [521, 3320]\nclass = 1", fillcolor="#399de5d7"] ; +56 -> 66 ; +67 [label="X <= -0.6\ngini = 0.24\nsamples = 2263\nvalue = [500, 3084]\nclass = 1", fillcolor="#399de5d6"] ; +66 -> 67 ; +68 [label="X <= -0.72\ngini = 0.23\nsamples = 2238\nvalue = [481, 3066]\nclass = 1", fillcolor="#399de5d7"] ; +67 -> 68 ; +69 [label="gini = 0.22\nsamples = 1216\nvalue = [238, 1669]\nclass = 1", fillcolor="#399de5db"] ; +68 -> 69 ; +70 [label="gini = 0.25\nsamples = 1022\nvalue = [243, 1397]\nclass = 1", fillcolor="#399de5d3"] ; +68 -> 70 ; +71 [label="X <= -0.6\ngini = 0.5\nsamples = 25\nvalue = [19, 18]\nclass = 0", fillcolor="#e581390d"] ; +67 -> 71 ; +72 [label="gini = 0.48\nsamples = 16\nvalue = [9, 14]\nclass = 1", fillcolor="#399de55b"] ; +71 -> 72 ; +73 [label="gini = 0.41\nsamples = 9\nvalue = [10, 4]\nclass = 0", fillcolor="#e5813999"] ; +71 -> 73 ; +74 [label="X <= -0.59\ngini = 0.15\nsamples = 166\nvalue = [21, 236]\nclass = 1", fillcolor="#399de5e8"] ; +66 -> 74 ; +75 [label="X <= -0.6\ngini = 0.07\nsamples = 58\nvalue = [3, 86]\nclass = 1", fillcolor="#399de5f6"] ; +74 -> 75 ; +76 [label="gini = 0.16\nsamples = 22\nvalue = [3, 
32]\nclass = 1", fillcolor="#399de5e7"] ; +75 -> 76 ; +77 [label="gini = 0.0\nsamples = 36\nvalue = [0, 54]\nclass = 1", fillcolor="#399de5ff"] ; +75 -> 77 ; +78 [label="X <= -0.59\ngini = 0.19\nsamples = 108\nvalue = [18, 150]\nclass = 1", fillcolor="#399de5e0"] ; +74 -> 78 ; +79 [label="gini = 0.49\nsamples = 4\nvalue = [4, 3]\nclass = 0", fillcolor="#e5813940"] ; +78 -> 79 ; +80 [label="gini = 0.16\nsamples = 104\nvalue = [14, 147]\nclass = 1", fillcolor="#399de5e7"] ; +78 -> 80 ; +81 [label="X <= -0.31\ngini = 0.39\nsamples = 6041\nvalue = [2505, 7070]\nclass = 1", fillcolor="#399de5a5"] ; +1 -> 81 ; +82 [label="X <= -0.41\ngini = 0.32\nsamples = 2550\nvalue = [808, 3212]\nclass = 1", fillcolor="#399de5bf"] ; +81 -> 82 ; +83 [label="X <= -0.58\ngini = 0.3\nsamples = 1590\nvalue = [454, 2034]\nclass = 1", fillcolor="#399de5c6"] ; +82 -> 83 ; +84 [label="X <= -0.58\ngini = 0.49\nsamples = 9\nvalue = [8, 6]\nclass = 0", fillcolor="#e5813940"] ; +83 -> 84 ; +85 [label="gini = 0.0\nsamples = 1\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +84 -> 85 ; +86 [label="X <= -0.58\ngini = 0.5\nsamples = 8\nvalue = [6, 6]\nclass = 0", fillcolor="#e5813900"] ; +84 -> 86 ; +87 [label="gini = 0.0\nsamples = 1\nvalue = [0, 2]\nclass = 1", fillcolor="#399de5ff"] ; +86 -> 87 ; +88 [label="X <= -0.58\ngini = 0.48\nsamples = 7\nvalue = [6, 4]\nclass = 0", fillcolor="#e5813955"] ; +86 -> 88 ; +89 [label="gini = 0.0\nsamples = 1\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +88 -> 89 ; +90 [label="gini = 0.5\nsamples = 6\nvalue = [4, 4]\nclass = 0", fillcolor="#e5813900"] ; +88 -> 90 ; +91 [label="X <= -0.41\ngini = 0.3\nsamples = 1581\nvalue = [446, 2028]\nclass = 1", fillcolor="#399de5c7"] ; +83 -> 91 ; +92 [label="X <= -0.41\ngini = 0.3\nsamples = 1564\nvalue = [446, 2000]\nclass = 1", fillcolor="#399de5c6"] ; +91 -> 92 ; +93 [label="X <= -0.42\ngini = 0.3\nsamples = 1559\nvalue = [440, 1997]\nclass = 1", fillcolor="#399de5c7"] ; +92 -> 93 ; +94 [label="gini = 
0.3\nsamples = 1529\nvalue = [435, 1944]\nclass = 1", fillcolor="#399de5c6"] ; +93 -> 94 ; +95 [label="gini = 0.16\nsamples = 30\nvalue = [5, 53]\nclass = 1", fillcolor="#399de5e7"] ; +93 -> 95 ; +96 [label="X <= -0.41\ngini = 0.44\nsamples = 5\nvalue = [6, 3]\nclass = 0", fillcolor="#e581397f"] ; +92 -> 96 ; +97 [label="gini = 0.0\nsamples = 2\nvalue = [4, 0]\nclass = 0", fillcolor="#e58139ff"] ; +96 -> 97 ; +98 [label="gini = 0.48\nsamples = 3\nvalue = [2, 3]\nclass = 1", fillcolor="#399de555"] ; +96 -> 98 ; +99 [label="gini = 0.0\nsamples = 17\nvalue = [0, 28]\nclass = 1", fillcolor="#399de5ff"] ; +91 -> 99 ; +100 [label="X <= -0.4\ngini = 0.36\nsamples = 960\nvalue = [354, 1178]\nclass = 1", fillcolor="#399de5b2"] ; +82 -> 100 ; +101 [label="X <= -0.4\ngini = 0.5\nsamples = 31\nvalue = [18, 22]\nclass = 1", fillcolor="#399de52e"] ; +100 -> 101 ; +102 [label="X <= -0.41\ngini = 0.46\nsamples = 27\nvalue = [12, 21]\nclass = 1", fillcolor="#399de56d"] ; +101 -> 102 ; +103 [label="gini = 0.0\nsamples = 3\nvalue = [3, 0]\nclass = 0", fillcolor="#e58139ff"] ; +102 -> 103 ; +104 [label="X <= -0.4\ngini = 0.42\nsamples = 24\nvalue = [9, 21]\nclass = 1", fillcolor="#399de592"] ; +102 -> 104 ; +105 [label="gini = 0.46\nsamples = 21\nvalue = [9, 16]\nclass = 1", fillcolor="#399de570"] ; +104 -> 105 ; +106 [label="gini = 0.0\nsamples = 3\nvalue = [0, 5]\nclass = 1", fillcolor="#399de5ff"] ; +104 -> 106 ; +107 [label="X <= -0.4\ngini = 0.24\nsamples = 4\nvalue = [6, 1]\nclass = 0", fillcolor="#e58139d4"] ; +101 -> 107 ; +108 [label="gini = 0.0\nsamples = 2\nvalue = [5, 0]\nclass = 0", fillcolor="#e58139ff"] ; +107 -> 108 ; +109 [label="X <= -0.4\ngini = 0.5\nsamples = 2\nvalue = [1, 1]\nclass = 0", fillcolor="#e5813900"] ; +107 -> 109 ; +110 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]\nclass = 1", fillcolor="#399de5ff"] ; +109 -> 110 ; +111 [label="gini = 0.0\nsamples = 1\nvalue = [1, 0]\nclass = 0", fillcolor="#e58139ff"] ; +109 -> 111 ; +112 [label="X <= -0.4\ngini = 
0.35\nsamples = 929\nvalue = [336, 1156]\nclass = 1", fillcolor="#399de5b5"] ; +100 -> 112 ; +113 [label="X <= -0.4\ngini = 0.09\nsamples = 25\nvalue = [2, 42]\nclass = 1", fillcolor="#399de5f3"] ; +112 -> 113 ; +114 [label="X <= -0.4\ngini = 0.38\nsamples = 6\nvalue = [2, 6]\nclass = 1", fillcolor="#399de5aa"] ; +113 -> 114 ; +115 [label="gini = 0.0\nsamples = 4\nvalue = [0, 6]\nclass = 1", fillcolor="#399de5ff"] ; +114 -> 115 ; +116 [label="gini = 0.0\nsamples = 2\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +114 -> 116 ; +117 [label="gini = 0.0\nsamples = 19\nvalue = [0, 36]\nclass = 1", fillcolor="#399de5ff"] ; +113 -> 117 ; +118 [label="X <= -0.35\ngini = 0.35\nsamples = 904\nvalue = [334, 1114]\nclass = 1", fillcolor="#399de5b3"] ; +112 -> 118 ; +119 [label="X <= -0.35\ngini = 0.39\nsamples = 512\nvalue = [212, 602]\nclass = 1", fillcolor="#399de5a5"] ; +118 -> 119 ; +120 [label="gini = 0.36\nsamples = 485\nvalue = [183, 588]\nclass = 1", fillcolor="#399de5b0"] ; +119 -> 120 ; +121 [label="gini = 0.44\nsamples = 27\nvalue = [29, 14]\nclass = 0", fillcolor="#e5813984"] ; +119 -> 121 ; +122 [label="X <= -0.33\ngini = 0.31\nsamples = 392\nvalue = [122, 512]\nclass = 1", fillcolor="#399de5c2"] ; +118 -> 122 ; +123 [label="gini = 0.24\nsamples = 174\nvalue = [40, 247]\nclass = 1", fillcolor="#399de5d6"] ; +122 -> 123 ; +124 [label="gini = 0.36\nsamples = 218\nvalue = [82, 265]\nclass = 1", fillcolor="#399de5b0"] ; +122 -> 124 ; +125 [label="X <= -0.31\ngini = 0.42\nsamples = 3491\nvalue = [1697, 3858]\nclass = 1", fillcolor="#399de58f"] ; +81 -> 125 ; +126 [label="gini = 0.0\nsamples = 1\nvalue = [4, 0]\nclass = 0", fillcolor="#e58139ff"] ; +125 -> 126 ; +127 [label="X <= -0.04\ngini = 0.42\nsamples = 3490\nvalue = [1693, 3858]\nclass = 1", fillcolor="#399de58f"] ; +125 -> 127 ; +128 [label="X <= -0.05\ngini = 0.42\nsamples = 2579\nvalue = [1217, 2896]\nclass = 1", fillcolor="#399de594"] ; +127 -> 128 ; +129 [label="X <= -0.05\ngini = 0.42\nsamples = 
2493\nvalue = [1187, 2780]\nclass = 1", fillcolor="#399de592"] ; +128 -> 129 ; +130 [label="X <= -0.06\ngini = 0.42\nsamples = 2491\nvalue = [1183, 2780]\nclass = 1", fillcolor="#399de592"] ; +129 -> 130 ; +131 [label="gini = 0.42\nsamples = 2432\nvalue = [1144, 2721]\nclass = 1", fillcolor="#399de594"] ; +130 -> 131 ; +132 [label="gini = 0.48\nsamples = 59\nvalue = [39, 59]\nclass = 1", fillcolor="#399de556"] ; +130 -> 132 ; +133 [label="gini = 0.0\nsamples = 2\nvalue = [4, 0]\nclass = 0", fillcolor="#e58139ff"] ; +129 -> 133 ; +134 [label="X <= -0.04\ngini = 0.33\nsamples = 86\nvalue = [30, 116]\nclass = 1", fillcolor="#399de5bd"] ; +128 -> 134 ; +135 [label="X <= -0.04\ngini = 0.34\nsamples = 81\nvalue = [30, 106]\nclass = 1", fillcolor="#399de5b7"] ; +134 -> 135 ; +136 [label="gini = 0.32\nsamples = 80\nvalue = [26, 106]\nclass = 1", fillcolor="#399de5c0"] ; +135 -> 136 ; +137 [label="gini = 0.0\nsamples = 1\nvalue = [4, 0]\nclass = 0", fillcolor="#e58139ff"] ; +135 -> 137 ; +138 [label="gini = 0.0\nsamples = 5\nvalue = [0, 10]\nclass = 1", fillcolor="#399de5ff"] ; +134 -> 138 ; +139 [label="X <= 0.04\ngini = 0.44\nsamples = 911\nvalue = [476, 962]\nclass = 1", fillcolor="#399de581"] ; +127 -> 139 ; +140 [label="X <= 0.03\ngini = 0.46\nsamples = 726\nvalue = [405, 749]\nclass = 1", fillcolor="#399de575"] ; +139 -> 140 ; +141 [label="X <= 0.01\ngini = 0.44\nsamples = 652\nvalue = [342, 698]\nclass = 1", fillcolor="#399de582"] ; +140 -> 141 ; +142 [label="gini = 0.46\nsamples = 511\nvalue = [292, 536]\nclass = 1", fillcolor="#399de574"] ; +141 -> 142 ; +143 [label="gini = 0.36\nsamples = 141\nvalue = [50, 162]\nclass = 1", fillcolor="#399de5b0"] ; +141 -> 143 ; +144 [label="X <= 0.03\ngini = 0.49\nsamples = 74\nvalue = [63, 51]\nclass = 0", fillcolor="#e5813931"] ; +140 -> 144 ; +145 [label="gini = 0.44\nsamples = 30\nvalue = [36, 17]\nclass = 0", fillcolor="#e5813987"] ; +144 -> 145 ; +146 [label="gini = 0.49\nsamples = 44\nvalue = [27, 34]\nclass = 1", 
fillcolor="#399de534"] ; +144 -> 146 ; +147 [label="X <= 0.05\ngini = 0.38\nsamples = 185\nvalue = [71, 213]\nclass = 1", fillcolor="#399de5aa"] ; +139 -> 147 ; +148 [label="X <= 0.05\ngini = 0.32\nsamples = 85\nvalue = [27, 108]\nclass = 1", fillcolor="#399de5bf"] ; +147 -> 148 ; +149 [label="gini = 0.39\nsamples = 58\nvalue = [24, 67]\nclass = 1", fillcolor="#399de5a4"] ; +148 -> 149 ; +150 [label="gini = 0.13\nsamples = 27\nvalue = [3, 41]\nclass = 1", fillcolor="#399de5ec"] ; +148 -> 150 ; +151 [label="X <= 0.05\ngini = 0.42\nsamples = 100\nvalue = [44, 105]\nclass = 1", fillcolor="#399de594"] ; +147 -> 151 ; +152 [label="gini = 0.0\nsamples = 3\nvalue = [5, 0]\nclass = 0", fillcolor="#e58139ff"] ; +151 -> 152 ; +153 [label="gini = 0.39\nsamples = 97\nvalue = [39, 105]\nclass = 1", fillcolor="#399de5a0"] ; +151 -> 153 ; +154 [label="X <= 0.86\ngini = 0.48\nsamples = 7802\nvalue = [7348, 4980]\nclass = 0", fillcolor="#e5813952"] ; +0 -> 154 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; +155 [label="X <= 0.39\ngini = 0.5\nsamples = 5371\nvalue = [4259, 4246]\nclass = 0", fillcolor="#e5813901"] ; +154 -> 155 ; +156 [label="X <= 0.39\ngini = 0.49\nsamples = 2632\nvalue = [1792, 2365]\nclass = 1", fillcolor="#399de53e"] ; +155 -> 156 ; +157 [label="X <= 0.28\ngini = 0.49\nsamples = 2621\nvalue = [1792, 2344]\nclass = 1", fillcolor="#399de53c"] ; +156 -> 157 ; +158 [label="X <= 0.26\ngini = 0.49\nsamples = 1790\nvalue = [1171, 1652]\nclass = 1", fillcolor="#399de54a"] ; +157 -> 158 ; +159 [label="X <= 0.26\ngini = 0.49\nsamples = 1668\nvalue = [1108, 1516]\nclass = 1", fillcolor="#399de545"] ; +158 -> 159 ; +160 [label="X <= 0.16\ngini = 0.49\nsamples = 1665\nvalue = [1102, 1516]\nclass = 1", fillcolor="#399de546"] ; +159 -> 160 ; +161 [label="gini = 0.48\nsamples = 878\nvalue = [547, 833]\nclass = 1", fillcolor="#399de558"] ; +160 -> 161 ; +162 [label="gini = 0.49\nsamples = 787\nvalue = [555, 683]\nclass = 1", fillcolor="#399de530"] ; +160 -> 162 ; +163 
[label="gini = 0.0\nsamples = 3\nvalue = [6, 0]\nclass = 0", fillcolor="#e58139ff"] ; +159 -> 163 ; +164 [label="X <= 0.27\ngini = 0.43\nsamples = 122\nvalue = [63, 136]\nclass = 1", fillcolor="#399de589"] ; +158 -> 164 ; +165 [label="X <= 0.27\ngini = 0.45\nsamples = 112\nvalue = [61, 120]\nclass = 1", fillcolor="#399de57d"] ; +164 -> 165 ; +166 [label="gini = 0.36\nsamples = 45\nvalue = [17, 56]\nclass = 1", fillcolor="#399de5b2"] ; +165 -> 166 ; +167 [label="gini = 0.48\nsamples = 67\nvalue = [44, 64]\nclass = 1", fillcolor="#399de550"] ; +165 -> 167 ; +168 [label="X <= 0.28\ngini = 0.2\nsamples = 10\nvalue = [2, 16]\nclass = 1", fillcolor="#399de5df"] ; +164 -> 168 ; +169 [label="gini = 0.0\nsamples = 4\nvalue = [0, 8]\nclass = 1", fillcolor="#399de5ff"] ; +168 -> 169 ; +170 [label="gini = 0.32\nsamples = 6\nvalue = [2, 8]\nclass = 1", fillcolor="#399de5bf"] ; +168 -> 170 ; +171 [label="X <= 0.32\ngini = 0.5\nsamples = 831\nvalue = [621, 692]\nclass = 1", fillcolor="#399de51a"] ; +157 -> 171 ; +172 [label="X <= 0.32\ngini = 0.5\nsamples = 340\nvalue = [293, 247]\nclass = 0", fillcolor="#e5813928"] ; +171 -> 172 ; +173 [label="X <= 0.29\ngini = 0.5\nsamples = 321\nvalue = [270, 239]\nclass = 0", fillcolor="#e581391d"] ; +172 -> 173 ; +174 [label="gini = 0.47\nsamples = 74\nvalue = [76, 46]\nclass = 0", fillcolor="#e5813965"] ; +173 -> 174 ; +175 [label="gini = 0.5\nsamples = 247\nvalue = [194, 193]\nclass = 0", fillcolor="#e5813901"] ; +173 -> 175 ; +176 [label="X <= 0.32\ngini = 0.38\nsamples = 19\nvalue = [23, 8]\nclass = 0", fillcolor="#e58139a6"] ; +172 -> 176 ; +177 [label="gini = 0.44\nsamples = 16\nvalue = [17, 8]\nclass = 0", fillcolor="#e5813987"] ; +176 -> 177 ; +178 [label="gini = 0.0\nsamples = 3\nvalue = [6, 0]\nclass = 0", fillcolor="#e58139ff"] ; +176 -> 178 ; +179 [label="X <= 0.32\ngini = 0.49\nsamples = 491\nvalue = [328, 445]\nclass = 1", fillcolor="#399de543"] ; +171 -> 179 ; +180 [label="X <= 0.32\ngini = 0.32\nsamples = 30\nvalue = [10, 
39]\nclass = 1", fillcolor="#399de5be"] ; +179 -> 180 ; +181 [label="gini = 0.44\nsamples = 17\nvalue = [8, 17]\nclass = 1", fillcolor="#399de587"] ; +180 -> 181 ; +182 [label="gini = 0.15\nsamples = 13\nvalue = [2, 22]\nclass = 1", fillcolor="#399de5e8"] ; +180 -> 182 ; +183 [label="X <= 0.33\ngini = 0.49\nsamples = 461\nvalue = [318, 406]\nclass = 1", fillcolor="#399de537"] ; +179 -> 183 ; +184 [label="gini = 0.47\nsamples = 22\nvalue = [23, 14]\nclass = 0", fillcolor="#e5813964"] ; +183 -> 184 ; +185 [label="gini = 0.49\nsamples = 439\nvalue = [295, 392]\nclass = 1", fillcolor="#399de53f"] ; +183 -> 185 ; +186 [label="gini = 0.0\nsamples = 11\nvalue = [0, 21]\nclass = 1", fillcolor="#399de5ff"] ; +156 -> 186 ; +187 [label="X <= 0.67\ngini = 0.49\nsamples = 2739\nvalue = [2467, 1881]\nclass = 0", fillcolor="#e581393d"] ; +155 -> 187 ; +188 [label="X <= 0.67\ngini = 0.5\nsamples = 1835\nvalue = [1593, 1352]\nclass = 0", fillcolor="#e5813927"] ; +187 -> 188 ; +189 [label="X <= 0.67\ngini = 0.5\nsamples = 1822\nvalue = [1589, 1330]\nclass = 0", fillcolor="#e581392a"] ; +188 -> 189 ; +190 [label="X <= 0.39\ngini = 0.5\nsamples = 1818\nvalue = [1581, 1330]\nclass = 0", fillcolor="#e5813928"] ; +189 -> 190 ; +191 [label="X <= 0.39\ngini = 0.28\nsamples = 11\nvalue = [15, 3]\nclass = 0", fillcolor="#e58139cc"] ; +190 -> 191 ; +192 [label="gini = 0.38\nsamples = 8\nvalue = [9, 3]\nclass = 0", fillcolor="#e58139aa"] ; +191 -> 192 ; +193 [label="gini = 0.0\nsamples = 3\nvalue = [6, 0]\nclass = 0", fillcolor="#e58139ff"] ; +191 -> 193 ; +194 [label="X <= 0.39\ngini = 0.5\nsamples = 1807\nvalue = [1566, 1327]\nclass = 0", fillcolor="#e5813927"] ; +190 -> 194 ; +195 [label="gini = 0.0\nsamples = 4\nvalue = [0, 6]\nclass = 1", fillcolor="#399de5ff"] ; +194 -> 195 ; +196 [label="gini = 0.5\nsamples = 1803\nvalue = [1566, 1321]\nclass = 0", fillcolor="#e5813928"] ; +194 -> 196 ; +197 [label="gini = 0.0\nsamples = 4\nvalue = [8, 0]\nclass = 0", fillcolor="#e58139ff"] ; +189 -> 
197 ; +198 [label="X <= 0.67\ngini = 0.26\nsamples = 13\nvalue = [4, 22]\nclass = 1", fillcolor="#399de5d1"] ; +188 -> 198 ; +199 [label="X <= 0.67\ngini = 0.17\nsamples = 11\nvalue = [2, 20]\nclass = 1", fillcolor="#399de5e6"] ; +198 -> 199 ; +200 [label="X <= 0.67\ngini = 0.44\nsamples = 2\nvalue = [1, 2]\nclass = 1", fillcolor="#399de57f"] ; +199 -> 200 ; +201 [label="gini = 0.0\nsamples = 1\nvalue = [0, 2]\nclass = 1", fillcolor="#399de5ff"] ; +200 -> 201 ; +202 [label="gini = 0.0\nsamples = 1\nvalue = [1, 0]\nclass = 0", fillcolor="#e58139ff"] ; +200 -> 202 ; +203 [label="X <= 0.67\ngini = 0.1\nsamples = 9\nvalue = [1, 18]\nclass = 1", fillcolor="#399de5f1"] ; +199 -> 203 ; +204 [label="gini = 0.2\nsamples = 5\nvalue = [1, 8]\nclass = 1", fillcolor="#399de5df"] ; +203 -> 204 ; +205 [label="gini = 0.0\nsamples = 4\nvalue = [0, 10]\nclass = 1", fillcolor="#399de5ff"] ; +203 -> 205 ; +206 [label="X <= 0.67\ngini = 0.5\nsamples = 2\nvalue = [2, 2]\nclass = 0", fillcolor="#e5813900"] ; +198 -> 206 ; +207 [label="gini = 0.0\nsamples = 1\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +206 -> 207 ; +208 [label="gini = 0.0\nsamples = 1\nvalue = [0, 2]\nclass = 1", fillcolor="#399de5ff"] ; +206 -> 208 ; +209 [label="X <= 0.85\ngini = 0.47\nsamples = 904\nvalue = [874, 529]\nclass = 0", fillcolor="#e5813965"] ; +187 -> 209 ; +210 [label="X <= 0.67\ngini = 0.47\nsamples = 884\nvalue = [863, 508]\nclass = 0", fillcolor="#e5813969"] ; +209 -> 210 ; +211 [label="gini = 0.0\nsamples = 8\nvalue = [16, 0]\nclass = 0", fillcolor="#e58139ff"] ; +210 -> 211 ; +212 [label="X <= 0.67\ngini = 0.47\nsamples = 876\nvalue = [847, 508]\nclass = 0", fillcolor="#e5813966"] ; +210 -> 212 ; +213 [label="gini = 0.0\nsamples = 3\nvalue = [0, 7]\nclass = 1", fillcolor="#399de5ff"] ; +212 -> 213 ; +214 [label="X <= 0.68\ngini = 0.47\nsamples = 873\nvalue = [847, 501]\nclass = 0", fillcolor="#e5813968"] ; +212 -> 214 ; +215 [label="gini = 0.33\nsamples = 40\nvalue = [52, 14]\nclass = 0", 
fillcolor="#e58139ba"] ; +214 -> 215 ; +216 [label="gini = 0.47\nsamples = 833\nvalue = [795, 487]\nclass = 0", fillcolor="#e5813963"] ; +214 -> 216 ; +217 [label="X <= 0.85\ngini = 0.45\nsamples = 20\nvalue = [11, 21]\nclass = 1", fillcolor="#399de579"] ; +209 -> 217 ; +218 [label="gini = 0.0\nsamples = 3\nvalue = [0, 7]\nclass = 1", fillcolor="#399de5ff"] ; +217 -> 218 ; +219 [label="X <= 0.86\ngini = 0.49\nsamples = 17\nvalue = [11, 14]\nclass = 1", fillcolor="#399de537"] ; +217 -> 219 ; +220 [label="X <= 0.86\ngini = 0.5\nsamples = 15\nvalue = [11, 11]\nclass = 0", fillcolor="#e5813900"] ; +219 -> 220 ; +221 [label="gini = 0.5\nsamples = 14\nvalue = [9, 11]\nclass = 1", fillcolor="#399de52e"] ; +220 -> 221 ; +222 [label="gini = 0.0\nsamples = 1\nvalue = [2, 0]\nclass = 0", fillcolor="#e58139ff"] ; +220 -> 222 ; +223 [label="gini = 0.0\nsamples = 2\nvalue = [0, 3]\nclass = 1", fillcolor="#399de5ff"] ; +219 -> 223 ; +224 [label="X <= 1.56\ngini = 0.31\nsamples = 2431\nvalue = [3089, 734]\nclass = 0", fillcolor="#e58139c2"] ; +154 -> 224 ; +225 [label="X <= 1.08\ngini = 0.35\nsamples = 1849\nvalue = [2240, 663]\nclass = 0", fillcolor="#e58139b4"] ; +224 -> 225 ; +226 [label="X <= 1.06\ngini = 0.4\nsamples = 824\nvalue = [942, 359]\nclass = 0", fillcolor="#e581399e"] ; +225 -> 226 ; +227 [label="X <= 0.99\ngini = 0.39\nsamples = 756\nvalue = [877, 318]\nclass = 0", fillcolor="#e58139a3"] ; +226 -> 227 ; +228 [label="X <= 0.99\ngini = 0.42\nsamples = 510\nvalue = [573, 241]\nclass = 0", fillcolor="#e5813994"] ; +227 -> 228 ; +229 [label="X <= 0.86\ngini = 0.41\nsamples = 508\nvalue = [573, 236]\nclass = 0", fillcolor="#e5813996"] ; +228 -> 229 ; +230 [label="gini = 0.0\nsamples = 12\nvalue = [18, 0]\nclass = 0", fillcolor="#e58139ff"] ; +229 -> 230 ; +231 [label="gini = 0.42\nsamples = 496\nvalue = [555, 236]\nclass = 0", fillcolor="#e5813993"] ; +229 -> 231 ; +232 [label="gini = 0.0\nsamples = 2\nvalue = [0, 5]\nclass = 1", fillcolor="#399de5ff"] ; +228 -> 232 ; 
+233 [label="X <= 1.0\ngini = 0.32\nsamples = 246\nvalue = [304, 77]\nclass = 0", fillcolor="#e58139be"] ; +227 -> 233 ; +234 [label="X <= 0.99\ngini = 0.19\nsamples = 56\nvalue = [76, 9]\nclass = 0", fillcolor="#e58139e1"] ; +233 -> 234 ; +235 [label="gini = 0.31\nsamples = 29\nvalue = [34, 8]\nclass = 0", fillcolor="#e58139c3"] ; +234 -> 235 ; +236 [label="gini = 0.05\nsamples = 27\nvalue = [42, 1]\nclass = 0", fillcolor="#e58139f9"] ; +234 -> 236 ; +237 [label="X <= 1.05\ngini = 0.35\nsamples = 190\nvalue = [228, 68]\nclass = 0", fillcolor="#e58139b3"] ; +233 -> 237 ; +238 [label="gini = 0.38\nsamples = 168\nvalue = [193, 66]\nclass = 0", fillcolor="#e58139a8"] ; +237 -> 238 ; +239 [label="gini = 0.1\nsamples = 22\nvalue = [35, 2]\nclass = 0", fillcolor="#e58139f0"] ; +237 -> 239 ; +240 [label="X <= 1.06\ngini = 0.47\nsamples = 68\nvalue = [65, 41]\nclass = 0", fillcolor="#e581395e"] ; +226 -> 240 ; +241 [label="gini = 0.0\nsamples = 1\nvalue = [0, 2]\nclass = 1", fillcolor="#399de5ff"] ; +240 -> 241 ; +242 [label="X <= 1.06\ngini = 0.47\nsamples = 67\nvalue = [65, 39]\nclass = 0", fillcolor="#e5813966"] ; +240 -> 242 ; +243 [label="gini = 0.0\nsamples = 1\nvalue = [3, 0]\nclass = 0", fillcolor="#e58139ff"] ; +242 -> 243 ; +244 [label="X <= 1.06\ngini = 0.47\nsamples = 66\nvalue = [62, 39]\nclass = 0", fillcolor="#e581395f"] ; +242 -> 244 ; +245 [label="gini = 0.47\nsamples = 10\nvalue = [5, 8]\nclass = 1", fillcolor="#399de560"] ; +244 -> 245 ; +246 [label="gini = 0.46\nsamples = 56\nvalue = [57, 31]\nclass = 0", fillcolor="#e5813974"] ; +244 -> 246 ; +247 [label="X <= 1.51\ngini = 0.31\nsamples = 1025\nvalue = [1298, 304]\nclass = 0", fillcolor="#e58139c3"] ; +225 -> 247 ; +248 [label="X <= 1.33\ngini = 0.3\nsamples = 966\nvalue = [1241, 273]\nclass = 0", fillcolor="#e58139c7"] ; +247 -> 248 ; +249 [label="X <= 1.32\ngini = 0.32\nsamples = 654\nvalue = [814, 204]\nclass = 0", fillcolor="#e58139bf"] ; +248 -> 249 ; +250 [label="X <= 1.09\ngini = 0.31\nsamples = 
637\nvalue = [798, 192]\nclass = 0", fillcolor="#e58139c2"] ; +249 -> 250 ; +251 [label="gini = 0.13\nsamples = 39\nvalue = [51, 4]\nclass = 0", fillcolor="#e58139eb"] ; +250 -> 251 ; +252 [label="gini = 0.32\nsamples = 598\nvalue = [747, 188]\nclass = 0", fillcolor="#e58139bf"] ; +250 -> 252 ; +253 [label="X <= 1.32\ngini = 0.49\nsamples = 17\nvalue = [16, 12]\nclass = 0", fillcolor="#e5813940"] ; +249 -> 253 ; +254 [label="gini = 0.0\nsamples = 2\nvalue = [0, 5]\nclass = 1", fillcolor="#399de5ff"] ; +253 -> 254 ; +255 [label="gini = 0.42\nsamples = 15\nvalue = [16, 7]\nclass = 0", fillcolor="#e581398f"] ; +253 -> 255 ; +256 [label="X <= 1.37\ngini = 0.24\nsamples = 312\nvalue = [427, 69]\nclass = 0", fillcolor="#e58139d6"] ; +248 -> 256 ; +257 [label="X <= 1.34\ngini = 0.12\nsamples = 80\nvalue = [121, 8]\nclass = 0", fillcolor="#e58139ee"] ; +256 -> 257 ; +258 [label="gini = 0.28\nsamples = 17\nvalue = [20, 4]\nclass = 0", fillcolor="#e58139cc"] ; +257 -> 258 ; +259 [label="gini = 0.07\nsamples = 63\nvalue = [101, 4]\nclass = 0", fillcolor="#e58139f5"] ; +257 -> 259 ; +260 [label="X <= 1.37\ngini = 0.28\nsamples = 232\nvalue = [306, 61]\nclass = 0", fillcolor="#e58139cc"] ; +256 -> 260 ; +261 [label="gini = 0.0\nsamples = 1\nvalue = [0, 2]\nclass = 1", fillcolor="#399de5ff"] ; +260 -> 261 ; +262 [label="gini = 0.27\nsamples = 231\nvalue = [306, 59]\nclass = 0", fillcolor="#e58139ce"] ; +260 -> 262 ; +263 [label="X <= 1.52\ngini = 0.46\nsamples = 59\nvalue = [57, 31]\nclass = 0", fillcolor="#e5813974"] ; +247 -> 263 ; +264 [label="gini = 0.0\nsamples = 2\nvalue = [0, 6]\nclass = 1", fillcolor="#399de5ff"] ; +263 -> 264 ; +265 [label="X <= 1.56\ngini = 0.42\nsamples = 57\nvalue = [57, 25]\nclass = 0", fillcolor="#e581398f"] ; +263 -> 265 ; +266 [label="X <= 1.52\ngini = 0.4\nsamples = 55\nvalue = [57, 22]\nclass = 0", fillcolor="#e581399d"] ; +265 -> 266 ; +267 [label="gini = 0.0\nsamples = 5\nvalue = [7, 0]\nclass = 0", fillcolor="#e58139ff"] ; +266 -> 267 ; +268 
[label="gini = 0.42\nsamples = 50\nvalue = [50, 22]\nclass = 0", fillcolor="#e581398f"] ; +266 -> 268 ; +269 [label="gini = 0.0\nsamples = 2\nvalue = [0, 3]\nclass = 1", fillcolor="#399de5ff"] ; +265 -> 269 ; +270 [label="X <= 1.83\ngini = 0.14\nsamples = 582\nvalue = [849, 71]\nclass = 0", fillcolor="#e58139ea"] ; +224 -> 270 ; +271 [label="X <= 1.83\ngini = 0.2\nsamples = 304\nvalue = [426, 54]\nclass = 0", fillcolor="#e58139df"] ; +270 -> 271 ; +272 [label="X <= 1.63\ngini = 0.2\nsamples = 303\nvalue = [426, 53]\nclass = 0", fillcolor="#e58139df"] ; +271 -> 272 ; +273 [label="X <= 1.58\ngini = 0.09\nsamples = 92\nvalue = [133, 7]\nclass = 0", fillcolor="#e58139f2"] ; +272 -> 273 ; +274 [label="X <= 1.58\ngini = 0.23\nsamples = 22\nvalue = [33, 5]\nclass = 0", fillcolor="#e58139d8"] ; +273 -> 274 ; +275 [label="gini = 0.15\nsamples = 21\nvalue = [33, 3]\nclass = 0", fillcolor="#e58139e8"] ; +274 -> 275 ; +276 [label="gini = 0.0\nsamples = 1\nvalue = [0, 2]\nclass = 1", fillcolor="#399de5ff"] ; +274 -> 276 ; +277 [label="X <= 1.6\ngini = 0.04\nsamples = 70\nvalue = [100, 2]\nclass = 0", fillcolor="#e58139fa"] ; +273 -> 277 ; +278 [label="gini = 0.11\nsamples = 25\nvalue = [32, 2]\nclass = 0", fillcolor="#e58139ef"] ; +277 -> 278 ; +279 [label="gini = 0.0\nsamples = 45\nvalue = [68, 0]\nclass = 0", fillcolor="#e58139ff"] ; +277 -> 279 ; +280 [label="X <= 1.63\ngini = 0.23\nsamples = 211\nvalue = [293, 46]\nclass = 0", fillcolor="#e58139d7"] ; +272 -> 280 ; +281 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]\nclass = 1", fillcolor="#399de5ff"] ; +280 -> 281 ; +282 [label="X <= 1.66\ngini = 0.23\nsamples = 210\nvalue = [293, 45]\nclass = 0", fillcolor="#e58139d8"] ; +280 -> 282 ; +283 [label="gini = 0.35\nsamples = 33\nvalue = [45, 13]\nclass = 0", fillcolor="#e58139b5"] ; +282 -> 283 ; +284 [label="gini = 0.2\nsamples = 177\nvalue = [248, 32]\nclass = 0", fillcolor="#e58139de"] ; +282 -> 284 ; +285 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]\nclass = 1", 
fillcolor="#399de5ff"] ; +271 -> 285 ; +286 [label="X <= 2.08\ngini = 0.07\nsamples = 278\nvalue = [423, 17]\nclass = 0", fillcolor="#e58139f5"] ; +270 -> 286 ; +287 [label="X <= 1.95\ngini = 0.03\nsamples = 127\nvalue = [195, 3]\nclass = 0", fillcolor="#e58139fb"] ; +286 -> 287 ; +288 [label="gini = 0.0\nsamples = 64\nvalue = [92, 0]\nclass = 0", fillcolor="#e58139ff"] ; +287 -> 288 ; +289 [label="X <= 1.95\ngini = 0.06\nsamples = 63\nvalue = [103, 3]\nclass = 0", fillcolor="#e58139f8"] ; +287 -> 289 ; +290 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]\nclass = 1", fillcolor="#399de5ff"] ; +289 -> 290 ; +291 [label="X <= 1.96\ngini = 0.04\nsamples = 62\nvalue = [103, 2]\nclass = 0", fillcolor="#e58139fa"] ; +289 -> 291 ; +292 [label="gini = 0.28\nsamples = 5\nvalue = [5, 1]\nclass = 0", fillcolor="#e58139cc"] ; +291 -> 292 ; +293 [label="gini = 0.02\nsamples = 57\nvalue = [98, 1]\nclass = 0", fillcolor="#e58139fc"] ; +291 -> 293 ; +294 [label="X <= 2.09\ngini = 0.11\nsamples = 151\nvalue = [228, 14]\nclass = 0", fillcolor="#e58139ef"] ; +286 -> 294 ; +295 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]\nclass = 1", fillcolor="#399de5ff"] ; +294 -> 295 ; +296 [label="X <= 2.31\ngini = 0.1\nsamples = 150\nvalue = [228, 13]\nclass = 0", fillcolor="#e58139f0"] ; +294 -> 296 ; +297 [label="X <= 2.3\ngini = 0.19\nsamples = 70\nvalue = [100, 12]\nclass = 0", fillcolor="#e58139e0"] ; +296 -> 297 ; +298 [label="gini = 0.07\nsamples = 68\nvalue = [100, 4]\nclass = 0", fillcolor="#e58139f5"] ; +297 -> 298 ; +299 [label="gini = 0.0\nsamples = 2\nvalue = [0, 8]\nclass = 1", fillcolor="#399de5ff"] ; +297 -> 299 ; +300 [label="X <= 2.55\ngini = 0.02\nsamples = 80\nvalue = [128, 1]\nclass = 0", fillcolor="#e58139fd"] ; +296 -> 300 ; +301 [label="gini = 0.03\nsamples = 41\nvalue = [63, 1]\nclass = 0", fillcolor="#e58139fb"] ; +300 -> 301 ; +302 [label="gini = 0.0\nsamples = 39\nvalue = [65, 0]\nclass = 0", fillcolor="#e58139ff"] ; +300 -> 302 ; +} \ No newline at end of file 
diff --git a/analysis_and_scripts/tree.png b/analysis_and_scripts/tree.png new file mode 100644 index 0000000000000000000000000000000000000000..ebfc7534aaecc522bbd0dd5f4043c3c9be8cf6dd Binary files /dev/null and b/analysis_and_scripts/tree.png differ diff --git a/figures/tulos_kuva_placeholder_en.png b/figures/tulos_kuva_placeholder_en.png new file mode 100644 index 0000000000000000000000000000000000000000..9ee6778167653a150cc23e0474718b5daeccc876 Binary files /dev/null and b/figures/tulos_kuva_placeholder_en.png differ diff --git a/figures/valikoitumis_iso.jpg b/figures/valikoitumis_iso.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6953c10e643b324b236fc11cb11c9104e3438ee7 Binary files /dev/null and b/figures/valikoitumis_iso.jpg differ diff --git a/figures/valikoitumisharha.png b/figures/valikoitumisharha.png new file mode 100644 index 0000000000000000000000000000000000000000..c3549722c72ab791cbea7f340a282b4594394c5f Binary files /dev/null and b/figures/valikoitumisharha.png differ diff --git a/figures/valikoitumisharha_kaaavio.drawio b/figures/valikoitumisharha_kaaavio.drawio new file mode 100644 index 0000000000000000000000000000000000000000..540943174f9f93d70c0e852e09b85d37f903e685 --- /dev/null +++ b/figures/valikoitumisharha_kaaavio.drawio @@ -0,0 +1 @@ +<mxfile modified="2019-04-21T15:58:55.500Z" host="www.draw.io" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0" etag="NAxnU8qYrcP3EQhgaOI9" version="10.6.3" type="device"><diagram id="3qIiofentZYx9hMsOPwP" 
name="Page-1">7VpZc+MoEP41qpp9yJaErvjRcTKT2a05qrJXHlmLsRhLwoOQj/31CxJIQsfasWXF2fJLLBpoaPi+7gZi2LN4+4HCVfiJBCgygBlsDfveAMAyLZP/CMmukLieWwgWFAeyUSV4wv8g1VNKMxygVGvICIkYXunCOUkSNGeaDFJKNnqzbyTSR13BBWoJnuYwakv/xAELC+kt8Cv5I8KLUI1seZOiJoaqsbQkDWFANjWR/WDYM0oIK77i7QxFYvHUuhT93vfUlhOjKGGHdPjds25+QRb+uHx+frynn39zl3/cSC1rGGXSYAN4Edd3941wtXzWbCeXwvuREVVxk+YbNeUNLH+1rSr510L+5lr+VoKvxsw2pk7xl4mfO2+Z8qmjJf5eSBNDWFD0w6rfdyXhhuFKuxmiZImjQo9U3ewqbK+prCtYknSJ1lCsX8e80q6JMCVJs9IoXNNdE/dMuVwT2tGwq3OxBUoMtN0AlGRJgMTeWrx6E2KGnlZwLmo3nIpcFrI4ktVC03sY40iwcEbiVcYQFSDlfKVJqb4OJoUMRBna1kQSXB8QiRGjO95E1toK6JLp1q0sbyreAEfKwhpnPCmDkqqLUnWFZv4hAf0CcIMzgfuxhr1O0OIeBLAsIp3g2p0KrjeKGeABDTO2BVqYsdwOzDjnwoxzxcyFY8ZRGcTFYMbtwExjFVESTEU2wkvzCKYpnusLh7aY/cW/zZ9dWXqu1dwL001V2KlCwidfdPJdVX6uV1b98pLq2LsJKcnoHO1PFxikC8T2e14UaMlVe0trW9a1Y0pGUQQZXuspWdc2yhG+EpxzVUWmiR6ZWkgo7Ja96ilUU5HTUDRpKCoWpqUoR1Vp9vFA814daODCgOZcNtDcY4HW9HHmuEDze6NggNcHBUGPB8E8yDTi4KedjIEM4QQl/Rl4GZ3yEfWYdeokarH0BC3v9CPBCZp6w7aZqzOtn+pnhwEM70kEtKVuOBYenZnuSlJGyRLNSEQolyQkQWImOIoaIhjhRSL8kTjtcfmdiPWYH7GnsiLGQSCG6cwuqvzDlIbKSwLLe6UkFbgHJhxN9g+WcNyejZ4vTnb7FP2KUXQwxfUEdAzmn2zfW3Mh5kgu5JCtvFjP8p+epOl6BjnK9OQLr+ZZJoN4lhZoviQJTlkWF/5gKIIPQMo2I3fHMHKciH4N54OEc1snne13hHPTH5F06gJjaNY9yLhLruwbIBhe2TfIK4Ht72efZY/JPvtM7BM8COEaYsayOt6GznPfMBEto/t9rhx8BZPjR2cQ15aiHLrn4tzsmWs+0c/TPWwvZjrY+l8d1Gs5qNbzQpeDGjUntwZ9XwBH3fuOdO0rDdt77SuPKZdy7asSOAUZdUH00mtf4OrYc0BD0Zmvfa1BHxj8/wPS1NZeCtT8yTBQs4GO2dGh1nXTMO5b1khIcw5Fmn1RSHPdxjHZOxJprms3AupQb1m8WP37XNG8+idE++Ff</diagram></mxfile> \ No newline at end of file diff --git a/viitteet.bib b/viitteet.bib index 31e1d8224066520dd6310eabd42866cbe05b847c..b51b30e246f98fababfce1c120e195349115ca4f 100644 --- a/viitteet.bib +++ b/viitteet.bib @@ -140,4 +140,33 @@ year = "2016", language={finnish}, note = {viitattu 5.4.2019} -} \ No newline at end of file +} + +@article{madras18, + title={Fairness Through Causal Awareness: Learning Latent-Variable Models for Biased Data}, + author={Madras, David and Creager, Elliot and 
Pitassi, Toniann and Zemel, Richard}, + journal={arXiv preprint arXiv:1809.02519}, + year={2018}, + language={finnish} +} + +@booklet{tira, + author = "Jyrki Kivinen", + title = "Tietorakenteet ja algoritmit", + year = "2018", + month = "Kevät", + note = "Samannimisen kurssin kurssimateriaali", + language={finnish} +} + +@article{scikit-learn, + title={Scikit-learn: Machine Learning in {P}ython}, + author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. + and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. + and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and + Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, + journal={Journal of Machine Learning Research}, + volume={12}, + pages={2825--2830}, + year={2011} +} \ No newline at end of file