Skip to content
Snippets Groups Projects
stan_modelling_theoretic.py 40.7 KiB
Newer Older
  • Learn to ignore specific revisions
  • Riku-Laine's avatar
    Riku-Laine committed
                    'X',
                    'decision_T',
                    'result_Y',
                    r / 10,
                    adjusted=True)
    
                # Human evaluation
    
                f_rate_human[i, r - 1] = humanEvaluationEvaluator(
                    df_labeled, 'judgeID_J', 'decision_T', 'result_Y',
                    'acceptanceRate_R', r / 10)
    
                # Contraction
    
                f_rate_cont[i, r - 1] = contractionEvaluator(
                    df_labeled, 'X', 'judgeID_J', 'decision_T', 'result_Y',
                    'acceptanceRate_R', r / 10)
    
                # Causal model - analytic solution
    
                f_rate_caus[i, r - 1] = monteCarloEvaluator(
                    df_labeled, 'X', 'decision_T', 'result_Y', 'acceptanceRate_R',
                    r / 10)
    
        failure_rates[:, 0] = np.mean(f_rate_true, axis=0)
        failure_rates[:, 1] = np.mean(f_rate_label, axis=0)
        failure_rates[:, 2] = np.mean(f_rate_label_adj, axis=0)
        failure_rates[:, 3] = np.mean(f_rate_human, axis=0)
        failure_rates[:, 4] = np.mean(f_rate_cont, axis=0)
        failure_rates[:, 5] = np.mean(f_rate_caus, axis=0)
        #failure_rates[:, 6] = f_rate_bayes
    
        failure_sems[:, 0] = scs.sem(f_rate_true, axis=0)
        failure_sems[:, 1] = scs.sem(f_rate_label, axis=0)
        failure_sems[:, 2] = scs.sem(f_rate_label_adj, axis=0)
        failure_sems[:, 3] = scs.sem(f_rate_human, axis=0)
        failure_sems[:, 4] = scs.sem(f_rate_cont, axis=0)
        failure_sems[:, 5] = scs.sem(f_rate_caus, axis=0)
        failure_sems[:, 6] = scs.sem(f_rate_bayes, axis=0, nan_policy='omit')
    
        x_ax = np.arange(0.1, 0.9, 0.1)
    
        labels = [
            'True Evaluation', 'Labeled outcomes', 'Labeled outcomes, adj.',
            'Human evaluation', 'Contraction', 'Analytic solution', 'Potential outcomes'
        ]
        colours = ['g', 'magenta', 'darkviolet', 'r', 'b', 'k', 'c']
    
        for i in range(failure_rates.shape[1]):
            plt.errorbar(x_ax,
                         failure_rates[:, i],
                         label=labels[i],
                         c=colours[i],
                         yerr=failure_sems[:, i])
    
        plt.title('Failure rate vs. Acceptance rate')
        plt.xlabel('Acceptance rate')
        plt.ylabel('Failure rate')
        plt.legend()
        plt.grid()
        
        if save: 
            plt.savefig(save_name + '_all')
        
        plt.show()
    
        print("\nFailure rates:")
        print(np.array2string(failure_rates, formatter={'float_kind':lambda x: "%.5f" % x}))
        
        print("\nMean absolute errors:")
        for i in range(1, failure_rates.shape[1]):
            print(
                labels[i].ljust(len(max(labels, key=len))),
                np.round(
                    np.mean(np.abs(failure_rates[:, 0] - failure_rates[:, i])), 5))
    
        drawDiagnostics(title=title,
                        save_name=save_name,
                        save=save,
                        f_rates=[
                            f_rate_true, f_rate_label, f_rate_label_adj,
                            f_rate_human, f_rate_cont, f_rate_caus, f_rate_bayes
                        ],
                        titles=labels)
    
    
    # Compile the Stan model from file once, up front (compilation is slow).
    # NOTE(review): `sm` is not referenced in this visible chunk — presumably
    # consumed by the Bayesian evaluator elsewhere in the file; confirm.
    sm = pystan.StanModel(file=stan_code_file_name)
    
    if which == 1:
        print("Without unobservables (Bernoulli + independent decisions)")

        # Data generator: Bernoulli outcomes, no latent unobservable.
        def dg():
            return bernoulliDGWithoutUnobservables(N_total=N_sim)

        # Decision maker: independent quantile-based decisions on X only.
        def decider(x):
            return quantileDecider(
                x,
                featureX_col="X",
                featureZ_col=None,
                nJudges_M=M_sim,
                beta_X=1,
                beta_Z=1)

        perfComp(
            dg, decider,
            "Fluctuation of failure rate estimates across iterations\n" +
            "Bernoulli + independent decisions, without unobservables",
            figure_path + "sl_bernoulli_independent_without_Z")

    # Free memory and close any open figures before the next scenario.
    gc.collect()
    plt.close('all')
    
    print("With unobservables in the data")
    
    if which == 2:
        print("\nBernoulli + independent decisions")
    
        dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim)
    
        decider = lambda x: quantileDecider(
            x, featureX_col="X", featureZ_col="Z", nJudges_M=M_sim, beta_X=1, beta_Z=1, add_epsilon=True)
    
        perfComp(
            dg, lambda x: decider(x),
            "Fluctuation of failure rate estimates across iterations \n" +
            "Bernoulli + independent decisions, with unobservables",
            figure_path + "sl_bernoulli_independent_with_Z",
        )
    
    gc.collect()
    plt.close('all')
    
    if which == 3:
        print("\nThreshold rule + independent decisions")

        # Data generator: threshold rule with a latent unobservable Z.
        def dg():
            return thresholdDGWithUnobservables(N_total=N_sim)

        # Decision maker: independent quantile-based decisions on X and Z.
        def decider(x):
            return quantileDecider(
                x,
                featureX_col="X",
                featureZ_col="Z",
                nJudges_M=M_sim,
                beta_X=1,
                beta_Z=1,
                add_epsilon=True)

        perfComp(
            dg, decider,
            "Fluctuation of failure rate estimates across iterations \n" +
            "Threshold rule + independent decisions, with unobservables",
            figure_path + "sl_threshold_independent_with_Z")

    # Free memory and close any open figures before the next scenario.
    gc.collect()
    plt.close('all')
    
    if which == 4:
        print("\nBernoulli + non-independent (batch) decisions")

        # Data generator: Bernoulli outcomes with a latent unobservable Z.
        def dg():
            return bernoulliDGWithUnobservables(N_total=N_sim)

        # Decision maker: Lakkaraju-style judges deciding in batches
        # (decisions are not independent across subjects).
        def decider(x):
            return humanDeciderLakkaraju(
                x,
                featureX_col="X",
                featureZ_col="Z",
                nJudges_M=M_sim,
                beta_X=1,
                beta_Z=1,
                add_epsilon=True)

        perfComp(
            dg, decider,
            "Fluctuation of failure rate estimates across iterations \n" +
            "Bernoulli + non-independent decisions, with unobservables",
            figure_path + "sl_bernoulli_batch_with_Z")

    # Free memory and close any open figures before the next scenario.
    gc.collect()
    plt.close('all')
    
    if which == 5:
        print("\nThreshold rule + non-independent (batch) decisions")

        # Data generator: threshold rule with a latent unobservable Z.
        def dg():
            return thresholdDGWithUnobservables(N_total=N_sim)

        # Decision maker: Lakkaraju-style judges deciding in batches.
        def decider(x):
            return humanDeciderLakkaraju(
                x,
                featureX_col="X",
                featureZ_col="Z",
                nJudges_M=M_sim,
                beta_X=1,
                beta_Z=1,
                add_epsilon=True)

        perfComp(
            dg, decider,
            "Fluctuation of failure rate estimates across iterations \n" +
            "Threshold rule + non-independent decisions, with unobservables",
            figure_path + "sl_threshold_batch_with_Z")

    # Free memory and close any open figures before the next scenario.
    gc.collect()
    plt.close('all')
    
    if which == 6:
        print("\nRandom decider")

        # Data generator: Bernoulli outcomes with a latent unobservable Z.
        def dg():
            return bernoulliDGWithUnobservables(N_total=N_sim)

        # Decision maker: judges decide at random, constrained only by
        # their individual acceptance rates.
        def decider(x):
            return randomDecider(x, nJudges_M=M_sim, use_acceptance_rates=True)

        perfComp(
            dg, decider,
            "Bernoulli + random decider with leniency and unobservables",
            figure_path + "sl_random_decider_with_Z")

    # Free memory and close any open figures before the next scenario.
    gc.collect()
    plt.close('all')
    
    if which == 7:
        print("\nBiased decider")

        # Data generator: Bernoulli outcomes with a latent unobservable Z.
        dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim)

        # Decision maker: biased decider over features X and Z.
        decider = lambda x: biasDecider(x, 'X', 'Z', add_epsilon=True)

        perfComp(
            dg, lambda x: decider(x),
            "Bernoulli + biased decider with leniency and unobservables",
            figure_path + "sl_biased_decider_with_Z",
        )

    # Fix: every other scenario frees memory and closes its figures after
    # perfComp; this branch previously skipped the cleanup, leaking open
    # matplotlib figures when several scenarios run in one process.
    gc.collect()
    plt.close('all')
    
    
    if which == 8:
        print("\nBad judge")

        # Data generator: Bernoulli outcomes with a latent unobservable Z.
        def dg():
            return bernoulliDGWithUnobservables(N_total=N_sim)

        # "Bad" judge: weak weight on the observable (beta_X=0.2), so the
        # decisions are only loosely informed by X.
        def decider(x):
            return quantileDecider(
                x, 'X', 'Z', beta_X=0.2, add_epsilon=True, nJudges_M=M_sim)

        perfComp(
            dg, decider,
            "Bernoulli + 'bad' decider with leniency and unobservables",
            figure_path + "sl_bad_decider_with_Z")

    # Free memory and close any open figures before the next scenario.
    gc.collect()
    plt.close('all')
    
    if which == 9:
        print("\nBernoulli + Bernoulli")

        # Data generator: Bernoulli outcomes with a latent unobservable Z.
        dg = lambda: bernoulliDGWithUnobservables(N_total=N_sim)

        # Decision maker: Bernoulli decider over features X and Z.
        decider = lambda x: bernoulliDecider(x, 'X', 'Z', nJudges_M=M_sim)

        perfComp(
            dg, lambda x: decider(x),
            "Bernoulli + Bernoulli",
            figure_path + "sl_bernoulli_bernoulli_with_Z",
        )

    # Fix: match the other scenarios, which free memory and close open
    # figures after perfComp (the cleanup was previously missing here).
    gc.collect()
    plt.close('all')
        
    if which == 10:
        print("\nBeta_Z = 3, Threshold + batch")

        # Data generator: threshold rule with a strong unobservable (beta_Z=3).
        dg = lambda: thresholdDGWithUnobservables(N_total=N_sim, beta_Z=3.0)

        # Decision maker mirrors the generator's beta_Z so the judges weight
        # Z the same way the data was generated.
        decider = lambda x: humanDeciderLakkaraju(
            x, featureX_col="X", featureZ_col="Z", nJudges_M=M_sim, beta_X=1, beta_Z=3, add_epsilon=True)

        perfComp(
            dg, lambda x: decider(x),
            "Beta_Z = 3, threshold + batch",
            figure_path + "sl_threshold_batch_beta_Z_3_with_Z",
        )

    # Fix: match the other scenarios, which free memory and close open
    # figures after perfComp (the cleanup was previously missing here).
    gc.collect()
    plt.close('all')
        
    if which == 11:
        print("\nBeta_Z = 5, Threshold + batch")

        # Data generator: threshold rule with a very strong unobservable
        # (beta_Z=5).
        dg = lambda: thresholdDGWithUnobservables(N_total=N_sim, beta_Z=5.0)

        # Decision maker mirrors the generator's beta_Z so the judges weight
        # Z the same way the data was generated.
        decider = lambda x: humanDeciderLakkaraju(
            x, featureX_col="X", featureZ_col="Z", nJudges_M=M_sim, beta_X=1, beta_Z=5, add_epsilon=True)

        perfComp(
            dg, lambda x: decider(x),
            "Beta_Z = 5, threshold + batch",
            figure_path + "sl_threshold_batch_beta_Z_5_with_Z",
        )

    # Fix: match the other scenarios, which free memory and close open
    # figures after perfComp (the cleanup was previously missing here).
    gc.collect()
    plt.close('all')