code_linear_dependency.stan

data {
  int<lower=1> D; // Dimensions of the features an coefficient vectors
  int<lower=0> N_obs;  // Number of "observed observations" (with T = 1)
  int<lower=0> N_cens; // Number of "censored observations" (with T = 0)
  int<lower=0> M; // Number of judges
  int<lower=1, upper=M> jj_obs[N_obs];   // judge_ID array
  int<lower=1, upper=M> jj_cens[N_cens]; // judge_ID array
  int<lower=0, upper=1> dec_obs[N_obs];   // Decisions for the observed observations
  int<lower=0, upper=1> dec_cens[N_cens]; // Decisions for the censored observations
  row_vector[D] X_obs[N_obs];   // Features of the observed observations 
  row_vector[D] X_cens[N_cens]; // Features of the censored observations 
  int<lower=0, upper=1> y_obs[N_obs]; // Outcomes of the observed observations
}

parameters {
  // Latent variable
  vector[N_obs] Z_obs;
  vector[N_cens] Z_cens;
  
  // Intercepts
  real alpha_T[M];
  real alpha_Y;

  // Temporary variables to compute the coefficients
  vector[D] a_XT;
  vector[D] a_XY;
  real<lower=0> a_ZT; // Presume latent variable has a positive coefficient.
  real<lower=0> a_ZY;

  real<lower=0> tau_XT;
  real<lower=0> tau_XY;
  real<lower=0> tau_ZT;
  real<lower=0> tau_ZY;

}

transformed parameters {
  
  // Coefficients
  vector[D] beta_XT;
  vector[D] beta_XY;
  real<lower=0> beta_ZT; // Presume latent variable has a positive coefficient.
  real<lower=0> beta_ZY;

  beta_XT = a_XT / sqrt(tau_XT);
  beta_XY = a_XY / sqrt(tau_XY);
  beta_ZT = a_ZT / sqrt(tau_ZT);
  beta_ZY = a_ZY / sqrt(tau_ZY);
}

model {
  // Latent variable
  Z_obs ~ normal(0, 1);
  Z_cens ~ normal(0, 1);
  
  // Intercepts
  alpha_T ~ normal(0, 5);
  alpha_Y ~ normal(0, 5);
  
  // According to 
  // https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations
  // section "Prior for the regression coefficients in logistic regression
  // (non-sparse case)" a good prior is Student's with parameters 3>nu>7, 
  // 0 and 1.
  
  // Also, according to
  // https://mc-stan.org/docs/2_19/stan-users-guide/reparameterization-section.html
  // reparametrizing beta ~ student_t(nu, 0, 1) can be done by sampling
  // a ~ N(0, 1) and tau ~ gamma(nu/2, nu/2) so then beta = a * tau^(-1/2).
  // Reparametrizing is done to achieve better mixing.

  a_XT ~ normal(0, 1);
  a_XY ~ normal(0, 1);
  a_ZT ~ normal(0, 1);
  a_ZY ~ normal(0, 1);

  // nu = 5 -> nu/2 = 2.5
  tau_XT ~ gamma(2.5, 2.5);
  tau_XY ~ gamma(2.5, 2.5);
  tau_ZT ~ gamma(2.5, 2.5);
  tau_ZY ~ gamma(2.5, 2.5);
  
  // Compute the regressions for the observed observations
  for(i in 1:N_obs){
    dec_obs[i] ~ bernoulli_logit(alpha_T[jj_obs[i]] + X_obs[i] * beta_XT + beta_ZT * Z_obs[i]);
    y_obs[i] ~ bernoulli_logit(alpha_Y + X_obs[i] * beta_XY + beta_ZY * Z_obs[i]);
  }

  // Compute the regression for the censored observations
  for(i in 1:N_cens)
    dec_cens[i] ~ bernoulli_logit(alpha_T[jj_cens[i]] + X_cens[i] * beta_XT + beta_ZT * Z_cens[i]);
}

generated quantities {
  int<lower=0, upper=1> y_est[N_cens];
  
  // Generate a draw from the posterior predictive.
  for(i in 1:N_cens){
    y_est[i] = bernoulli_logit_rng(alpha_Y + X_cens[i] * beta_XY + beta_ZY * Z_cens[i]);
  }
}