Bayes HM

Author

Sarah Urbut

Published

June 14, 2024

Bayesian Posteriors

In this document we want to assess the hierarhcical model in which each study in a given population arises from a population mean, and the population means arise from a global mean:

[ \[\begin{align*} \beta_{ij} &\sim \mathcal{N}(\mu_i, \sigma_i) \\ \mu_i &\sim \mathcal{N}(\mu_{\text{global}}, \sigma_{\text{global}}) \end{align*}\] ]

Code

data=readRDS("df_summary_forbayes.rds")

# Convert factors to numeric for Stan
data$Population <- as.numeric(data$Population)
data$study <- as.numeric(data$study)

# Prepare data list for Stan
data_list <- list(
  N = nrow(data),
  J = length(unique(data$Population)),
  K = length(unique(data$study)),
  population = data$Population,
  study = data$study,
  log_or = data$log_or,
  se_log_or = data$se_log_or
)

Code

# Compile the Stan model
stan_model <- stan_model("hierarchical_model_global.stan")

# Fit the model to your data
fit <- sampling(stan_model, data = data_list, iter = 2000, chains = 4)
saveRDS(fit,"stanfit.rds")

Code

fit=readRDS("stanfit.rds")

# Extract the posterior samples
posterior_samples <- rstan::extract(fit)

# Extract mu_global and mu_i for plotting
mu_global_samples <- posterior_samples$mu_global
mu_i_samples <- posterior_samples$mu_i

# Convert to data frames for ggplot
mu_global_df <- data.frame(mu_global = mu_global_samples)

# Convert mu_i_samples to a data frame
mu_i_df <- as.data.frame(mu_i_samples)

# Add population labels (assuming there are 5 populations)
populations <- c("AFR", "AMR", "EAS", "EUR", "MID","SAS")
colnames(mu_i_df) <- populations

# Melt the data frame for population-specific means
mu_i_df_melt <- melt(mu_i_df, variable.name = "Population", value.name = "mu_i")

# Plot the global mean distribution
mu_g=ggplot(mu_global_df, aes(x = mu_global)) +
  geom_density(fill = "blue", alpha = 0.5) +
  ggtitle("Posterior Distribution of Global Mean (mu_global)") +
  xlab("mu_global,logOR") +
  ylab("Density")


ggplotly(mu_g+theme_classic())

plotting

Code

# Plot the population-specific mean distributions
popspec=ggplot(mu_i_df_melt, aes(x = mu_i, fill = Population)) +
  geom_density(alpha = 0.5) +
  ggtitle("Posterior Distributions of Population-Specific Mean Log OR") +
  xlab("mu_i,logOR") +
  ylab("Density") +
  facet_wrap(~Population, scales = "free")


ggplotly(popspec+theme_classic())

Reporting Quantiles

Then we can demonstrate the posterior probability at the 2.5%, 0.5% and 95

Code

# Function to extract desired percentiles
extract_percentiles <- function(samples, percentiles = c(0.025, 0.5, 0.975)) {
  apply(samples, 2, quantile, probs = percentiles)
}

# Extracting percentiles for mu_i
mu_i_percentiles <- data.frame(extract_percentiles(mu_i_samples))
names(mu_i_percentiles)=populations
mu_i_percentiles

             AFR       AMR       EAS       EUR       MID       SAS
2.5%  0.06622499 0.1263997 0.1928936 0.1991526 0.1770721 0.2396524
50%   0.31466583 0.3373048 0.3939561 0.3582289 0.3909759 0.4383958
97.5% 0.49614603 0.5608198 0.6371278 0.5371786 0.7031129 0.8436029

We can also calculate the probability that the posterior \(\mu_i\) (population specific log OR) is greater than 0, meaning that LDL-scaled PRS connotes increased risk.

Code

# Function to calculate probability that logOR > 0
calc_prob_greater_than_zero <- function(samples) {
  apply(samples, 2, function(x) mean(x > 0))
}

# Calculating probabilities for mu_i
prob_greater_than_zero <- calc_prob_greater_than_zero(mu_i_samples)
data.frame(Population=populations,prob_greater_than_zero)

  Population prob_greater_than_zero
1        AFR                0.99075
2        AMR                0.99725
3        EAS                0.99850
4        EUR                0.99925
5        MID                0.99700
6        SAS                0.99975