In this document we want to assess the hierarhcical model in which each study in a given population arises from a population mean, and the population means arise from a global mean:
data=readRDS("df_summary_forbayes.rds")# Convert factors to numeric for Standata$Population <-as.numeric(data$Population)data$study <-as.numeric(data$study)# Prepare data list for Standata_list <-list(N =nrow(data),J =length(unique(data$Population)),K =length(unique(data$study)),population = data$Population,study = data$study,log_or = data$log_or,se_log_or = data$se_log_or)
Code
# Compile the Stan modelstan_model <-stan_model("hierarchical_model_global.stan")# Fit the model to your datafit <-sampling(stan_model, data = data_list, iter =2000, chains =4)saveRDS(fit,"stanfit.rds")
Code
fit=readRDS("stanfit.rds")# Extract the posterior samplesposterior_samples <- rstan::extract(fit)# Extract mu_global and mu_i for plottingmu_global_samples <- posterior_samples$mu_globalmu_i_samples <- posterior_samples$mu_i# Convert to data frames for ggplotmu_global_df <-data.frame(mu_global = mu_global_samples)# Convert mu_i_samples to a data framemu_i_df <-as.data.frame(mu_i_samples)# Add population labels (assuming there are 5 populations)populations <-c("AFR", "AMR", "EAS", "EUR", "MID","SAS")colnames(mu_i_df) <- populations# Melt the data frame for population-specific meansmu_i_df_melt <-melt(mu_i_df, variable.name ="Population", value.name ="mu_i")# Plot the global mean distributionmu_g=ggplot(mu_global_df, aes(x = mu_global)) +geom_density(fill ="blue", alpha =0.5) +ggtitle("Posterior Distribution of Global Mean (mu_global)") +xlab("mu_global,logOR") +ylab("Density")ggplotly(mu_g+theme_classic())
plotting
Code
# Plot the population-specific mean distributionspopspec=ggplot(mu_i_df_melt, aes(x = mu_i, fill = Population)) +geom_density(alpha =0.5) +ggtitle("Posterior Distributions of Population-Specific Mean Log OR") +xlab("mu_i,logOR") +ylab("Density") +facet_wrap(~Population, scales ="free")ggplotly(popspec+theme_classic())
Reporting Quantiles
Then we can demonstrate the posterior probability at the 2.5%, 0.5% and 95
Code
# Function to extract desired percentilesextract_percentiles <-function(samples, percentiles =c(0.025, 0.5, 0.975)) {apply(samples, 2, quantile, probs = percentiles)}# Extracting percentiles for mu_imu_i_percentiles <-data.frame(extract_percentiles(mu_i_samples))names(mu_i_percentiles)=populationsmu_i_percentiles
AFR AMR EAS EUR MID SAS
2.5% 0.06622499 0.1263997 0.1928936 0.1991526 0.1770721 0.2396524
50% 0.31466583 0.3373048 0.3939561 0.3582289 0.3909759 0.4383958
97.5% 0.49614603 0.5608198 0.6371278 0.5371786 0.7031129 0.8436029
We can also calculate the probability that the posterior \(\mu_i\) (population specific log OR) is greater than 0, meaning that LDL-scaled PRS connotes increased risk.
Code
# Function to calculate probability that logOR > 0calc_prob_greater_than_zero <-function(samples) {apply(samples, 2, function(x) mean(x >0))}# Calculating probabilities for mu_iprob_greater_than_zero <-calc_prob_greater_than_zero(mu_i_samples)data.frame(Population=populations,prob_greater_than_zero)
Population prob_greater_than_zero
1 AFR 0.99075
2 AMR 0.99725
3 EAS 0.99850
4 EUR 0.99925
5 MID 0.99700
6 SAS 0.99975