This analysis is specifically to examine the effect of a joint PRS on CAD after transformation into a combined (joint) PRS. To summarize
We add the sex specific and GPS mult for all individuals and rescale to N(0,1)
We transform to a spline (with 2 basis functions based on data fit) and predict the probability of CAD
We then plot by PRS
To create the combined SCORE, we shall add the sex specific and GPS mult and rescale. Here we show the distributions:
Code
m=fread("~/Downloads/SD_CAD_prs_dataframe_with_10_SNPs_delta_diff.tsv")m$cardioprs_female =scale(m$SUM_RESULT_krishna_female_updated_main_beta)m$cardioprs2 =scale(m$SUM_RESULT_PRSmult)m$sex=as.factor(ifelse(m$Sex==0,"Female","Male"))## create a joint score for both PRSm$jointPRS=scale(m$SUM_RESULT_cardiogram+m$SUM_RESULT_krishna_female_updated_main_beta)ggplot(m,aes(x=jointPRS,y=sex,fill=sex))+ggridges::geom_density_ridges_gradient( )+ggridges::theme_ridges()+labs(x="Combined PRS",fill="Sex")
Picking joint bandwidth of 0.125
Now we will fit a model based on a transformed basis. Recall that a natural spline simply transforms the predictor into a new basis
Code
m=data.frame(m)m$PRS_spline <-ns(m$jointPRS, knots =2)m$sex=as.factor(ifelse(m$Sex==1,"male","female"))# Fit logistic regression model with PRS spline and sex interactionmodel <-glm(hard ~ PRS_spline * sex+ age_dnaextraction , data = m, family =binomial())# Summary of the fitted model, recall that each df will have it's own termsummary(model)
Call:
glm(formula = hard ~ PRS_spline * sex + age_dnaextraction, family = binomial(),
data = m)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.224801 0.261165 -23.835 < 2e-16 ***
PRS_spline1 1.300945 0.399717 3.255 0.001135 **
PRS_spline2 1.343029 0.517215 2.597 0.009414 **
sexmale -0.076005 0.322925 -0.235 0.813926
age_dnaextraction 0.053715 0.001451 37.028 < 2e-16 ***
PRS_spline1:sexmale 1.747275 0.525078 3.328 0.000876 ***
PRS_spline2:sexmale 0.559952 0.668539 0.838 0.402269
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 20814 on 37640 degrees of freedom
Residual deviance: 18591 on 37634 degrees of freedom
AIC: 18605
Number of Fisher Scoring iterations: 6
Let’s take a look at spline basis. Essentially each patient will now have two coefficients:
Code
# Create the natural spline basis with a specified number of knots, e.g., 4spline_basis <-ns(m$jointPRS, knots =2)# Convert the spline basis to a data frame for plottingspline_df <-as.data.frame(spline_basis)spline_df$jointPRS <- m$jointPRS # add the original predictor variable for plotting# Melt the data frame for ggplot2spline_long <- reshape2::melt(spline_df, id.vars ='jointPRS')# Plot the spline basisggplot(spline_long, aes(x = jointPRS, y = value, group = variable, color = variable)) +geom_line(size =1) +labs(title ="Natural Spline Basis", x ="jointPRS", y ="Basis functions") +theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Code
# Range of PRS values for predictionprs_range <-seq(min(m$jointPRS), max(m$jointPRS), length.out =100)# Predictions for malesnewdata_male <-data.frame(PRS = prs_range, age_dnaextraction =mean(m$age_dnaextraction), sex ="male")## here split spline for males and remapnewdata_male$PRS_spline <-ns(newdata_male$PRS, knots =2)newdata_female <-data.frame(PRS = prs_range, age_dnaextraction =mean(m$age_dnaextraction), sex ="female")newdata_female$PRS_spline <-ns(newdata_female$PRS, knots =2)###pred_male <-predict(model, newdata_male, type ="response", se.fit =TRUE)pred_male_ci <- pred_male$fit +cbind(-2*pred_male$se.fit, 2*pred_male$se.fit)# Predictions with standard errors for femalespred_female <-predict(model, newdata_female, type ="response", se.fit =TRUE)pred_female_ci <- pred_female$fit +cbind(-2*pred_female$se.fit, 2*pred_female$se.fit)# Creating a data frame for ggplotpred_data <-data.frame(PRS = prs_range,PredictedMale = pred_male$fit,LowerMale = pred_male_ci[,1],UpperMale = pred_male_ci[,2],PredictedFemale = pred_female$fit,LowerFemale = pred_female_ci[,1],UpperFemale = pred_female_ci[,2])# Plot using ggplot2ggplot(pred_data, aes(x = PRS)) +geom_ribbon(aes(ymin = LowerMale, ymax = UpperMale), fill ="blue", alpha =0.2) +geom_ribbon(aes(ymin = LowerFemale, ymax = UpperFemale), fill ="red", alpha =0.2) +geom_line(aes(y = PredictedMale, colour ="Male"), size =1) +geom_line(aes(y = PredictedFemale, colour ="Female"), size =1) +labs(x ="Combined PRS", y ="Fitted Probability of CAD") +scale_color_manual(name ="Sex", values =c("Male"="blue", "Female"="red")) +theme_minimal() +guides(colour =guide_legend(override.aes =list(size=2)))
Code
pred_data1=pred_data
GPS mult alone
Here we do the same analysis for GPS alone
Code
m$cardioprs2=scale(m$cardioprs2)m$PRS_spline <-ns(m$cardioprs2, knots =2)m$sex=as.factor(ifelse(m$Sex==1,"male","female"))# Fit logistic regression model with PRS spline and sex interactionmodel <-glm(hard ~ PRS_spline * sex+ age_dnaextraction , data = m, family =binomial())# Range of PRS values for predictionprs_range <-seq(min(m$cardioprs2), max(m$cardioprs2), length.out =100)# Predictions for malesnewdata_male <-data.frame(PRS = prs_range, age_dnaextraction =mean(m$age_dnaextraction), sex ="male")## here split spline for males and remapnewdata_male$PRS_spline <-ns(newdata_male$PRS, knots =2)newdata_female <-data.frame(PRS = prs_range, age_dnaextraction =mean(m$age_dnaextraction), sex ="female")newdata_female$PRS_spline <-ns(newdata_female$PRS, knots =2)###pred_male <-predict(model, newdata_male, type ="response", se.fit =TRUE)pred_male_ci <- pred_male$fit +cbind(-2*pred_male$se.fit, 2*pred_male$se.fit)# Predictions with standard errors for femalespred_female <-predict(model, newdata_female, type ="response", se.fit =TRUE)pred_female_ci <- pred_female$fit +cbind(-2*pred_female$se.fit, 2*pred_female$se.fit)# Creating a data frame for ggplotpred_data <-data.frame(PRS = prs_range,PredictedMale = pred_male$fit,LowerMale = pred_male_ci[,1],UpperMale = pred_male_ci[,2],PredictedFemale = pred_female$fit,LowerFemale = pred_female_ci[,1],UpperFemale = pred_female_ci[,2])# Plot using ggplot2ggplot(pred_data, aes(x = PRS)) +geom_ribbon(aes(ymin = LowerMale, ymax = UpperMale), fill ="blue", alpha =0.2) +geom_ribbon(aes(ymin = LowerFemale, ymax = UpperFemale), fill ="red", alpha =0.2) +geom_line(aes(y = PredictedMale, colour ="Male"), size =1) +geom_line(aes(y = PredictedFemale, colour ="Female"), size =1) +labs(x ="GPS Mult (not combined)", y ="Fitted Probability of CAD") +scale_color_manual(name ="Sex", values =c("Male"="blue", "Female"="red")) +theme_minimal() +guides(colour =guide_legend(override.aes =list(size=2)))
Now let’s put everything together:
Code
combined_df=pred_data1[,c("PRS","PredictedFemale","LowerFemale","UpperFemale")]names(combined_df)=c("PRS","Combined_score_female","Lower_CSFemale","Upper_CSFemale")plot_data <-rbind(data.frame(PRS = pred_data$PRS, Prediction = pred_data$PredictedMale, Group ='GPSMult Male', Lower = pred_data$LowerMale, Upper = pred_data$UpperMale),data.frame(PRS = pred_data$PRS, Prediction = pred_data$PredictedFemale, Group ='GPSMult Female', Lower = pred_data$LowerFemale, Upper = pred_data$UpperFemale),data.frame(PRS = combined_df$PRS, Prediction = combined_df$Combined_score_female, Group ='Combined Score Female', Lower = combined_df$Lower_CSFemale, Upper = combined_df$Upper_CSFemale))ggplot(plot_data, aes(x = PRS, y = Prediction, color = Group)) +geom_line() +geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = Group), alpha =0.2) +labs(x ="PRS", y ="Prediction", color ="Group", fill ="Group") +theme_minimal()
Incremental plot
let’s try and do this incremental plot:
Initially I wanted to have Female GPS + sex specific (10 or GW) but then decided this was hard to show the CI of the differences. So I like this…
Code
library(ggplot2)# Create a data frame for the valuesdata <-data.frame(Group =c("Female GPS", "Female GPS + 10 loci", "Female GPS + Genome-wide", "Female & Male"),Effect =c(1.35, 1.39, 1.41, 1.45),ymin =c(1.27, 1.29, 1.36, 1.39),ymax =c(1.45, 1.49, 1.46, 1.51))# Order factor levels to ensure the plot orderdata$Group <-factor(data$Group, levels =c("Female GPS", "Female GPS + 10 loci", "Female GPS + Genome-wide", "Female & Male"))# Plotggplot(data, aes(x = Group, y = Effect, fill = Group)) +geom_bar(stat ="identity", position =position_dodge(), width =0.7) +geom_errorbar(aes(ymin = ymin, ymax = ymax), width =0.25, position =position_dodge(0.7)) +scale_fill_brewer(palette ="Pastel1") +labs(x ="Group", y ="Effect Size (Odds Ratio)") +theme_minimal() +theme(legend.position ="none") # Hide the legend
Code
# Print the plotggsave("ordered_comparison_plot.png", width =10, height =8, dpi =300)# Print the plotggsave("comparison_plot.png", width =10, height =8, dpi =300)