CAD Sex Specific PRS

Exploration of Genetics and PRS over time

Author
Affiliation

Sarah Urbut

MGH

Published

March 23, 2001

Combined PRS

This analysis is specifically to examine the effect of a joint PRS on CAD after transformation into a combined (joint) PRS. To summarize

  • We add the sex specific and GPS mult for all individuals and rescale to N(0,1)

  • We transform to a spline (with 2 basis functions based on data fit) and predict the probability of CAD

  • We then plot by PRS

To create the combined SCORE, we shall add the sex specific and GPS mult and rescale. Here we show the distributions:

Code
m=fread("~/Downloads/SD_CAD_prs_dataframe_with_10_SNPs_delta_diff.tsv")
m$cardioprs_female = scale(m$SUM_RESULT_krishna_female_updated_main_beta)
m$cardioprs2 = scale(m$SUM_RESULT_PRSmult)
m$sex=as.factor(ifelse(m$Sex==0,"Female","Male"))
## create a joint score for both PRS

m$jointPRS=scale(m$SUM_RESULT_cardiogram+m$SUM_RESULT_krishna_female_updated_main_beta)


ggplot(m,aes(x=jointPRS,y=sex,fill=sex))+ggridges::geom_density_ridges_gradient( )+ggridges::theme_ridges()+labs(x="Combined PRS",fill="Sex")
Picking joint bandwidth of 0.125

Now we will fit a model based on a transformed basis. Recall that a natural spline simply transforms the predictor into a new basis

Code
m=data.frame(m)
m$PRS_spline <- ns(m$jointPRS, knots = 2)
m$sex=as.factor(ifelse(m$Sex==1,"male","female"))

# Fit logistic regression model with PRS spline and sex interaction
model <- glm(hard ~ PRS_spline * sex+ age_dnaextraction , data = m, family = binomial())

# Summary of the fitted model, recall that each df will have it's own term
summary(model) 

Call:
glm(formula = hard ~ PRS_spline * sex + age_dnaextraction, family = binomial(), 
    data = m)

Coefficients:
                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)         -6.224801   0.261165 -23.835  < 2e-16 ***
PRS_spline1          1.300945   0.399717   3.255 0.001135 ** 
PRS_spline2          1.343029   0.517215   2.597 0.009414 ** 
sexmale             -0.076005   0.322925  -0.235 0.813926    
age_dnaextraction    0.053715   0.001451  37.028  < 2e-16 ***
PRS_spline1:sexmale  1.747275   0.525078   3.328 0.000876 ***
PRS_spline2:sexmale  0.559952   0.668539   0.838 0.402269    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 20814  on 37640  degrees of freedom
Residual deviance: 18591  on 37634  degrees of freedom
AIC: 18605

Number of Fisher Scoring iterations: 6

Let’s take a look at spline basis. Essentially each patient will now have two coefficients:

Code
# Create the natural spline basis with a specified number of knots, e.g., 4
spline_basis <- ns(m$jointPRS, knots = 2)

# Convert the spline basis to a data frame for plotting
spline_df <- as.data.frame(spline_basis)
spline_df$jointPRS <- m$jointPRS  # add the original predictor variable for plotting

# Melt the data frame for ggplot2
spline_long <- reshape2::melt(spline_df, id.vars = 'jointPRS')

# Plot the spline basis
ggplot(spline_long, aes(x = jointPRS, y = value, group = variable, color = variable)) +
  geom_line(size = 1) +
  labs(title = "Natural Spline Basis", x = "jointPRS", y = "Basis functions") +
  theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Code
# Range of PRS values for prediction
prs_range <- seq(min(m$jointPRS), max(m$jointPRS), length.out = 100)

# Predictions for males
newdata_male <- data.frame(PRS = prs_range, 
                              age_dnaextraction = mean(m$age_dnaextraction), 
                              sex = "male")

## here split spline for  males and remap
newdata_male$PRS_spline <- ns(newdata_male$PRS, knots = 2)

newdata_female <- data.frame(PRS = prs_range, 
                              age_dnaextraction  = mean(m$age_dnaextraction), 
                              sex = "female")
newdata_female$PRS_spline <- ns(newdata_female$PRS, knots = 2)
###


pred_male <- predict(model, newdata_male, type = "response", se.fit = TRUE)
pred_male_ci <- pred_male$fit + cbind(-2*pred_male$se.fit, 2*pred_male$se.fit)

# Predictions with standard errors for females
pred_female <- predict(model, newdata_female, type = "response", se.fit = TRUE)
pred_female_ci <- pred_female$fit + cbind(-2*pred_female$se.fit, 2*pred_female$se.fit)

# Creating a data frame for ggplot
pred_data <- data.frame(PRS = prs_range,
                        PredictedMale = pred_male$fit,
                        LowerMale = pred_male_ci[,1],
                        UpperMale = pred_male_ci[,2],
                        PredictedFemale = pred_female$fit,
                        LowerFemale = pred_female_ci[,1],
                        UpperFemale = pred_female_ci[,2])

# Plot using ggplot2
ggplot(pred_data, aes(x = PRS)) +
  geom_ribbon(aes(ymin = LowerMale, ymax = UpperMale), fill = "blue", alpha = 0.2) +
  geom_ribbon(aes(ymin = LowerFemale, ymax = UpperFemale), fill = "red", alpha = 0.2) +
  geom_line(aes(y = PredictedMale, colour = "Male"), size = 1) +
  geom_line(aes(y = PredictedFemale, colour = "Female"), size = 1) +
  labs(x = "Combined PRS", y = "Fitted Probability of CAD") +
  scale_color_manual(name = "Sex", values = c("Male" = "blue", "Female" = "red")) +
  theme_minimal() +
  guides(colour = guide_legend(override.aes = list(size=2)))

Code
pred_data1=pred_data

GPS mult alone

Here we do the same analysis for GPS alone

Code
m$cardioprs2=scale(m$cardioprs2)
m$PRS_spline <- ns(m$cardioprs2, knots = 2)
m$sex=as.factor(ifelse(m$Sex==1,"male","female"))

# Fit logistic regression model with PRS spline and sex interaction
model <- glm(hard ~ PRS_spline * sex+ age_dnaextraction , data = m, family = binomial())


# Range of PRS values for prediction
prs_range <- seq(min(m$cardioprs2), max(m$cardioprs2), length.out = 100)

# Predictions for males
newdata_male <- data.frame(PRS = prs_range, 
                              age_dnaextraction = mean(m$age_dnaextraction), 
                              sex = "male")

## here split spline for  males and remap
newdata_male$PRS_spline <- ns(newdata_male$PRS, knots = 2)

newdata_female <- data.frame(PRS = prs_range, 
                              age_dnaextraction  = mean(m$age_dnaextraction), 
                              sex = "female")
newdata_female$PRS_spline <- ns(newdata_female$PRS, knots = 2)
###


pred_male <- predict(model, newdata_male, type = "response", se.fit = TRUE)
pred_male_ci <- pred_male$fit + cbind(-2*pred_male$se.fit, 2*pred_male$se.fit)

# Predictions with standard errors for females
pred_female <- predict(model, newdata_female, type = "response", se.fit = TRUE)
pred_female_ci <- pred_female$fit + cbind(-2*pred_female$se.fit, 2*pred_female$se.fit)

# Creating a data frame for ggplot
pred_data <- data.frame(PRS = prs_range,
                        PredictedMale = pred_male$fit,
                        LowerMale = pred_male_ci[,1],
                        UpperMale = pred_male_ci[,2],
                        PredictedFemale = pred_female$fit,
                        LowerFemale = pred_female_ci[,1],
                        UpperFemale = pred_female_ci[,2])

# Plot using ggplot2
ggplot(pred_data, aes(x = PRS)) +
  geom_ribbon(aes(ymin = LowerMale, ymax = UpperMale), fill = "blue", alpha = 0.2) +
  geom_ribbon(aes(ymin = LowerFemale, ymax = UpperFemale), fill = "red", alpha = 0.2) +
  geom_line(aes(y = PredictedMale, colour = "Male"), size = 1) +
  geom_line(aes(y = PredictedFemale, colour = "Female"), size = 1) +
  labs(x = "GPS Mult (not combined)", y = "Fitted Probability of CAD") +
  scale_color_manual(name = "Sex", values = c("Male" = "blue", "Female" = "red")) +
  theme_minimal() +
  guides(colour = guide_legend(override.aes = list(size=2)))

Now let’s put everything together:

Code
combined_df=pred_data1[,c("PRS","PredictedFemale","LowerFemale","UpperFemale")]
names(combined_df)=c("PRS","Combined_score_female","Lower_CSFemale","Upper_CSFemale")


plot_data <- rbind(
  data.frame(PRS = pred_data$PRS, Prediction = pred_data$PredictedMale, 
             Group = 'GPSMult Male', Lower = pred_data$LowerMale, Upper = pred_data$UpperMale),
  data.frame(PRS = pred_data$PRS, Prediction = pred_data$PredictedFemale, 
             Group = 'GPSMult Female', Lower = pred_data$LowerFemale, Upper = pred_data$UpperFemale),
  data.frame(PRS = combined_df$PRS, Prediction = combined_df$Combined_score_female, 
             Group = 'Combined Score Female', Lower = combined_df$Lower_CSFemale, Upper = combined_df$Upper_CSFemale)
)





 ggplot(plot_data, aes(x = PRS, y = Prediction, color = Group)) +
    geom_line() +
     geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = Group), alpha = 0.2) +
     labs(x = "PRS", y = "Prediction", color = "Group", fill = "Group") +
     theme_minimal()

Incremental plot

let’s try and do this incremental plot:

Initially I wanted to have Female GPS + sex specific (10 or GW) but then decided this was hard to show the CI of the differences. So I like this…

Code
library(ggplot2)

# Create a data frame for the values
data <- data.frame(
  Group = c("Female GPS", "Female GPS + 10 loci", "Female GPS + Genome-wide", "Female & Male"),
  Effect = c(1.35, 1.39, 1.41, 1.45),
  ymin = c(1.27, 1.29, 1.36, 1.39),
  ymax = c(1.45, 1.49, 1.46, 1.51)
)

# Order factor levels to ensure the plot order
data$Group <- factor(data$Group, levels = c("Female GPS", "Female GPS + 10 loci", "Female GPS + Genome-wide", "Female & Male"))

# Plot
ggplot(data, aes(x = Group, y = Effect, fill = Group)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25, position = position_dodge(0.7)) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(x = "Group", y = "Effect Size (Odds Ratio)") +
  theme_minimal() +
  theme(legend.position = "none")  # Hide the legend

Code
# Print the plot
ggsave("ordered_comparison_plot.png", width = 10, height = 8, dpi = 300)


# Print the plot
ggsave("comparison_plot.png", width = 10, height = 8, dpi = 300)