CoronaryProject

Author

Sarah Urbut

Published

November 9, 2024

Anomaly Workbook

As a first analysis, we aim to compare the survival of patients with congenital coronary anomalies uisng age as a time scale, with survival as the outcome and time from birth as the time scale so as to avoid lead time bias. We will compare the survival of patients with and without intervention, also matching individuals who received intervention by age and sex for sensitivity.

Code

library('data.table')
library(lubridate)
library(MatchIt)
library(survival)
library(survminer)

df=fread("~/Dropbox (Personal)/jack_data.csv")
df$DOB=as.Date(df$DOB,format="%Y-%m-%d")
df$today=as.Date(df$`Todays Date`,format = "%m/%d/%y")
df$Survival=as.numeric(difftime(df$today,df$`Date of diagnosis`,units = "days")/365.25)
df$contact_time=as.numeric(difftime(df$`Date of last contact`,df$`Date of diagnosis`,units = "days")/365.25)
# summary(df$contact_time[df$`Alive?`=="Yes"])
# # 26 folks who we don't have contact time with 
# plot(df$Survival[df$`Alive?`=="Yes"],df$contact_time[df$`Alive?`=="Yes"],xlab="Survival",ylab="Last Known Contact Time")


df$Int=as.factor(ifelse(df$Intervention=="Yes",1,0))
df$dm=ifelse(df$`Comorbidities (choice=DM)`=="Checked",1,0)
df$hypertension=ifelse(df$`Comorbidities (choice=Hypertension)`=="Checked",1,0)
df$smoke=ifelse(df$`Comorbidities (choice=Smoker (ever))`=="Checked",1,0)
df$stroke=ifelse(df$`Comorbidities (choice=Stroke)`=="Checked",1,0)
df$hyperlip=ifelse(df$`Comorbidities (choice=Hyperlipidemia)`=="Checked",1,0)
df$atrial.arrhythmias=ifelse(df$`Comorbidities (choice=Atrial arrhythmias (ever))`=="Checked",1,0)
df$vent.arrhythmias=ifelse(df$`Comorbidities (choice=Ventricular arrhythmias (ever))`=="Checked",1,0)
df$sexnum=df$Sex=ifelse(df$Sex=="Male",1,0)
df$alive=ifelse(!is.na(df$`Date of death`),0,1)
df$Date.of.death=df$`Date of death`
df$Alive.as.of=df$`Alive as of ---`
df$Age.at.diagnosis=as.numeric(difftime(df$`Date of diagnosis`,df$DOB,units = "days")/365.25)


# Define end of follow-up date
df[, end_followup := fifelse(!is.na(Date.of.death), Date.of.death, pmin(Alive.as.of, today, na.rm = TRUE))]

df$case_control=rep(1,nrow(df))
# Calculate survival time from age of birth to end of follow-up
df[, survival_time := as.numeric(difftime(end_followup, DOB, units = "days")) / 365.25]

# Define the event indicator (1 if dead, 0 otherwise)
df[, event := ifelse(!is.na(Date.of.death), 1, 0)]

# Create the survival object
surv_obj <- Surv(time = df$survival_time, event = df$event)

# Fit Kaplan-Meier survival curves by intervention status
km_fit <- survfit(surv_obj ~ Intervention, data = df)

# Plot the survival curves
ggsurvplot(km_fit, data = df, pval = TRUE, conf.int = TRUE,
           ggtheme = theme_minimal(),
           title = "Survival by Intervention Status")

Propensity Matching among patients

can’t be done without Comorbid data

Code

covariates <- c("Age.at.diagnosis", "Sex", "dm", "smoke", "hypertension", "stroke", "hyperlip",
"atrial.arrhythmias", "vent.arrhythmias", "survival_time", "event", "Int")

# Logistic regression for propensity scores
ps_model <- glm(Int ~ Sex+ dm+smoke+hypertension+stroke+hyperlip,
, data = df, family = binomial)

# Calculate propensity scores
df[, propensity_score := predict(ps_model, type = "response")]

# Perform matching
match_out <- matchit(Int~ propensity_score, data = df, method = "nearest")

# Create matched dataset
matched_data <- match.data(match_out)

# Fit Kaplan-Meier survival curves for matched data
matched_surv_obj <- Surv(time = matched_data$survival_time, event = matched_data$event)
matched_km_fit <- survfit(matched_surv_obj ~ Intervention, data = matched_data)

# Plot the survival curves for matched data
ggsurvplot(matched_km_fit, data = matched_data, pval = TRUE, conf.int = TRUE,
           ggtheme = theme_minimal(),
           title = "Survival by Intervention Status (Matched Data)")

Normal controls

We’d now like to compare this with the larger population of MGB subjects. We will again perform this analysis in two ways;

Match patients against MGB controls for age and sex, and compare survival among matched controls and patients with and wihtout intervention.
Match patients to intervention, thus controlling for higher/lower risk intervnetion status, then merge with MGB controls, again matching for age and sex, and compare surviva.

Propensity Score Matching

Match cases/controls (independent of intervention)

First, let’s look at differential survival, after matching by age with the population. Here we will not match for the intervention

Code

f=fread("~/Downloads//MGBB_Phenos_2CODE 2022-06-21.csv")

f$hyperlip=ifelse(f$Has_hyperlipidemia==1,1,0)
f$stroke=ifelse(f$Has_all_stroke_noTIA==1,1,0)
f$hypertension=ifelse(f$Has_htn==1,1,0)

cov=fread("~/Dropbox (Personal)/PRS_ethnicity/mgbb_data/mgbb_pce_collection_date.csv")

f=merge(f,cov,by="EMPI")
f$smoke=ifelse(f$current_smoking_imputed=="yes",1,0)
f$dm=ifelse(f$dm=="yes",1,0)
f$race=ifelse(f$race_imputed=="white",1,0)

## remove those without a date of death
f2=f[!which(f$Vital_Status=="Deceased"&is.na(f$Date_of_Death)),]

f2$censor.age=as.numeric(difftime(f2$Date_of_Death,f2$Date_of_Birth,units = "days")/365.25)

# w=which(f2$Vital_Status=="Living")
# n=which(is.na(f2$censor.age))
# all.equal(w,n)
alive=which(f2$Vital_Status=="Living")
f2$censor.age[alive]=difftime(f2$Censor_Date[alive],f2$Date_of_Birth[alive],units = "days")/365.25

#########

f2$case_control=rep(0,nrow(f2))
f2$Int=rep(0,nrow(f2))
f2$Sex=ifelse(f2$sex_imputed=="male",1,0)
f2$Vital_Status=ifelse(f2$Vital_Status=="Living",1,0)

f2_subset=f2[,c("Biobank_Subject_ID","Sex","Date_of_Birth","Vital_Status","censor.age","hyperlip","hypertension","stroke","dm","smoke","Int","case_control")]
cor_subset=df[,c("MRN","sexnum","DOB","alive","survival_time","hyperlip","hypertension","stroke","dm","smoke","Int","case_control")]

cor_subset$DOB=as.Date(cor_subset$DOB)

f2_subset$Date_of_Birth=as.Date(f2_subset$Date_of_Birth)
names(f2_subset)=names(cor_subset)


combined_df=rbind(cor_subset,f2_subset)
combined_df$age.today=as.numeric(difftime(as.Date("06/30/2024",format = "%m/%d/%y"),as.Date(combined_df$DOB),units = "days")/365.25)
combined_df$death=ifelse(combined_df$alive==0,1,0)
f2_subset=combined_df[combined_df$case_control==0,]



m.out0 <- matchit(case_control ~ age.today + sexnum, data = combined_df,distance = "glm",replace = TRUE)

m=match.data(m.out0)
m$status=as.factor(ifelse(m$case_control==0,"Control",ifelse(m$Int==0,"Untreated","Treated")))
fit=survfit(Surv(time = survival_time,event = death)~status,data=m)


ggsurvplot(
  fit,
  data = m,
  size = 1,                 # change line size
# custom color palettes
  conf.int = TRUE,          # Add confidence interval
  pval = TRUE,              # Add p-value
  risk.table = TRUE,        # Add risk table
  risk.table.col = "strata",# Risk table color by groups
 
  risk.table.height = 0.25, # Useful to change when you have multiple groups
  ggtheme = theme_bw()      # Change ggplot2 theme
)

Code

f=fread("~/Downloads//MGBB_Phenos_2CODE 2022-06-21.csv")

f$hyperlip=ifelse(f$Has_hyperlipidemia==1,1,0)
f$stroke=ifelse(f$Has_all_stroke_noTIA==1,1,0)
f$hypertension=ifelse(f$Has_htn==1,1,0)

cov=fread("~/Dropbox (Personal)/PRS_ethnicity/mgbb_data/mgbb_pce_collection_date.csv")

f=merge(f,cov,by="EMPI")
f$smoke=ifelse(f$current_smoking_imputed=="yes",1,0)
f$dm=ifelse(f$dm=="yes",1,0)
f$race=ifelse(f$race_imputed=="white",1,0)

## remove those without a date of death
f2=f[!which(f$Vital_Status=="Deceased"&is.na(f$Date_of_Death)),]

f2$censor.age=as.numeric(difftime(f2$Date_of_Death,f2$Date_of_Birth,units = "days")/365.25)

# w=which(f2$Vital_Status=="Living")
# n=which(is.na(f2$censor.age))
# all.equal(w,n)
alive=which(f2$Vital_Status=="Living")
f2$censor.age[alive]=difftime(f2$Censor_Date[alive],f2$Date_of_Birth[alive],units = "days")/365.25

#########

f2$case_control=rep(0,nrow(f2))
f2$Int=rep(0,nrow(f2))
f2$Sex=ifelse(f2$sex_imputed=="male",1,0)
f2$Vital_Status=ifelse(f2$Vital_Status=="Living",1,0)

f2_subset=f2[,c("Biobank_Subject_ID","Sex","Date_of_Birth","Vital_Status","censor.age","hyperlip","hypertension","stroke","dm","smoke","Int","case_control")]
cor_subset=df[,c("MRN","sexnum","DOB","alive","survival_time","hyperlip","hypertension","stroke","dm","smoke","Int","case_control")]

cor_subset$DOB=as.Date(cor_subset$DOB)

f2_subset$Date_of_Birth=as.Date(f2_subset$Date_of_Birth)
names(f2_subset)=names(cor_subset)


combined_df=rbind(cor_subset,f2_subset)
combined_df$age.today=as.numeric(difftime(as.Date("06/30/2024",format = "%m/%d/%y"),as.Date(combined_df$DOB),units = "days")/365.25)
combined_df$death=ifelse(combined_df$alive==0,1,0)
f2_subset=combined_df[combined_df$case_control==0,]



m.out0 <- matchit(case_control ~ age.today + sexnum, data = combined_df,distance = "glm",replace = TRUE)

m=match.data(m.out0)
m$status=as.factor(ifelse(m$case_control==0,"Control",ifelse(m$Int==0,"Untreated","Treated")))
fit=survfit(Surv(time = survival_time,event = death)~status,data=m)


ggsurvplot(
  fit,
  data = m,
  size = 1,                 # change line size
# custom color palettes
  conf.int = TRUE,          # Add confidence interval
  pval = TRUE,              # Add p-value
  risk.table = TRUE,        # Add risk table
  risk.table.col = "strata",# Risk table color by groups
 
  risk.table.height = 0.25, # Useful to change when you have multiple groups
  ggtheme = theme_bw()      # Change ggplot2 theme
)

Match patients, combine with controls

Now match patients for intervention status, and merge with controls .

Code

m.out0 <- matchit(Int ~ age.today + sexnum, data = combined_df[combined_df$case_control==1,],distance = "glm",replace = TRUE)

md=match.data(m.out0)
# 
combined_df2=rbind(data.frame(md[,c("MRN","sexnum","smoke","alive","age.today","dm","Int","case_control","survival_time","death")]),f2_subset[,c("MRN","sexnum","smoke","alive","age.today","dm","Int","case_control","survival_time","death")])
m.out1 <- matchit(case_control ~ age.today + sexnum, data = combined_df2,distance = "glm",replace = TRUE)

m=match.data(m.out1)
m$status=as.factor(ifelse(m$case_control==0,"Control",ifelse(m$Int==0,"Untreated","Treated")))
fit=survfit(Surv(time = survival_time,event = death)~status,data=m)


ggsurvplot(
  fit,
  data = m,
  size = 1,                 # change line size
# custom color palettes
  conf.int = TRUE,          # Add confidence interval
  pval = TRUE,              # Add p-value
  risk.table = TRUE,        # Add risk table
  risk.table.col = "strata",# Risk table color by groups
 
  risk.table.height = 0.25, # Useful to change when you have multiple groups
  ggtheme = theme_bw()      # Change ggplot2 theme
)

Summary

Compare differential survival by intervention status among cases.

Perform propensity score matching within the disease group to balance the intervention groups. Comparison with Healthy Controls:

Compare the matched cases to healthy controls.

Perform propensity score matching between cases and controls based on health status. Combined Analysis:

Merge the intervention-matched cases with healthy controls. Perform a final propensity score matching to compare the combined dataset’s survival.