This analysis compares survival outcomes in patients with right coronary anomalies. We use age as the time scale and survival as the primary outcome, measuring time from birth to avoid lead time bias. The analysis compares patients with and without intervention, incorporating matching by age and sex for sensitivity analysis.
Initial Survival Analysis
We define survival time as the minimum of today’s date and last known alive date, and calculate age at the end of follow-up. We then categorize patients as cases (with intervention) or controls (without intervention) based on the intervention status.
We perform matching as described below, matching cases and controls first 1:1 based on age, sex, htn, dm, smoking and hyperlipidemia, then in 2:1 matching, and with skeptical, flat, and optimistic priors.
Code
# Load required libraries# Load required librarieslibrary(data.table)library(lubridate)library(MatchIt)library(survival)library(survminer)# 1. Load and prepare case data#df <- fread("~/Dropbox (Personal)/jack_data.csv")df <-fread("~/Dropbox (Personal)/arca_jack_2.csv")# First, let's check the actual column names in dfprint("Column names in case data:")
[1] "Column names in case data:"
Code
print(names(df))
[1] "Record ID"
[2] "Last name"
[3] "First name"
[4] "MRN"
[5] "DOB"
[6] "Sex"
[7] "Comorbidities (choice=DM)"
[8] "Comorbidities (choice=Smoker (ever))"
[9] "Comorbidities (choice=Hyperlipidemia)"
[10] "Comorbidities (choice=Hypertension)"
[11] "Comorbidities (choice=Stroke)"
[12] "Comorbidities (choice=Atrial arrhythmias (ever))"
[13] "Comorbidities (choice=Ventricular arrhythmias (ever))"
[14] "Comorbidities (choice=Others)"
[15] "Comorbidities (choice=None)"
[16] "Comorbidities (choice=Unknown)"
[17] "Date of diagnosis"
[18] "Age at diagnosis"
[19] "Alive?"
[20] "Date of death"
[21] "Alive as of ---"
[22] "Intervention"
[23] "Date of intervention"
[24] "Age at last followup"
[25] "Age at death"
[26] "Survival after diagnosis"
[27] "Duration of followup"
Code
# Adjust the column names based on your actual datacases <- df[, .(hyperlip=ifelse(`Comorbidities (choice=Hyperlipidemia)`=="Checked",1,0),htn =ifelse(`Comorbidities (choice=Hypertension)`=="Checked",1,0),dm =ifelse(`Comorbidities (choice=DM)`=="Checked",1,0),smoking =ifelse(`Comorbidities (choice=Smoker (ever))`=="Checked",1,0),MRN = MRN,sexnum =ifelse(Sex =="Male", 1, 0),DOB =as.Date(DOB, format ="%Y-%m-%d"),Date_of_death =as.Date(`Date of death`), # Adjust if column name is differentAlive_as_of =as.Date(`Alive as of ---`), # Adjust if column name is differentalive =ifelse(`Alive?`=="Yes",1,0),Int =ifelse(Intervention =="Yes", 1, 0),case_control =1# Mark as cases)]# Set today's date explicitlytoday_date <-as.Date("2024-11-20")# Calculate end of follow-up and survival time for casescases[, `:=`(end_followup =fifelse(!is.na(Date_of_death), Date_of_death, pmin(Alive_as_of, today_date, na.rm =TRUE)),survival_time =as.numeric(difftime(fifelse(!is.na(Date_of_death), Date_of_death,pmin(Alive_as_of, today_date, na.rm =TRUE)), DOB, units ="days" ))/365.25)]# 2. Load and prepare control data (MGBB data)controls <-fread("~/Downloads/MGBB_Phenos_2CODE 2022-06-21.csv")mgbb_pce=fread("~/Dropbox (Personal)/PRS_ethnicity/mgbb_data/mgbb_pce_collection_date.csv")controls=merge(controls,mgbb_pce,by="EMPI")# Check control data column namesprint("Column names in control data:")
controls <- controls[!which(controls$Vital_Status=="Deceased"&is.na(controls$Date_of_Death)), .(MRN = Biobank_Subject_ID,sexnum =ifelse(Sex =="Male", 1, 0),htn=Has_htn,hyperlip=Has_hyperlipidemia,dm=Has_dm,smoking=ifelse(current_smoking_imputed=="yes",1,0),DOB =as.Date(Date_of_Birth),Date_of_death =as.Date(Date_of_Death),Censor_Date =as.Date(Censor_Date),alive =ifelse(Vital_Status =="Living", 1, 0),Int =0, # Controls have no interventioncase_control =0# Mark as controls)]# Calculate survival time for controlscontrols[, `:=`(end_followup =fifelse(!is.na(Date_of_death), Date_of_death, Censor_Date),survival_time =as.numeric(difftime(fifelse(!is.na(Date_of_death), Date_of_death, Censor_Date), DOB,units ="days" ))/365.25)]# 3. Combine cases and controlscombined_df <-rbind(cases, controls, fill =TRUE)# 4. Add age calculation as of a specific datecombined_df[, age.today :=as.numeric(difftime(today_date, DOB, units ="days")/365.25)]# 5. Add death indicatorcombined_df[, death :=ifelse(alive ==0, 1, 0)]# Verify the dataprint("Summary of survival times:")
[1] "Summary of survival times:"
Code
print(summary(combined_df$survival_time))
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.856 43.822 60.033 57.553 71.288 106.913
Code
print("Distribution of cases/controls:")
[1] "Distribution of cases/controls:"
Code
print(table(combined_df$case_control))
0 1
63521 291
Code
print("Distribution of deaths:")
[1] "Distribution of deaths:"
Code
print(table(combined_df$death))
0 1
59443 4369
Code
print("Distribution of intervention by case/control status:")
[1] "Distribution of intervention by case/control status:"
# Approach 1: Direct Case-Control Matchingperform_direct_matching <-function(combined_df) {# Match cases to controls based on age and sex m.out <-matchit(case_control ~ age.today + sexnum+dm+htn+smoking+hyperlip, data = combined_df,distance ="glm",replace =TRUE) matched_data <-match.data(m.out)return(matched_data)}## keep more untreated among the treated # Plot survival curvesplot_survival <-function(matched_data) { fit <-survfit(Surv(time = survival_time, event = death) ~ case_control, data = matched_data)ggsurvplot( fit,data = matched_data,size =1,conf.int =TRUE,pval =TRUE,risk.table =TRUE,risk.table.col ="strata",risk.table.height =0.25,xlab="Age (years)",ggtheme =theme_bw() )}
Direct Case-Control Matching (Approach 1): - Matches cases and controls directly Preserves the original intervention distribution in cases. Uses Sex, Smoking, Hypertension, and Hyperlipidemia.
Code
# For Approach 1matched_direct <-perform_direct_matching(combined_df)plot_survival(matched_direct)
using more controls:
Code
perform_direct_matching <-function(combined_df, ratio =2) {# Match cases to controls based on age and sex m.out <-matchit(case_control ~ age.today + sexnum + dm + htn + smoking+hyperlip, data = combined_df,distance ="glm",ratio = ratio, # Specify number of controls per casereplace =FALSE, # Sample without replacementmethod ="nearest"# Use nearest neighbor matching ) matched_data <-match.data(m.out)# Print matching summaryprint(summary(m.out))return(matched_data)}# Try with 4 controls per casematched_direct <-perform_direct_matching(combined_df, ratio =2)
Call:
matchit(formula = case_control ~ age.today + sexnum + dm + htn +
smoking + hyperlip, data = combined_df, method = "nearest",
distance = "glm", replace = FALSE, ratio = ratio)
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
distance 0.0267 0.0045 0.5668 19.4105 0.2616
age.today 67.6184 61.7963 0.3303 1.0148 0.0771
sexnum 0.6289 0.4448 0.3811 . 0.1841
dm 0.1924 0.1007 0.2328 . 0.0918
htn 0.5430 0.3015 0.4846 . 0.2414
smoking 0.3058 0.0286 0.6017 . 0.2772
hyperlip 0.4605 0.2039 0.5148 . 0.2566
eCDF Max
distance 0.3757
age.today 0.1196
sexnum 0.1841
dm 0.0918
htn 0.2414
smoking 0.2772
hyperlip 0.2566
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
distance 0.0267 0.0266 0.0006 1.0044 0.0000
age.today 67.6184 67.8270 -0.0118 1.3566 0.0320
sexnum 0.6289 0.6306 -0.0036 . 0.0017
dm 0.1924 0.2148 -0.0567 . 0.0223
htn 0.5430 0.5481 -0.0103 . 0.0052
smoking 0.3058 0.3076 -0.0037 . 0.0017
hyperlip 0.4605 0.4502 0.0207 . 0.0103
eCDF Max Std. Pair Dist.
distance 0.0086 0.0019
age.today 0.0739 0.6425
sexnum 0.0017 0.6651
dm 0.0223 0.7627
htn 0.0052 0.6243
smoking 0.0017 0.0037
hyperlip 0.0103 0.2413
Sample Sizes:
Control Treated
All 63521 291
Matched 582 291
Unmatched 62939 0
Discarded 0 0
Code
# Plot survival curves with more controlsplot_survival <-function(matched_data) { fit <-survfit(Surv(time = survival_time, event = death) ~ case_control, data = matched_data)ggsurvplot( fit,data = matched_data,size =1,conf.int =TRUE,pval =TRUE,risk.table =TRUE,risk.table.col ="strata",risk.table.height =0.25,ggtheme =theme_bw(),xlab="Age (years)",title ="Survival Analysis with 2:1 Control Matching" )}plot_survival(matched_direct)
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 16.0.0 (clang-1600.0.26.3)’
using SDK: ‘MacOSX15.0.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
679 | #include <cmath>
| ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1