Code
grep -E 'Byetta|Ozempic|Wegovy|Bydureon|Trulicity|Lixumia|Victoza|Rybelsus|Liraglutide|Dulaglutide|Exenatide|Lixisenatide|Semaglutide' gp_scripts.txt > glp1s_withi.txt
Minimum age at initiation 18 years old
Initiation of GLP1-RA treatment (T0). Initiation is defined as the date of first purchase or prescription of GLP1-RA, if purchase data is not available
At least 1 body weight measured maximum 365 days before initiation (or within 14 days after if no measure before initiation is available). If multiple measurements are within this time frame, only the closest one to initiation (T0) is to be considered. This variable is called W0
grep -E 'Byetta|Ozempic|Wegovy|Bydureon|Trulicity|Lixumia|Victoza|Rybelsus|Liraglutide|Dulaglutide|Exenatide|Lixisenatide|Semaglutide' gp_scripts.txt > glp1s_withi.txt
library(lubridate)
library(dplyr)
library(tidyr)
library(data.table)
library(table1)
## From the UKB showcase field
=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/prs_subset.rds")
prs_subset=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/output/dfukb_baseline.rds")
df_baseline=fread("~/Library/CloudStorage/Dropbox-Personal//glp1s_withi.txt")
glp1$DrugName <- as.factor(sub("([A-Za-z]+[ ]?[A-Za-z]*).*", "\\1",glp1$V7))
glp1
colnames(glp1)[1]=c("Identifier")
colnames(glp1)[3]=c("event_dt")
$event_dt <- as.Date(glp1$event_dt, format="%d/%m/%Y")
glp1
=readRDS("~/Library/CloudStorage/Dropbox-Personal/ukbb-ehr-data/data/biomarkers.rds")
biomarkers=biomarkers[biomarkers$variable%in%"height",]%>%group_by(eid)%>%summarise(m=mean(value))
height
=biomarkers[biomarkers$variable%in%"weight",]
weight$date <- as.Date(weight$date, format="%Y/%m/%d")
weight
<- glp1 %>%
patient_med_data group_by(Identifier) %>%
summarize(first_script_date = min(event_dt),last_script_date = max(event_dt),med_names=DrugName[1])
colnames(patient_med_data)=c("eid","first_script_date","last_script_date","med_name")
$first_script_date <- as.Date(patient_med_data$first_script_date, format = "%Y-%m-%d")
patient_med_data$last_script_date <- as.Date(patient_med_data$last_script_date, format = "%Y-%m-%d") patient_med_data
We then grab treated pateints. Median of body weight measurements at least 180 days after initiation and maximum 365 days after initiation. (different than our definition which was one year after end)
This variable is called mW1
Exclusion: Bariatric Surgery during therapy and one year before initiation of therapy
The definition of minimum treatment duration is left to each biobank because of different data availability in terms of prescription vs purchases and prescription duration. Individuals with a clear indication of treatment discontinuation within one year should be excluded. In practice, if there is only one prescription that covers less than one year, the patient should not be included.
=weight[weight$eid%in%patient_med_data$eid,c("eid","date","value")]
patient_weight_data=df_baseline[,c("identifier","f.53.0.0","f.21001.0.0")]
bl$f.53.0.0=as.Date(bl$f.53.0.0)
bl
=rbind(patient_weight_data,bl[,c("identifier","f.53.0.0","f.21001.0.0")],use.names=FALSE)
patient_weight_data
$date <- as.Date(patient_weight_data$date, format = "%Y-%m-%d")
patient_weight_data<- merge(patient_weight_data, patient_med_data, by = "eid")
merged_data
### here i use median weight beofre and the minimum date
<- merged_data %>%
w0 filter(date < first_script_date & date > (first_script_date - years(1)))%>%
group_by(eid) %>%
summarise("beforedate"=min(date),"medweight"=median(value),"number"=length(value),"med_name"=med_name[1])
summary(w0$number)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 1.000 2.000 2.688 3.000 11.000
<- merged_data %>%
mW1 filter(date > (first_script_date + days(180)) & date < (first_script_date + years(1))) %>%
group_by(eid) %>%
summarise("afterdate"=min(date),"medweight"=median(value),"number"=length(value))
summary(mW1$number)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.0 1.0 1.0 1.7 2.0 7.0
N individuals included in the analysis
% females
Age in years (mean±SD)
T2D prevalence at initiation. The definition of T2D is left to individual cohorts.
Percentage breakdown of GLP1RA medications within the drug class.
Body weight (kg) and BMI (kg/m2) (mean±SD) at W0 (see 4.1 / 4.2)
Median body weight (kg) and BMI (kg/m2) between 180 and 365 days (or 1460 days for bariatric surgery)
Average percentage change in body weight between the two time points:
\(\sum_i (mW1 - W0)/W0 * 100 ) / n\)
=merge(w0[,c("eid","beforedate","medweight","med_name")],mW1[,c("eid","afterdate","medweight")],by="eid")
dat# Trim spaces from the medication names
$med_name <- stringr::str_trim(dat$med_name)
dat
colnames(dat)=c("eid","t1","w0","med_name","t2","mW1")
$med_name=as.factor(dat$med_name)
datsummary(dat$med_name)
Bydureon Byetta Dulaglutide
23 26 3
Exenatide Exenatide injection Liraglutide
194 1 265
Liraglutide Prefilled Lixisenatide Trulicity
1 60 2
Victoza
70
<- dat %>%
dat mutate(med_group = case_when(
%in% c("Bydureon", "Byetta", "Exenatide", "Exenatide injection") ~ "Exenatide",
med_name %in% c("Dulaglutide", "Trulicity") ~ "Dulaglutide",
med_name %in% c("Liraglutide", "Liraglutide Prefilled", "Victoza", "Victoza Prefilled") ~ "Liraglutide",
med_name %in% c("Lixisenatide") ~ "Lixisenatide",
med_name TRUE ~ "Other" # This line is for medications that don't fit into any group
))
=merge(dat,height,by="eid")
dat$bmi_before=dat$w0/(dat$m)^2
dat$bmi_after=dat$mW1/(dat$m)^2
datload("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/output/merged_pheno_censor_final_withdrugs_smoke.rds")
=dfh[,c("identifier","Dm_0_Any","smoke","f.31.0.0","Birthdate")]
phenos
$eid=as.numeric(as.character(phenos$identifier))
phenos=merge(dat,phenos,by="eid")
dat$enrollage=as.numeric(difftime(dat$t1,dat$Birthdate,units = "days"))/365.4
dat$delta_wt=dat$mW1-dat$w0
dat$delta_bmi=dat$bmi_after-dat$bmi_before
dat$med_name=as.factor(dat$med_name)
dat## you may want to add these too
# dat$HyperLip_0_Any=factor(dat$HyperLip_0_Any,levels = c(1,2),labels = c("No","Yes"))
# dat$Ht_0_Any=factor(dat$Ht_0_Any,levels = c(1,2),labels = c("No","Yes"))
$Dm_0_Any=factor(dat$Dm_0_Any,levels = c(1,2),labels = c("No","Yes"))
dat$sex=factor(dat$f.31.0.0,levels = c(0,1),labels = c("Female","Male"))
dat$enrollage=as.numeric(dat$enrollage)
dat#dat$smoke=factor(dat$smoke,levels=c(0,1),labels = c("No","Yes"))
#dat$antihtn=factor(dat$antihtn,levels=c(0,1),labels = c("No","Yes"))
#dat$statin=factor(dat$statin,levels=c(0,1),labels = c("No","Yes"))
#dat$treat=factor(dat$treat,levels=c(0,1),labels=c("Untreated","Treated"))
label(dat$delta_wt)= "Change in wt (kg)"
label(dat$delta_bmi)= "Change in wt (bmi)"
label(dat$sex)<- "Sex"
label(dat$w0)<- "Pre-treat Wt (kg)"
label(dat$mW1)<- "Post-treat Wt (kg)"
label(dat$enrollage) <- "Age"
label(dat$bmi_before) <- "Pre-Treat BMI"
#label(dat$HyperLip_0_Any) = "Hyperlipidemia"
#label(dat$Ht_0_Any) = "Hypertension"
label(dat$Dm_0_Any) = "Diabetes Type 2"
label(dat$med_group)="Medication"
# One level of stratification
table1(~sex + enrollage +bmi_before+Dm_0_Any+w0+mW1+delta_wt+delta_bmi+med_group,data = dat)
Overall (N=614) |
|
---|---|
Sex | |
Female | 272 (44.3%) |
Male | 342 (55.7%) |
Age | |
Mean (SD) | 60.5 (7.15) |
Median [Min, Max] | 60.7 [41.3, 76.9] |
Pre-Treat BMI | |
Mean (SD) | 36.7 (6.14) |
Median [Min, Max] | 35.7 [21.8, 63.2] |
Diabetes Type 2 | |
No | 1 (0.2%) |
Yes | 613 (99.8%) |
Pre-treat Wt (kg) | |
Mean (SD) | 105 (19.9) |
Median [Min, Max] | 104 [52.0, 188] |
Post-treat Wt (kg) | |
Mean (SD) | 101 (19.7) |
Median [Min, Max] | 100 [53.0, 183] |
Change in wt (kg) | |
Mean (SD) | -4.00 (7.10) |
Median [Min, Max] | -3.27 [-37.6, 53.4] |
Change in wt (bmi) | |
Mean (SD) | -1.42 (2.52) |
Median [Min, Max] | -1.07 [-12.8, 17.9] |
Medication | |
Dulaglutide | 5 (0.8%) |
Exenatide | 237 (38.6%) |
Liraglutide | 315 (51.3%) |
Lixisenatide | 57 (9.3%) |
Now let’s merge with PRS for our analysis. We first do the analysis based on BMI prs
Model_A (percent body weight change as outcome):
\(((mW1 - W0)/W0) * 100 \sim ~ exposure +W0 + sex + age at initiation (years) +medication_type + PC1:20 + covariates\)
=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/prs_subset.rds")
prs_df=merge(dat,prs_df[,c("Identifier","BMI","T2D")],by.x = "eid",by.y="Identifier")
dat_with_prs=data.frame(readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/baseline_withPCS.rds"))
pcs=data.frame(pcs[,grep(names(pcs),pattern="22009")[1:20]])
pc_dfnames(pc_df)=rep(paste0("PC",seq(1:20)))
$eid=pcs$identifier
pc_df=merge(dat_with_prs,pc_df,by="eid")
dat_with_all$sex <- relevel(dat_with_all$sex, ref = "Female")
dat_with_all
=lm((delta_wt/w0)*100 ~ BMI+w0+sex+enrollage+as.factor(med_group)
MA_BMI+PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)
summary(MA_BMI)$coefficients[1:5,c(1:2)]
Estimate Std. Error
(Intercept) 7.09332973 4.46417319
BMI 0.05117668 0.28874618
w0 -0.06252993 0.01526156
sexMale 2.48922152 0.58405577
enrollage -0.07304020 0.03860876
Model_B (interaction between weight at baseline and exposure):
\(m_{W1} \sim exposure:W0_{kg} + sex_{male}+ age_{initiation}+ medication_{type}+ PC1:20 + covariates\)
=lm(mW1~BMI*w0+sex+enrollage+as.factor(med_group)
MB_BMI+PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)
summary(MB_BMI)$coefficients[c(1:5,29),c(1:2)]
Estimate Std. Error
(Intercept) 12.47644794 4.68722478
BMI -1.15859968 1.49495592
w0 0.88802525 0.01800920
sexMale 2.60023058 0.60832107
enrollage -0.08272773 0.04023724
BMI:w0 0.01211408 0.01362185
Model_A (percent body weight change as outcome):
\(((mW1 - W0)/W0) * 100 \sim exposure +W0 + sex + age atinitiation (years) +medication_type + PC1:20 + covariates\)
=lm((delta_wt/w0)*100 ~ T2D+w0+sex+enrollage+as.factor(med_group)+
MA_T2D+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)
PC1
summary(MA_T2D)$coefficients[1:5,c(1:2)]
Estimate Std. Error
(Intercept) 7.6987628 4.5264888
T2D -0.1940034 0.2861732
w0 -0.0633339 0.0151754
sexMale 2.4952069 0.5837706
enrollage -0.0788371 0.0393194
Model_B (interaction between weight at baseline and exposure):
\(m_{W1} \sim exposure:W0_{kg} + sex_{male}+ age_{initiation}+ medication_{type}+ PC1:20 + covariates\)
=lm(mW1 ~ T2D*w0+sex+enrollage+as.factor(med_group)
MB_T2D+PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)
summary(MB_T2D)$coefficients[c(1:5,29),c(1:2)]
Estimate Std. Error
(Intercept) 14.46600344 4.93628221
T2D -2.12205691 1.53778808
w0 0.88117342 0.01936406
sexMale 2.55666713 0.60916559
enrollage -0.09072054 0.04092055
T2D:w0 0.01862079 0.01433966
=readRDS("./chrlist.rds")
snplist
=do.call(rbind,snplist)
snp_frame# Convert dat_with_all$eid to a vector of column indices
<- which(names(snp_frame) %in% dat_with_all$eid)
col_indices # Subset the data.table by these columns
<- snp_frame[, ..col_indices]
selected_snp_frame
=vector("list", nrow(snp_frame))
snp_list
for(i in c(1:10,13)){
=data.frame(t(selected_snp_frame[i,]))
new_snpcolnames(new_snp)="newsnp"
$eid=rownames(new_snp)
new_snp## now add that snps data as a column
=na.omit(merge(dat_with_all,new_snp,by="eid"))
dat_updated#print(colnames(dat_updated))
## recompute lm
=lm((delta_wt/w0)*100 ~ newsnp+w0+sex+enrollage+as.factor(med_group)+
model_a+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_updated)
PC1
=summary(model_a)$coefficients[1:5,c(1:2)]
sum1
=lm(mW1 ~ newsnp*w0+sex+enrollage+as.factor(med_group)
model_b+PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_updated)
=summary(model_b)$coefficients[c(1:5,29),c(1:2)]
sum2=snp_frame$rsid[i]
snp_name=snp_frame$alleleA[i]
effect_allele
=list(snp_name=snp_name,effect=snp_frame$alleleA[i],alt_allele=snp_frame$alleleB[i],sum_model_a=sum1,sum_model_b=sum2)
snp_list[[i]]
}
# Now create a new data frame to store all the data in the desired format
<- data.frame(
results_df_a snp_name = character(),
effect = character(),
alt = character(),
Beta_exposure = numeric(),
SE_exposure = numeric(),
Beta_W0 = numeric(),
SE_W0 = numeric(),
Beta_sex = numeric(),
SE_sex = numeric(),
Beta_age_at_initiation_in_years = numeric(),
SE_age_at_initiation_in_years = numeric(),
stringsAsFactors = FALSE
)
# Fill in the results data frame
for(i in c(1:10, 13)) {
<- snp_list[[i]]
snp_info <- rbind(results_df_a, data.frame(
results_df_a snp_name = snp_info$snp_name,
effect = snp_info$effect,
alt = snp_info$alt_allele,
Beta_exposure = snp_info$sum_model_a[2, "Estimate"],
SE_exposure = snp_info$sum_model_a[2, "Std. Error"],
Beta_W0 = snp_info$sum_model_a[3, "Estimate"],
SE_W0= snp_info$sum_model_a[3, "Std. Error"],
Beta_sex = snp_info$sum_model_a[4, "Estimate"],
SE_sex = snp_info$sum_model_a[4, "Std. Error"],
Beta_age_at_initiation_in_years = snp_info$sum_model_a[5, "Estimate"],
SE_age_at_initiation_in_years = snp_info$sum_model_a[5, "Std. Error"]))}
results_df_a
snp_name effect alt Beta_exposure SE_exposure Beta_W0 SE_W0
1 rs6235 C G 0.38326336 0.4212125 -0.06195969 0.01506290
2 rs6232 T C 0.39578023 0.8166527 -0.06228691 0.01507433
3 rs10305420 C T 0.35995116 0.3878158 -0.06192777 0.01506283
4 rs10305421 G A -0.73542339 2.3268614 -0.06168054 0.01513119
5 rs2295006 G A 396.62008831 584.0730816 -0.06212602 0.01506679
6 rs3765467 G A -0.83095047 2.4398757 -0.06209513 0.01507123
7 rs6923761 G A 0.30797400 0.4012858 -0.06270186 0.01508516
8 rs1042044 A C -0.07110585 0.3999292 -0.06209934 0.01507232
9 rs10305492 G A 1.48586835 1.4961174 -0.06180517 0.01506305
10 rs146868158 C T -0.36243225 17.0734872 -0.06210147 0.01507373
11 rs429358 T C -0.66432669 0.5700902 -0.06299125 0.01507444
Beta_sex SE_sex Beta_age_at_initiation_in_years
1 2.465141 0.5839108 -0.07223163
2 2.497087 0.5841541 -0.07344297
3 2.517099 0.5843629 -0.07096233
4 2.478190 0.5843834 -0.07321292
5 2.467644 0.5842789 -0.07310704
6 2.482448 0.5839127 -0.07285575
7 2.468487 0.5840286 -0.07311670
8 2.488347 0.5839415 -0.07314149
9 2.449146 0.5845693 -0.07372557
10 2.486272 0.5838878 -0.07345527
11 2.502562 0.5833445 -0.07466354
SE_age_at_initiation_in_years
1 0.03852831
2 0.03852355
3 0.03859809
4 0.03853695
5 0.03851990
6 0.03857049
7 0.03851471
8 0.03857623
9 0.03849963
10 0.03854427
11 0.03850011
write.csv(results_df_a,file="results_model_a.csv",quote = F)
# Now create a new data frame to store all the data in the desired format
<- data.frame(
results_df_b snp_name = character(),
effect = character(),
alt = character(),
Beta_exposure = numeric(),
SE_exposure = numeric(),
Beta_W0 = numeric(),
SE_W0 = numeric(),
Beta_exposure_W0 = numeric(),
SE_exposure_W0 = numeric(),
Beta_sex = numeric(),
SE_sex = numeric(),
Beta_age_at_initiation_in_years = numeric(),
SE_age_at_initiation_in_years = numeric(),
stringsAsFactors = FALSE
)
# Fill in the results data frame
for(i in c(1:10, 13)) {
<- snp_list[[i]]
snp_info <- rbind(results_df_b, data.frame(
results_df_b snp_name = snp_info$snp_name,
effect = snp_info$effect,
alt = snp_info$alt_allele,
Beta_exposure = snp_info$sum_model_b[2, "Estimate"],
SE_exposure = snp_info$sum_model_b[2, "Std. Error"],
Beta_W0 = snp_info$sum_model_b[3, "Estimate"],
SE_W0= snp_info$sum_model_b[3, "Std. Error"],
Beta_exposure_W0 = snp_info$sum_model_b[6, "Estimate"],
SE_exposure_W0 = snp_info$sum_model_b[6, "Std. Error"],
Beta_sex = snp_info$sum_model_b[4, "Estimate"],
SE_sex = snp_info$sum_model_b[4, "Std. Error"],
Beta_age_at_initiation_in_years = snp_info$sum_model_b[5, "Estimate"],
SE_age_at_initiation_in_years = snp_info$sum_model_b[5, "Std. Error"]))}
write.csv(results_df_b,file="results_model_b.csv",quote = F)