GLP1 Analysis

Author

Sarah Urbut

Published

January 7, 2024

Enrollment Criteria (per AP documentation)

Minimum age at initiation 18 years old
Initiation of GLP1-RA treatment (T0). Initiation is defined as the date of first purchase or prescription of GLP1-RA, if purchase data is not available
At least 1 body weight measured maximum 365 days before initiation (or within 14 days after if no measure before initiation is available). If multiple measurements are within this time frame, only the closest one to initiation (T0) is to be considered. This variable is called W0
- I think we should use median of measurements prior (to avoid erratic one off prior to initiation and establish continuosu baseline)
  Note in the UKB we use the following GLP1s:

Code

grep -E 'Byetta|Ozempic|Wegovy|Bydureon|Trulicity|Lixumia|Victoza|Rybelsus|Liraglutide|Dulaglutide|Exenatide|Lixisenatide|Semaglutide' gp_scripts.txt > glp1s_withi.txt

Code

library(lubridate)
library(dplyr)
library(tidyr)
library(data.table)
library(table1)

## From the UKB showcase field
prs_subset=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/prs_subset.rds")
df_baseline=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/output/dfukb_baseline.rds")
glp1=fread("~/Library/CloudStorage/Dropbox-Personal//glp1s_withi.txt")
glp1$DrugName <- as.factor(sub("([A-Za-z]+[ ]?[A-Za-z]*).*", "\\1",glp1$V7))


colnames(glp1)[1]=c("Identifier")
colnames(glp1)[3]=c("event_dt")
glp1$event_dt <- as.Date(glp1$event_dt, format="%d/%m/%Y")


biomarkers=readRDS("~/Library/CloudStorage/Dropbox-Personal/ukbb-ehr-data/data/biomarkers.rds")
height=biomarkers[biomarkers$variable%in%"height",]%>%group_by(eid)%>%summarise(m=mean(value))

weight=biomarkers[biomarkers$variable%in%"weight",]
weight$date <- as.Date(weight$date, format="%Y/%m/%d")

patient_med_data <- glp1 %>%
  group_by(Identifier) %>%
  summarize(first_script_date = min(event_dt),last_script_date = max(event_dt),med_names=DrugName[1])

colnames(patient_med_data)=c("eid","first_script_date","last_script_date","med_name")
patient_med_data$first_script_date <- as.Date(patient_med_data$first_script_date, format = "%Y-%m-%d")
patient_med_data$last_script_date <- as.Date(patient_med_data$last_script_date, format = "%Y-%m-%d")

We then grab treated pateints. Median of body weight measurements at least 180 days after initiation and maximum 365 days after initiation. (different than our definition which was one year after end)

This variable is called mW1

Exclusion: Bariatric Surgery during therapy and one year before initiation of therapy
The definition of minimum treatment duration is left to each biobank because of different data availability in terms of prescription vs purchases and prescription duration. Individuals with a clear indication of treatment discontinuation within one year should be excluded. In practice, if there is only one prescription that covers less than one year, the patient should not be included.

Code

patient_weight_data=weight[weight$eid%in%patient_med_data$eid,c("eid","date","value")]
bl=df_baseline[,c("identifier","f.53.0.0","f.21001.0.0")]
bl$f.53.0.0=as.Date(bl$f.53.0.0)

patient_weight_data=rbind(patient_weight_data,bl[,c("identifier","f.53.0.0","f.21001.0.0")],use.names=FALSE)

patient_weight_data$date <- as.Date(patient_weight_data$date, format = "%Y-%m-%d")
merged_data <- merge(patient_weight_data, patient_med_data, by = "eid")

### here i use median weight beofre and the minimum date
w0 <- merged_data %>%
  filter(date < first_script_date & date > (first_script_date - years(1)))%>%
  group_by(eid) %>%
  summarise("beforedate"=min(date),"medweight"=median(value),"number"=length(value),"med_name"=med_name[1])

summary(w0$number)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   1.000   2.000   2.688   3.000  11.000

Code

mW1 <- merged_data %>%
  filter(date > (first_script_date + days(180)) & date < (first_script_date + years(1))) %>%
  
  group_by(eid) %>%
  summarise("afterdate"=min(date),"medweight"=median(value),"number"=length(value))

summary(mW1$number)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     1.0     1.0     1.7     2.0     7.0

Cohort descriptors

N individuals included in the analysis
% females
Age in years (mean±SD)
T2D prevalence at initiation. The definition of T2D is left to individual cohorts.
Percentage breakdown of GLP1RA medications within the drug class.
Body weight (kg) and BMI (kg/m2) (mean±SD) at W0 (see 4.1 / 4.2)
Median body weight (kg) and BMI (kg/m2) between 180 and 365 days (or 1460 days for bariatric surgery)
- Shouldn’t we consider if patient dropped med?

Average percentage change in body weight between the two time points:

\(\sum_i (mW1 - W0)/W0 * 100 ) / n\)

Code

dat=merge(w0[,c("eid","beforedate","medweight","med_name")],mW1[,c("eid","afterdate","medweight")],by="eid")
# Trim spaces from the medication names
dat$med_name <- stringr::str_trim(dat$med_name)

colnames(dat)=c("eid","t1","w0","med_name","t2","mW1")
dat$med_name=as.factor(dat$med_name)
summary(dat$med_name)

             Bydureon                Byetta           Dulaglutide 
                   23                    26                     3 
            Exenatide   Exenatide injection           Liraglutide 
                  194                     1                   265 
Liraglutide Prefilled          Lixisenatide             Trulicity 
                    1                    60                     2 
              Victoza 
                   70

Code

dat <- dat %>%
  mutate(med_group = case_when(
    med_name %in% c("Bydureon", "Byetta", "Exenatide", "Exenatide injection") ~ "Exenatide",
    med_name %in% c("Dulaglutide", "Trulicity") ~ "Dulaglutide",
    med_name %in% c("Liraglutide", "Liraglutide Prefilled", "Victoza", "Victoza Prefilled") ~ "Liraglutide",
    med_name %in% c("Lixisenatide") ~ "Lixisenatide",
    TRUE ~ "Other"  # This line is for medications that don't fit into any group
  ))

dat=merge(dat,height,by="eid")
dat$bmi_before=dat$w0/(dat$m)^2
dat$bmi_after=dat$mW1/(dat$m)^2
load("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/output/merged_pheno_censor_final_withdrugs_smoke.rds")
phenos=dfh[,c("identifier","Dm_0_Any","smoke","f.31.0.0","Birthdate")]

phenos$eid=as.numeric(as.character(phenos$identifier))
dat=merge(dat,phenos,by="eid")
dat$enrollage=as.numeric(difftime(dat$t1,dat$Birthdate,units = "days"))/365.4
dat$delta_wt=dat$mW1-dat$w0
dat$delta_bmi=dat$bmi_after-dat$bmi_before
dat$med_name=as.factor(dat$med_name)
## you may want to add these too
# dat$HyperLip_0_Any=factor(dat$HyperLip_0_Any,levels = c(1,2),labels = c("No","Yes"))
# dat$Ht_0_Any=factor(dat$Ht_0_Any,levels = c(1,2),labels = c("No","Yes"))
dat$Dm_0_Any=factor(dat$Dm_0_Any,levels = c(1,2),labels = c("No","Yes"))
dat$sex=factor(dat$f.31.0.0,levels = c(0,1),labels = c("Female","Male"))
dat$enrollage=as.numeric(dat$enrollage)
#dat$smoke=factor(dat$smoke,levels=c(0,1),labels = c("No","Yes"))
#dat$antihtn=factor(dat$antihtn,levels=c(0,1),labels = c("No","Yes"))
#dat$statin=factor(dat$statin,levels=c(0,1),labels = c("No","Yes"))
#dat$treat=factor(dat$treat,levels=c(0,1),labels=c("Untreated","Treated"))
label(dat$delta_wt)= "Change in wt (kg)"
label(dat$delta_bmi)= "Change in wt (bmi)"
label(dat$sex)<- "Sex"
label(dat$w0)<- "Pre-treat Wt (kg)"
label(dat$mW1)<- "Post-treat Wt (kg)"
label(dat$enrollage) <- "Age"
label(dat$bmi_before) <- "Pre-Treat BMI"
#label(dat$HyperLip_0_Any) = "Hyperlipidemia"
#label(dat$Ht_0_Any) = "Hypertension"
label(dat$Dm_0_Any) = "Diabetes Type 2"
label(dat$med_group)="Medication"
# One level of stratification
table1(~sex + enrollage +bmi_before+Dm_0_Any+w0+mW1+delta_wt+delta_bmi+med_group,data = dat)

	Overall (N=614)
Sex
Female	272 (44.3%)
Male	342 (55.7%)
Age
Mean (SD)	60.5 (7.15)
Median [Min, Max]	60.7 [41.3, 76.9]
Pre-Treat BMI
Mean (SD)	36.7 (6.14)
Median [Min, Max]	35.7 [21.8, 63.2]
Diabetes Type 2
No	1 (0.2%)
Yes	613 (99.8%)
Pre-treat Wt (kg)
Mean (SD)	105 (19.9)
Median [Min, Max]	104 [52.0, 188]
Post-treat Wt (kg)
Mean (SD)	101 (19.7)
Median [Min, Max]	100 [53.0, 183]
Change in wt (kg)
Mean (SD)	-4.00 (7.10)
Median [Min, Max]	-3.27 [-37.6, 53.4]
Change in wt (bmi)
Mean (SD)	-1.42 (2.52)
Median [Min, Max]	-1.07 [-12.8, 17.9]
Medication
Dulaglutide	5 (0.8%)
Exenatide	237 (38.6%)
Liraglutide	315 (51.3%)
Lixisenatide	57 (9.3%)

PRS Analysis

Now let’s merge with PRS for our analysis. We first do the analysis based on BMI prs

Model_A (percent body weight change as outcome):

\(((mW1 - W0)/W0) * 100 \sim ~ exposure +W0 + sex + age at initiation (years) +medication_type + PC1:20 + covariates\)

Code

prs_df=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/prs_subset.rds")
dat_with_prs=merge(dat,prs_df[,c("Identifier","BMI","T2D")],by.x = "eid",by.y="Identifier")
pcs=data.frame(readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/baseline_withPCS.rds"))
pc_df=data.frame(pcs[,grep(names(pcs),pattern="22009")[1:20]])
names(pc_df)=rep(paste0("PC",seq(1:20)))
pc_df$eid=pcs$identifier
dat_with_all=merge(dat_with_prs,pc_df,by="eid")
dat_with_all$sex <- relevel(dat_with_all$sex, ref = "Female")

MA_BMI=lm((delta_wt/w0)*100 ~ BMI+w0+sex+enrollage+as.factor(med_group)
            +PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)

summary(MA_BMI)$coefficients[1:5,c(1:2)]

               Estimate Std. Error
(Intercept)  7.09332973 4.46417319
BMI          0.05117668 0.28874618
w0          -0.06252993 0.01526156
sexMale      2.48922152 0.58405577
enrollage   -0.07304020 0.03860876

Model_B (interaction between weight at baseline and exposure):

\(m_{W1} \sim exposure:W0_{kg} + sex_{male}+ age_{initiation}+ medication_{type}+ PC1:20 + covariates\)

Code

MB_BMI=lm(mW1~BMI*w0+sex+enrollage+as.factor(med_group)
          +PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)

summary(MB_BMI)$coefficients[c(1:5,29),c(1:2)]

               Estimate Std. Error
(Intercept) 12.47644794 4.68722478
BMI         -1.15859968 1.49495592
w0           0.88802525 0.01800920
sexMale      2.60023058 0.60832107
enrollage   -0.08272773 0.04023724
BMI:w0       0.01211408 0.01362185

And now using the PRS for T2D:

Model_A (percent body weight change as outcome):

\(((mW1 - W0)/W0) * 100 \sim exposure +W0 + sex + age atinitiation (years) +medication_type + PC1:20 + covariates\)

Code

MA_T2D=lm((delta_wt/w0)*100 ~ T2D+w0+sex+enrollage+as.factor(med_group)+
            PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)

summary(MA_T2D)$coefficients[1:5,c(1:2)]

              Estimate Std. Error
(Intercept)  7.6987628  4.5264888
T2D         -0.1940034  0.2861732
w0          -0.0633339  0.0151754
sexMale      2.4952069  0.5837706
enrollage   -0.0788371  0.0393194

Model_B (interaction between weight at baseline and exposure):

\(m_{W1} \sim exposure:W0_{kg} + sex_{male}+ age_{initiation}+ medication_{type}+ PC1:20 + covariates\)

Code

MB_T2D=lm(mW1 ~ T2D*w0+sex+enrollage+as.factor(med_group)
            +PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)

summary(MB_T2D)$coefficients[c(1:5,29),c(1:2)]

               Estimate Std. Error
(Intercept) 14.46600344 4.93628221
T2D         -2.12205691 1.53778808
w0           0.88117342 0.01936406
sexMale      2.55666713 0.60916559
enrollage   -0.09072054 0.04092055
T2D:w0       0.01862079 0.01433966

With Single SNPs

Code

snplist=readRDS("./chrlist.rds")

snp_frame=do.call(rbind,snplist)
# Convert dat_with_all$eid to a vector of column indices
col_indices <- which(names(snp_frame) %in% dat_with_all$eid)
# Subset the data.table by these columns
selected_snp_frame <- snp_frame[, ..col_indices]

snp_list=vector("list", nrow(snp_frame))


for(i in c(1:10,13)){
  
  new_snp=data.frame(t(selected_snp_frame[i,]))
  colnames(new_snp)="newsnp"
  new_snp$eid=rownames(new_snp)
  ## now add that snps data as a column
  dat_updated=na.omit(merge(dat_with_all,new_snp,by="eid"))
  #print(colnames(dat_updated))
  
  
  ## recompute lm
  model_a=lm((delta_wt/w0)*100 ~ newsnp+w0+sex+enrollage+as.factor(med_group)+
            PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_updated)
  
  sum1=summary(model_a)$coefficients[1:5,c(1:2)]
  
model_b=lm(mW1 ~ newsnp*w0+sex+enrollage+as.factor(med_group)
            +PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_updated)

sum2=summary(model_b)$coefficients[c(1:5,29),c(1:2)]
snp_name=snp_frame$rsid[i]
effect_allele=snp_frame$alleleA[i]

snp_list[[i]]=list(snp_name=snp_name,effect=snp_frame$alleleA[i],alt_allele=snp_frame$alleleB[i],sum_model_a=sum1,sum_model_b=sum2)
}



# Now create a new data frame to store all the data in the desired format
results_df_a <- data.frame(
  snp_name = character(),
  effect = character(),
  alt = character(),
  Beta_exposure = numeric(),
  SE_exposure = numeric(),
  Beta_W0 = numeric(),
  SE_W0 = numeric(),
  Beta_sex = numeric(),
  SE_sex = numeric(),
  Beta_age_at_initiation_in_years = numeric(),
  SE_age_at_initiation_in_years = numeric(),
  stringsAsFactors = FALSE
)

# Fill in the results data frame
for(i in c(1:10, 13)) {
  snp_info <- snp_list[[i]]
  results_df_a <- rbind(results_df_a, data.frame(
    snp_name = snp_info$snp_name,
    effect = snp_info$effect,
    alt = snp_info$alt_allele,
    Beta_exposure = snp_info$sum_model_a[2, "Estimate"],
    SE_exposure = snp_info$sum_model_a[2, "Std. Error"],
    Beta_W0 = snp_info$sum_model_a[3, "Estimate"],
    SE_W0= snp_info$sum_model_a[3, "Std. Error"],
Beta_sex = snp_info$sum_model_a[4, "Estimate"],
SE_sex = snp_info$sum_model_a[4, "Std. Error"],
Beta_age_at_initiation_in_years = snp_info$sum_model_a[5, "Estimate"],
SE_age_at_initiation_in_years = snp_info$sum_model_a[5, "Std. Error"]))}
  


results_df_a

      snp_name effect alt Beta_exposure SE_exposure     Beta_W0      SE_W0
1       rs6235      C   G    0.38326336   0.4212125 -0.06195969 0.01506290
2       rs6232      T   C    0.39578023   0.8166527 -0.06228691 0.01507433
3   rs10305420      C   T    0.35995116   0.3878158 -0.06192777 0.01506283
4   rs10305421      G   A   -0.73542339   2.3268614 -0.06168054 0.01513119
5    rs2295006      G   A  396.62008831 584.0730816 -0.06212602 0.01506679
6    rs3765467      G   A   -0.83095047   2.4398757 -0.06209513 0.01507123
7    rs6923761      G   A    0.30797400   0.4012858 -0.06270186 0.01508516
8    rs1042044      A   C   -0.07110585   0.3999292 -0.06209934 0.01507232
9   rs10305492      G   A    1.48586835   1.4961174 -0.06180517 0.01506305
10 rs146868158      C   T   -0.36243225  17.0734872 -0.06210147 0.01507373
11    rs429358      T   C   -0.66432669   0.5700902 -0.06299125 0.01507444
   Beta_sex    SE_sex Beta_age_at_initiation_in_years
1  2.465141 0.5839108                     -0.07223163
2  2.497087 0.5841541                     -0.07344297
3  2.517099 0.5843629                     -0.07096233
4  2.478190 0.5843834                     -0.07321292
5  2.467644 0.5842789                     -0.07310704
6  2.482448 0.5839127                     -0.07285575
7  2.468487 0.5840286                     -0.07311670
8  2.488347 0.5839415                     -0.07314149
9  2.449146 0.5845693                     -0.07372557
10 2.486272 0.5838878                     -0.07345527
11 2.502562 0.5833445                     -0.07466354
   SE_age_at_initiation_in_years
1                     0.03852831
2                     0.03852355
3                     0.03859809
4                     0.03853695
5                     0.03851990
6                     0.03857049
7                     0.03851471
8                     0.03857623
9                     0.03849963
10                    0.03854427
11                    0.03850011

Code

write.csv(results_df_a,file="results_model_a.csv",quote = F)

Code

# Now create a new data frame to store all the data in the desired format
results_df_b <- data.frame(
  snp_name = character(),
  effect = character(),
  alt = character(),
  Beta_exposure = numeric(),
  SE_exposure = numeric(),
  Beta_W0 = numeric(),
  SE_W0 = numeric(),
  Beta_exposure_W0 = numeric(),
  SE_exposure_W0 = numeric(),
  Beta_sex = numeric(),
  SE_sex = numeric(),
  Beta_age_at_initiation_in_years = numeric(),
  SE_age_at_initiation_in_years = numeric(),
  stringsAsFactors = FALSE
)

# Fill in the results data frame
for(i in c(1:10, 13)) {
  snp_info <- snp_list[[i]]
  results_df_b <- rbind(results_df_b, data.frame(
    snp_name = snp_info$snp_name,
    effect = snp_info$effect,
    alt = snp_info$alt_allele,
    Beta_exposure = snp_info$sum_model_b[2, "Estimate"],
    SE_exposure = snp_info$sum_model_b[2, "Std. Error"],
    Beta_W0 = snp_info$sum_model_b[3, "Estimate"],
    SE_W0= snp_info$sum_model_b[3, "Std. Error"],
  Beta_exposure_W0 = snp_info$sum_model_b[6, "Estimate"],
  SE_exposure_W0 = snp_info$sum_model_b[6, "Std. Error"],
Beta_sex = snp_info$sum_model_b[4, "Estimate"],
SE_sex = snp_info$sum_model_b[4, "Std. Error"],
Beta_age_at_initiation_in_years = snp_info$sum_model_b[5, "Estimate"],
SE_age_at_initiation_in_years = snp_info$sum_model_b[5, "Std. Error"]))}
  

write.csv(results_df_b,file="results_model_b.csv",quote = F)