GLP1 Analysis

Author

Sarah Urbut

Published

January 7, 2024

Enrollment Criteria (per AP documentation)

  • Minimum age at initiation 18 years ol

  • I used the UKB codes available in this paper: https://pubmed.ncbi.nlm.nih.gov/34211116/

    They are summarized here:

    Procedures OPCS-4 procedure codes
    Gastric bypass G281, G301, G302, G304, G312, G321, G331
    Gastric banding G303, G481, G485
    Sleeve gastrectomy G282-G285
    Duodenal switch G716
Code
hesin=fread("~/Library/CloudStorage/Dropbox-Personal/hesin_May2023/hesin_oper.txt.gz")
oper_codes=c("G281", "G301", "G302", "G304", "G312", "G321", "G331","G303","G481", "G485",
             "G282","G285","G283","G284","G716")

ba=hesin[hesin$oper4%in%oper_codes,c("eid","oper4","opdate")]
ba$opdate=as.Date(ba$opdate,format = "%d/%m/%Y")
b=ba%>%group_by(eid)%>%summarise(min(as.Date(opdate)),proc=oper4[1])
saveRDS(b,"barisurgerydate.rds")
Code
library(lubridate)
library(dplyr)
library(tidyr)
library(data.table)
library(table1)

## From the UKB showcase field
prs_subset=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/prs_subset.rds")
df_baseline=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/output/dfukb_baseline.rds")
surg=readRDS("barisurgerydate.rds")
procedure=c(rep("Gastric bypass",7),rep(
               "Gastric banding",3),rep(
                "Sleeve gastrectomy",4),
               "Duodenal switch")


code=c("G281", "G301","G302", "G304", "G312", "G321", "G331",
"G303","G481","G485","G282","G283","G284","G285","G716")
key=data.frame(cbind(procedure,code))

s=merge(surg,key,by.x="proc",by.y="code",all.x = T)[,c(2:4)]




colnames(s)[1]=c("Identifier")
colnames(s)[2]=c("event_dt")
s$event_dt <- as.Date(s$event_dt, format="%d/%m/%Y")


biomarkers=readRDS("~/Library/CloudStorage/Dropbox-Personal/ukbb-ehr-data/data/biomarkers.rds")
height=biomarkers[biomarkers$variable%in%"height",]%>%group_by(eid)%>%summarise(m=mean(value))

weight=biomarkers[biomarkers$variable%in%"weight",]
weight$date <- as.Date(weight$date, format="%Y/%m/%d")

patient_med_data <- s 
colnames(patient_med_data)=c("eid","first_script_date","med_name")
patient_med_data$first_script_date <- as.Date(patient_med_data$first_script_date, format = "%Y-%m-%d")

dim(patient_med_data)
[1] 1202    3

There are 1202 patients initially;

We then grab treated pateints. Median of body weight measurements at least 180 days after initiation and maximum 365 days after initiation. (different than our definition which was one year after end)

This variable is called  mW1

Code
patient_weight_data=weight[weight$eid%in%patient_med_data$eid,c("eid","date","value")]
bl=df_baseline[,c("identifier","f.53.0.0","f.21001.0.0")]
bl$f.53.0.0=as.Date(bl$f.53.0.0)

patient_weight_data=rbind(patient_weight_data,bl[,c("identifier","f.53.0.0","f.21001.0.0")],use.names=FALSE)

patient_weight_data$date <- as.Date(patient_weight_data$date, format = "%Y-%m-%d")
merged_data <- merge(patient_med_data,patient_weight_data, by = "eid")

### here i use median weight beofre and the minimum date
w0 <- merged_data %>%
  filter(date < first_script_date & date > (first_script_date - years(1)))%>%
  group_by(eid) %>%
  summarise("beforedate"=min(date),"medweight"=median(value),"number"=length(value),"med_name"=med_name[1])

summary(w0$number)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   1.000   2.000   2.579   3.000  16.000 
Code
## Median of body weight measurements at least 180 days after surgery and maximum 1460 days after surgery. This variable is called  mW1

mW1 <- merged_data %>%
  filter(date > (first_script_date + days(180)) & date < (first_script_date + days(1460))) %>%
  
  group_by(eid) %>%
  summarise("afterdate"=min(date),"medweight"=median(value),"number"=length(value))

summary(mW1$number)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   1.000   3.000   4.361   6.000  26.000 

Cohort descriptors

  • N individuals included in the analysis

  • % females 

  • Age in years (mean±SD)

  • T2D prevalence at initiation. The definition of T2D is left to individual cohorts.

  • Percentage breakdown of Surgical types within the class.

  • Body weight (kg) and BMI (kg/m2) (mean±SD) at W0 (see 4.1 / 4.2) 

  • Median body weight (kg) and BMI (kg/m2) between 180 and 365 days (or 1460 days for bariatric surgery)

Average percentage change in body weight between the two time points:

\(\sum_i (mW1 - W0)/W0 * 100 ) / n\)

Code
dat=merge(w0[,c("eid","beforedate","medweight","med_name")],mW1[,c("eid","afterdate","medweight")],by="eid")
# Trim spaces from the medication names
dat$med_name <- stringr::str_trim(dat$med_name)

colnames(dat)=c("eid","t1","w0","med_name","t2","mW1")
dat$med_name=as.factor(dat$med_name)
summary(dat$med_name)
   Gastric banding     Gastric bypass Sleeve gastrectomy 
                43                112                 48 
Code
dat=merge(dat,height,by="eid")
dat$bmi_before=dat$w0/(dat$m)^2
dat$bmi_after=dat$mW1/(dat$m)^2
load("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/output/merged_pheno_censor_final_withdrugs_smoke.rds")
phenos=dfh[,c("identifier","Dm_0_Any","smoke","f.31.0.0","Birthdate")]

phenos$eid=as.numeric(as.character(phenos$identifier))
dat=merge(dat,phenos,by="eid")
dat$enrollage=as.numeric(difftime(dat$t1,dat$Birthdate,units = "days"))/365.4
dat$delta_wt=dat$mW1-dat$w0
dat$delta_bmi=dat$bmi_after-dat$bmi_before
dat$med_group=as.factor(dat$med_name)
## you may want to add these too
# dat$HyperLip_0_Any=factor(dat$HyperLip_0_Any,levels = c(1,2),labels = c("No","Yes"))
# dat$Ht_0_Any=factor(dat$Ht_0_Any,levels = c(1,2),labels = c("No","Yes"))
dat$Dm_0_Any=factor(dat$Dm_0_Any,levels = c(1,2),labels = c("No","Yes"))
dat$sex=factor(dat$f.31.0.0,levels = c(0,1),labels = c("Female","Male"))
dat$enrollage=as.numeric(dat$enrollage)
#dat$smoke=factor(dat$smoke,levels=c(0,1),labels = c("No","Yes"))
#dat$antihtn=factor(dat$antihtn,levels=c(0,1),labels = c("No","Yes"))
#dat$statin=factor(dat$statin,levels=c(0,1),labels = c("No","Yes"))
#dat$treat=factor(dat$treat,levels=c(0,1),labels=c("Untreated","Treated"))
label(dat$delta_wt)= "Change in wt (kg)"
label(dat$delta_bmi)= "Change in wt (bmi)"
label(dat$sex)<- "Sex"
label(dat$w0)<- "Pre-treat Wt (kg)"
label(dat$mW1)<- "Post-treat Wt (kg)"
label(dat$enrollage) <- "Age"
label(dat$bmi_before) <- "Pre-Treat BMI"
label(dat$Dm_0_Any) = "Diabetes Type 2"
label(dat$med_group)="Surgical Class"
# One level of stratification
table1(~sex + enrollage+bmi_before+Dm_0_Any+w0+mW1+delta_wt+delta_bmi+med_group,data = dat)
Overall
(N=196)
Sex
Female 136 (69.4%)
Male 60 (30.6%)
Age
Mean (SD) 55.5 (8.21)
Median [Min, Max] 55.8 [36.7, 75.0]
Pre-Treat BMI
Mean (SD) 44.1 (9.78)
Median [Min, Max] 45.2 [17.4, 73.6]
Diabetes Type 2
No 98 (50.0%)
Yes 98 (50.0%)
Pre-treat Wt (kg)
Mean (SD) 121 (29.7)
Median [Min, Max] 120 [47.0, 198]
Post-treat Wt (kg)
Mean (SD) 95.1 (23.0)
Median [Min, Max] 93.7 [46.5, 173]
Missing 1 (0.5%)
Change in wt (kg)
Mean (SD) -26.2 (20.2)
Median [Min, Max] -24.8 [-80.6, 46.6]
Missing 1 (0.5%)
Change in wt (bmi)
Mean (SD) -9.52 (7.25)
Median [Min, Max] -9.59 [-31.2, 15.8]
Missing 1 (0.5%)
Surgical Class
Gastric banding 41 (20.9%)
Gastric bypass 109 (55.6%)
Sleeve gastrectomy 46 (23.5%)

PRS Analysis

Now let’s merge with PRS for our analysis. We first do the analysis based on BMI prs

Model_A (percent body weight change as outcome):

\(((mW1 - W0)/W0) * 100 \sim  ~ exposure +W0 + sex + age at initiation (years)  +medication_type + PC1:20 + covariates\)

Code
prs_df=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/prs_subset.rds")
dat_with_prs=merge(dat,prs_df[,c("Identifier","BMI","T2D")],by.x = "eid",by.y="Identifier")
pcs=data.frame(readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/baseline_withPCS.rds"))
pc_df=data.frame(pcs[,grep(names(pcs),pattern="22009")[1:20]])
names(pc_df)=rep(paste0("PC",seq(1:20)))
pc_df$eid=pcs$identifier
dat_with_all=merge(dat_with_prs,pc_df,by="eid")
dat_with_all$sex <- relevel(dat_with_all$sex, ref = "Female")

MA_BMI=lm((delta_wt/w0)*100 ~ BMI+w0+sex+enrollage
            +PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)

summary(MA_BMI)$coefficients[1:5,c(1:2)]
              Estimate  Std. Error
(Intercept)  2.7245835 11.26729849
BMI          0.5272535  1.16115823
w0          -0.2593418  0.04415814
sexMale      3.9592779  2.56116552
enrollage    0.1340547  0.15041446

Model_B (interaction between weight at baseline and exposure):

\(m_{W1} \sim exposure:W0_{kg} + sex_{male}+ age_{initiation}+ PC1:20 + covariates\)

Code
MB_BMI=lm(mW1~BMI*w0+sex+enrollage
          +PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)

summary(MB_BMI)$coefficients[c(1:5,26),c(1:2)]
               Estimate  Std. Error
(Intercept) 20.86977529 12.21282767
BMI         -9.81945189  5.26572745
w0           0.51041793  0.05368630
sexMale      1.03299186  2.77546301
enrollage    0.19383161  0.15698839
BMI:w0       0.08721246  0.04197449

And now using the PRS for T2D:

Model_A (percent body weight change as outcome):

\(((mW1 - W0)/W0) * 100 \sim exposure +W0 + sex + age at initiation (years) +medication_type + PC1:20 + covariates\)

Code
MA_T2D=lm((delta_wt/w0)*100 ~ T2D+w0+sex+enrollage+
            PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)

summary(MA_T2D)$coefficients[1:5,c(1:2)]
               Estimate  Std. Error
(Intercept)  3.01080720 11.37715969
T2D         -0.09578102  1.22906083
w0          -0.25561271  0.04347306
sexMale      3.95206575  2.56282164
enrollage    0.12864933  0.15037980

Model_B (interaction between weight at baseline and exposure):

\(m_{W1} \sim exposure:W0_{kg} + sex_{male}+ age_{initiation}+ medication_{type}+ PC1:20 + covariates\)

Code
MB_T2D=lm(mW1 ~ T2D*w0+sex+enrollage+PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_with_all)

summary(MB_T2D)$coefficients[c(1:5,26),c(1:2)]
               Estimate  Std. Error
(Intercept) 13.63330690 12.12682906
T2D          2.50310388  5.67821018
w0           0.58095548  0.04848316
sexMale      2.70779899  2.72401150
enrollage    0.18415004  0.15909663
T2D:w0      -0.02153186  0.04643192

With Single SNPs

Code
snplist=readRDS("./chrlistbar.rds")

snp_frame=do.call(rbind,snplist)
# Convert dat_with_all$eid to a vector of column indices
col_indices <- which(names(snp_frame) %in% dat_with_all$eid)
# Subset the data.table by these columns
selected_snp_frame <- snp_frame[, ..col_indices]

snp_list=vector("list", nrow(snp_frame))


for(i in c(1:3)){
  
  new_snp=data.frame(t(selected_snp_frame[i,]))
  colnames(new_snp)="newsnp"
  new_snp$eid=rownames(new_snp)
  ## now add that snps data as a column
  dat_updated=na.omit(merge(dat_with_all,new_snp,by="eid"))
  #print(colnames(dat_updated))
  
  
  ## recompute lm
  model_a=lm((delta_wt/w0)*100 ~ newsnp+w0+sex+enrollage+
            PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_updated)
  
  sum1=summary(model_a)$coefficients[1:5,c(1:2)]
  
model_b=lm(mW1 ~ newsnp*w0+sex+enrollage+
            +PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_updated)

sum2=summary(model_b)$coefficients[c(1:5,26),c(1:2)]
snp_name=snp_frame$rsid[i]
effect_allele=snp_frame$alleleA[i]

snp_list[[i]]=list(snp_name=snp_name,effect=snp_frame$alleleA[i],alt_allele=snp_frame$alleleB[i],sum_model_a=sum1,sum_model_b=sum2)
}



# Now create a new data frame to store all the data in the desired format
results_df_a <- data.frame(
  snp_name = character(),
  effect = character(),
  alt = character(),
  Beta_exposure = numeric(),
  SE_exposure = numeric(),
  Beta_W0 = numeric(),
  SE_W0 = numeric(),
  Beta_sex = numeric(),
  SE_sex = numeric(),
  Beta_age_at_initiation_in_years = numeric(),
  SE_age_at_initiation_in_years = numeric(),
  stringsAsFactors = FALSE
)

# Fill in the results data frame
for(i in c(1:3)) {
  print(i)
  snp_info <- snp_list[[i]]
  results_df_a <- rbind(results_df_a, data.frame(
    snp_name = snp_info$snp_name,
    effect = snp_info$effect,
    alt = snp_info$alt_allele,
    Beta_exposure = snp_info$sum_model_a[2, "Estimate"],
    SE_exposure = snp_info$sum_model_a[2, "Std. Error"],
    Beta_W0 = snp_info$sum_model_a[3, "Estimate"],
    SE_W0= snp_info$sum_model_a[3, "Std. Error"],
Beta_sex = snp_info$sum_model_a[4, "Estimate"],
SE_sex = snp_info$sum_model_a[4, "Std. Error"],
Beta_age_at_initiation_in_years = snp_info$sum_model_a[5, "Estimate"],
SE_age_at_initiation_in_years = snp_info$sum_model_a[5, "Std. Error"]))}
[1] 1
[1] 2
[1] 3
Code
results_df_a
    snp_name effect alt Beta_exposure SE_exposure    Beta_W0      SE_W0
1   rs728996      C   T     0.5822562    1.537311 -0.2562582 0.04332956
2 rs17702901      G   A    -2.2870501    3.865706 -0.2541249 0.04327187
3   rs429358      T   C     2.2852172    2.391750 -0.2572998 0.04320748
  Beta_sex   SE_sex Beta_age_at_initiation_in_years
1 3.970204 2.561956                       0.1223081
2 4.014953 2.562108                       0.1327715
3 3.819514 2.559737                       0.1345161
  SE_age_at_initiation_in_years
1                     0.1512138
2                     0.1501019
3                     0.1498398
Code
write.csv(results_df_a,file="results_model_a_bari.csv",quote = F)
[1] 1
[1] 2
[1] 3