library(lubridate)library(dplyr)library(tidyr)library(data.table)library(table1)## From the UKB showcase fieldprs_subset=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/prs_subset.rds")df_baseline=readRDS("~/Library/CloudStorage/Dropbox-Personal/pheno_dir/output/dfukb_baseline.rds")surg=readRDS("barisurgerydate.rds")procedure=c(rep("Gastric bypass",7),rep("Gastric banding",3),rep("Sleeve gastrectomy",4),"Duodenal switch")code=c("G281", "G301","G302", "G304", "G312", "G321", "G331","G303","G481","G485","G282","G283","G284","G285","G716")key=data.frame(cbind(procedure,code))s=merge(surg,key,by.x="proc",by.y="code",all.x = T)[,c(2:4)]colnames(s)[1]=c("Identifier")colnames(s)[2]=c("event_dt")s$event_dt <-as.Date(s$event_dt, format="%d/%m/%Y")biomarkers=readRDS("~/Library/CloudStorage/Dropbox-Personal/ukbb-ehr-data/data/biomarkers.rds")height=biomarkers[biomarkers$variable%in%"height",]%>%group_by(eid)%>%summarise(m=mean(value))weight=biomarkers[biomarkers$variable%in%"weight",]weight$date <-as.Date(weight$date, format="%Y/%m/%d")patient_med_data <- s colnames(patient_med_data)=c("eid","first_script_date","med_name")patient_med_data$first_script_date <-as.Date(patient_med_data$first_script_date, format ="%Y-%m-%d")dim(patient_med_data)
[1] 1202 3
There are 1202 patients initially;
We then grab treated pateints. Median of body weight measurements at least 180 days after initiation and maximum 365 days after initiation. (different than our definition which was one year after end)
This variable is called mW1
Code
patient_weight_data=weight[weight$eid%in%patient_med_data$eid,c("eid","date","value")]bl=df_baseline[,c("identifier","f.53.0.0","f.21001.0.0")]bl$f.53.0.0=as.Date(bl$f.53.0.0)patient_weight_data=rbind(patient_weight_data,bl[,c("identifier","f.53.0.0","f.21001.0.0")],use.names=FALSE)patient_weight_data$date <-as.Date(patient_weight_data$date, format ="%Y-%m-%d")merged_data <-merge(patient_med_data,patient_weight_data, by ="eid")### here i use median weight beofre and the minimum datew0 <- merged_data %>%filter(date < first_script_date & date > (first_script_date -years(1)))%>%group_by(eid) %>%summarise("beforedate"=min(date),"medweight"=median(value),"number"=length(value),"med_name"=med_name[1])summary(w0$number)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 1.000 2.000 2.579 3.000 16.000
Code
## Median of body weight measurements at least 180 days after surgery and maximum 1460 days after surgery. This variable is called mW1mW1 <- merged_data %>%filter(date > (first_script_date +days(180)) & date < (first_script_date +days(1460))) %>%group_by(eid) %>%summarise("afterdate"=min(date),"medweight"=median(value),"number"=length(value))summary(mW1$number)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 1.000 3.000 4.361 6.000 26.000
Cohort descriptors
N individuals included in the analysis
% females
Age in years (mean±SD)
T2D prevalence at initiation. The definition of T2D is left to individual cohorts.
Percentage breakdown of Surgical types within the class.
Body weight (kg) and BMI (kg/m2) (mean±SD) at W0 (see 4.1 / 4.2)
Median body weight (kg) and BMI (kg/m2) between 180 and 365 days (or 1460 days for bariatric surgery)
Average percentage change in body weight between the two time points:
\(\sum_i (mW1 - W0)/W0 * 100 ) / n\)
Code
dat=merge(w0[,c("eid","beforedate","medweight","med_name")],mW1[,c("eid","afterdate","medweight")],by="eid")# Trim spaces from the medication namesdat$med_name <- stringr::str_trim(dat$med_name)colnames(dat)=c("eid","t1","w0","med_name","t2","mW1")dat$med_name=as.factor(dat$med_name)summary(dat$med_name)
snplist=readRDS("./chrlistbar.rds")snp_frame=do.call(rbind,snplist)# Convert dat_with_all$eid to a vector of column indicescol_indices <-which(names(snp_frame) %in% dat_with_all$eid)# Subset the data.table by these columnsselected_snp_frame <- snp_frame[, ..col_indices]snp_list=vector("list", nrow(snp_frame))for(i inc(1:3)){ new_snp=data.frame(t(selected_snp_frame[i,]))colnames(new_snp)="newsnp" new_snp$eid=rownames(new_snp)## now add that snps data as a column dat_updated=na.omit(merge(dat_with_all,new_snp,by="eid"))#print(colnames(dat_updated))## recompute lm model_a=lm((delta_wt/w0)*100~ newsnp+w0+sex+enrollage+ PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_updated) sum1=summary(model_a)$coefficients[1:5,c(1:2)]model_b=lm(mW1 ~ newsnp*w0+sex+enrollage++PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+PC11+PC12+PC13+PC14+PC15+PC16+PC17+PC18+PC19+PC20,data = dat_updated)sum2=summary(model_b)$coefficients[c(1:5,26),c(1:2)]snp_name=snp_frame$rsid[i]effect_allele=snp_frame$alleleA[i]snp_list[[i]]=list(snp_name=snp_name,effect=snp_frame$alleleA[i],alt_allele=snp_frame$alleleB[i],sum_model_a=sum1,sum_model_b=sum2)}# Now create a new data frame to store all the data in the desired formatresults_df_a <-data.frame(snp_name =character(),effect =character(),alt =character(),Beta_exposure =numeric(),SE_exposure =numeric(),Beta_W0 =numeric(),SE_W0 =numeric(),Beta_sex =numeric(),SE_sex =numeric(),Beta_age_at_initiation_in_years =numeric(),SE_age_at_initiation_in_years =numeric(),stringsAsFactors =FALSE)# Fill in the results data framefor(i inc(1:3)) {print(i) snp_info <- snp_list[[i]] results_df_a <-rbind(results_df_a, data.frame(snp_name = snp_info$snp_name,effect = snp_info$effect,alt = snp_info$alt_allele,Beta_exposure = snp_info$sum_model_a[2, "Estimate"],SE_exposure = snp_info$sum_model_a[2, "Std. Error"],Beta_W0 = snp_info$sum_model_a[3, "Estimate"],SE_W0= snp_info$sum_model_a[3, "Std. Error"],Beta_sex = snp_info$sum_model_a[4, "Estimate"],SE_sex = snp_info$sum_model_a[4, "Std. Error"],Beta_age_at_initiation_in_years = snp_info$sum_model_a[5, "Estimate"],SE_age_at_initiation_in_years = snp_info$sum_model_a[5, "Std. Error"]))}
[1] 1
[1] 2
[1] 3
Code
results_df_a
snp_name effect alt Beta_exposure SE_exposure Beta_W0 SE_W0
1 rs728996 C T 0.5822562 1.537311 -0.2562582 0.04332956
2 rs17702901 G A -2.2870501 3.865706 -0.2541249 0.04327187
3 rs429358 T C 2.2852172 2.391750 -0.2572998 0.04320748
Beta_sex SE_sex Beta_age_at_initiation_in_years
1 3.970204 2.561956 0.1223081
2 4.014953 2.562108 0.1327715
3 3.819514 2.559737 0.1345161
SE_age_at_initiation_in_years
1 0.1512138
2 0.1501019
3 0.1498398