processing

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':

    between, first, last
a=readRDS("~/Dropbox (Personal)/UKB_topic_app/disease_array_incidence.rds")
prs_subset=na.omit(readRDS("~/Dropbox (Personal)/pheno_dir/prs_subset.rds"))
mu_d_list=readRDS("~/Dropbox (Personal)/massivemudlist.rds")
names=readRDS("~/Desktop/namesofphenos.rds")
names(mu_d_list)=names$phenotype
colnames(a)=names$phenotype
top=names(sapply(mu_d_list,sum)[order(sapply(mu_d_list,sum),decreasing = T)][1:20])


## make sure everything matches
mu_d_l=mu_d_list[top]
f=sample(intersect(rownames(a[,1,]),rownames(prs_subset)),1000)
y2=a[f,top,]
g_i=prs_subset[f,-c(10,16,18,36,37)]


# Ensure dimensions are correct
n_individuals <- length(f)
n_diseases <- length(mu_d_l)
T <- dim(y2)[3]
n_topics <- 3   # You can adjust this as needed
n_genetic_factors <- ncol(g_i)

# Convert mu_d_l to the required format
mu_d_logit <- mu_d_l

# Ensure y2 is in the correct format (should be binary)
y <- as.array(y2)

# Ensure g_i is a matrix
g_i <- as.matrix(g_i)

print(dim(y))
[1] 1000   20   51
print(dim(g_i))
[1] 1000   32
print(length(mu_d_logit))
[1] 20
mcmc_results=readRDS("~/Dropbox (Personal)/mcmc_results_827_fast.rds")
K=3
post_phi=apply(mcmc_results$Phi,c(2,3,4),mean)
dimnames(post_phi)=list(paste0("Factor",c(1:K)),dimnames(y)[[2]],dimnames(y)[[3]])
post_lambda=apply(mcmc_results$Lambda,c(2,3,4),mean)
post_gamma=apply(mcmc_results$Gamma,c(2,3),mean)

dimnames(post_phi)=list(paste0("Factor",c(1:K)),dimnames(y)[[2]],dimnames(y)[[3]])
dimnames(post_lambda)=list(f,paste0("Factor",c(1:K)),dimnames(y)[[3]])
dimnames(post_gamma)=list(paste0("Factor",c(1:K)),colnames(g_i))
library(ggplot2)
library(dplyr)
library(tidyr)

# Average Phi over time
avg_phi <- apply(post_phi, c(1,2), mean)

# Create a list to store plots
disease_plots <- list()

for (k in 1:n_topics) {
  # Order diseases by importance for this factor
  ordered_diseases <- data.frame(
    Disease = colnames(avg_phi),
    Value = avg_phi[k,]
  ) %>% 
    arrange(desc(Value))
  
  # Create plot
  p <- ggplot(ordered_diseases, aes(x = reorder(Disease, Value), y = Value)) +
    geom_bar(stat = "identity", fill = "steelblue") +
    coord_flip() +
    theme_minimal() +
    labs(title = paste("Disease Loadings for Topic", k),
         x = "Diseases", 
         y = "Loading") +
    theme(axis.text.y = element_text(size = 8))
  
  disease_plots[[k]] <- p
}

# Display plots
for (p in disease_plots) {
  print(p)
}

gamma_plots <- list()

for (k in 1:n_topics) {
  # Order genetic factors by importance for this topic
  ordered_gamma <- data.frame(
    Genetic_Factor = colnames(g_i),
    Value = post_gamma[k,]
  ) %>% 
    arrange(desc(abs(Value)))  # Use absolute value for ordering
  
  # Create plot
  p <- ggplot(ordered_gamma, aes(x = reorder(Genetic_Factor, abs(Value)), y = Value)) +
    geom_bar(stat = "identity", fill = "darkgreen") +
    coord_flip() +
    theme_minimal() +
    labs(title = paste("Genetic Factor Importance for Topic", k),
         x = "Genetic Factors", 
         y = "Importance") +
    theme(axis.text.y = element_text(size = 8))
  
  gamma_plots[[k]] <- p
}

# Display plots
for (p in gamma_plots) {
  print(p)
}

try for more diseases

a=readRDS("~/Dropbox (Personal)/UKB_topic_app/disease_array_incidence.rds")
prs_subset=na.omit(readRDS("~/Dropbox (Personal)/pheno_dir/prs_subset.rds"))
mu_d_list=readRDS("~/Dropbox (Personal)/massivemudlist.rds")


mu_d_l=mu_d_list[1:100]
f=sample(intersect(rownames(a[,1,]),rownames(prs_subset)),1000)
y2=a[f,c(1:100),]
g_i=prs_subset[f,-c(10,16,18,36,37)]


# Ensure dimensions are correct
n_individuals <- length(f)
n_diseases <- length(mu_d_l)
T <- dim(y2)[3]
n_topics <- 3  # You can adjust this as needed
n_genetic_factors <- ncol(g_i)

# Convert mu_d_l to the required format
mu_d_logit <- mu_d_l

# Ensure y2 is in the correct format (should be binary)
y <- as.array(y2)

# Ensure g_i is a matrix
g_i <- as.matrix(g_i)

mcmc_results=readRDS("~/Dropbox (Personal)/mcmc_results_827.rds")
df=data.frame(phecode=colnames(a)[1:100])
dlist <- merge(df, ATM::disease_info_phecode_icd10, by = "phecode", sort = FALSE)

post_phi=apply(mcmc_results$Phi,c(2,3,4),mean)
dimnames(post_phi)=list(paste0("Factor",c(1:K)),dlist$phenotype,dimnames(a)[[3]])
post_lambda=apply(mcmc_results$Lambda,c(2,3,4),mean)
post_gamma=apply(mcmc_results$Gamma,c(2,3),mean)

dimnames(post_phi)=list(paste0("Factor",c(1:K)),dlist$phenotype,dimnames(a)[[3]])
dimnames(post_lambda)=list(f,paste0("Factor",c(1:K)),dimnames(a)[[3]])
dimnames(post_gamma)=list(paste0("Factor",c(1:K)),colnames(g_i))



library(ggplot2)
library(dplyr)
library(tidyr)

# Average Phi over time
avg_phi <- apply(post_phi, c(1,2), mean)

# Create a list to store plots
disease_plots <- list()

for (k in 1:n_topics) {
  # Order diseases by importance for this factor
  ordered_diseases <- data.frame(
    Disease = colnames(avg_phi),
    Value = avg_phi[k,]
  ) %>% 
    arrange(desc(Value))
  
  # Create plot
  p <- ggplot(ordered_diseases, aes(x = reorder(Disease, Value), y = Value)) +
    geom_bar(stat = "identity", fill = "steelblue") +
    coord_flip() +
    theme_minimal() +
    labs(title = paste("Disease Loadings for Topic", k),
         x = "Diseases", 
         y = "Loading") +
    theme(axis.text.y = element_text(size = 8))
  
  disease_plots[[k]] <- p
}

# Display plots
for (p in disease_plots) {
  print(p)
}