mcmc_sta_clean

Author

Sarah Urbut

Published

August 30, 2024

Code
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Code
library(reshape2)
library(mvtnorm)
library(rstan)
Loading required package: StanHeaders

rstan version 2.32.6 (Stan version 2.32.2)
For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores()).
To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)
For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
change `threads_per_chain` option:
rstan_options(threads_per_chain = 1)
Code
# Set seed for reproducibility
set.seed(123)

# 1. Simulation (from simnoulli)
# Assuming this function is defined in your simnoulli script
source("../sim_noulli.R")

Attaching package: 'MASS'
The following object is masked from 'package:dplyr':

    select


Attaching package: 'Matrix'
The following objects are masked from 'package:pracma':

    expm, lu, tril, triu

# A tibble: 20 × 2
     age `length(unique(eid))`
   <int>                 <int>
 1     1                    41
 2     2                    23
 3     3                    16
 4     4                     7
 5     5                     9
 6     6                    12
 7     7                     9
 8     8                    10
 9     9                     9
10    10                     8
11    11                     7
12    12                    16
13    13                    17
14    14                    11
15    15                    13
16    16                     9
17    17                    10
18    18                     6
19    19                     4
20    20                     8

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:MASS':

    select
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout

Attaching package: 'data.table'
The following objects are masked from 'package:reshape2':

    dcast, melt
The following objects are masked from 'package:dplyr':

    between, first, last

Attaching package: 'DT'
The following objects are masked from 'package:shiny':

    dataTableOutput, renderDataTable
Warning: The melt generic in data.table has been passed a array and will
attempt to redirect to the relevant reshape2 method; please note that reshape2
is superseded and is no longer actively developed, and this redirection is now
deprecated. To continue using melt methods from reshape2 while both libraries
are attached, e.g. melt.list, you can prepend the namespace, i.e.
reshape2::melt(topic_disease_time_array). In the next version, this warning
will become an error.

Code
# Extract dimensions
N <- dim(y)[1]
D <- dim(y)[2]
T <- dim(y)[3]
K <- 3  # Number of topics, adjust as needed
P <- ncol(g_i)


# Define the RBF kernel function
rbf_kernel <- function(t1, t2, length_scale, variance) {
  return(variance * exp(-0.5 * (t1 - t2) ^ 2 / length_scale ^ 2))
}




# 2. Initialization functions
initialize_model <- function(y,
                             g_i,
                             K,
                             length_scale_lambda,
                             length_scale_phi,
                             variance_phi,
                             variance_lambda,smooth_span=0.75)  {
  N <- dim(y)[1]
  D <- dim(y)[2]
  T <- dim(y)[3]
  P <- ncol(g_i)
  
  
  
  
  # Generate time points
  time_points <- 1:T
  
  
  jitter <- 1e-6  # Small jitter term to improve conditioning
  
  
  
  # Initialize Gamma
  Gamma_init <- matrix(rnorm(K * P, 0, 0.1), nrow = K, ncol = P)
  
  # Initialize Lambda
  Lambda_init <- array(0, dim = c(N, K, T))
  for (k in 1:K) {
    mean_lambda <- g_i %*% Gamma_init[k, ]
    ls_lambda = length_scale_lambda[k]
    vs_lambda = variance_lambda[k]
    
    # Compute the first row of the Toeplitz matrix
    first_row <- sapply(time_points, function(t)
      rbf_kernel(time_points[1], t, ls_lambda, vs_lambda))
    
    # Create the Toeplitz matrix
    Kern  <- toeplitz(first_row)
    image(Kern)
    # Add jitter to the diagonal to improve stability
    Kern <- Kern  + diag(jitter, nrow(Kern))
    
    for (i in 1:N) {
      Lambda_init[i, k, ] <- mvrnorm(1, mu = rep(mean_lambda[i], T), Sigma = Kern)
    }
  }
  
  # Initialize Phi
  Phi_init <- array(0, dim = c(K, D, T))
  for (k in 1:K) {
    ls_phi = length_scale_phi[k]
    vs_phi = variance_phi[k]
    
    # Compute the first row of the Toeplitz matrix
    first_row <- sapply(time_points, function(t)
      rbf_kernel(time_points[1], t, ls_phi, vs_phi))
    
    # Create the Toeplitz matrix
    Kern <- toeplitz(first_row)
    image(Kern)
    # Add jitter to the diagonal to improve stability
    Kern  <- Kern  + diag(jitter, nrow(Kern))
    
    for (d in 1:D) {
      Phi_init[k, d, ] <- mvrnorm(1, mu = rep(0, T), Sigma = Kern)
    }
  }
  
  
  
  # Calculate and smooth mu_d
  mu_d <- matrix(0, D, T)
  for (d in 1:D) {
    # Calculate raw prevalence
    raw_prevalence <- sapply(1:T, function(t) mean(y[, d, t]))
    
    # Apply logit transformation with small constant to avoid infinite values
    logit_prev <- qlogis((raw_prevalence * (N - 1) + 0.5) / N)
    
    # Apply LOESS smoothing
    time_points <- 1:T
    smooth_logit <- loess(logit_prev ~ time_points, span = smooth_span)
    
    # Predict smoothed values
    mu_d[d, ] <- predict(smooth_logit, time_points)
  }
  
  
  return(list(
    Lambda = Lambda_init,
    Gamma = Gamma_init,
    Phi = Phi_init,
    mu_d = mu_d
  ))
}

#### Here we initialize with a different phi and lambda for each 



# Assuming you've already defined and run initialize_model function
initial_values <- initialize_model(y, g_i, K, length_scale_lambda=length_scales_lambda, length_scale_phi=length_scales_phi,
                                   variance_phi = var_scales_phi, 
                                   variance_lambda = var_scales_lambda,smooth_span = 0.75)

Code
plot(initial_values$mu_d[1,],type="l")
matplot(t(initial_values$Phi[1,,]),type="l")

Code
matplot(t(initial_values$Lambda[1,,]),type="l")

Code
library(Matrix)

compute_symmetric_inverse_rbf_kernel <- function(T, length_scale, variance) {
  time_points <- 1:T
  D <- outer(time_points, time_points, "-")^2
  K <- variance * exp(-0.5 * D / length_scale^2)
  K <- K + diag(1e-6, T)  # Add small jitter for numerical stability
  K_inv <- solve(K)
  
  # Force symmetry
  K_inv_symmetric <- (K_inv + t(K_inv)) / 2
  
  return(K_inv_symmetric)
}

# Compute inverse kernels for lambda
inv_kernel_lambda <- lapply(1:K, function(k) {
  compute_symmetric_inverse_rbf_kernel(T, length_scales_lambda[k], var_scales_lambda[k])
})

# Compute inverse kernels for phi
inv_kernel_phi <- lapply(1:K, function(k) {
  compute_symmetric_inverse_rbf_kernel(T, length_scales_phi[k], var_scales_phi[k])
})


inv_kernel_lambda[[1]][1:5, 1:5]
            [,1]       [,2]       [,3]       [,4]        [,5]
[1,]   61109.815 -160655.69   79347.58   69701.93   -1042.156
[2,] -160655.691  482580.64 -367375.66 -103498.43   70960.139
[3,]   79347.577 -367375.66  581618.05 -277715.76 -101710.186
[4,]   69701.933 -103498.43 -277715.76  660941.65 -278240.044
[5,]   -1042.156   70960.14 -101710.19 -278240.04  658486.627
Code
stan_model = "
data {
  int<lower=0> N;
  int<lower=0> D;
  int<lower=0> T;
  int<lower=0> K;
  int<lower=0> P;
  matrix[N, P] g;
  int<lower=0,upper=1> y[N, D, T];
  matrix[T, T] precision_lambda[K];
  matrix[T, T] precision_phi[K];
  matrix[D, T] mu_d_init;  // Initial estimate of mu_d
  real<lower=0> mu_d_sd;   // Standard deviation for mu_d prior
}

parameters {
  matrix[K, P] Gamma;
  matrix[T, N] lambda[K];
  matrix[T, D] phi[K];
  matrix[D, T] mu_d;
}

model {
  // Priors
  for (k in 1:K)
    for (p in 1:P)
      Gamma[k, p] ~ normal(0, 1);  // Independent normal priors for Gamma

  // Informative prior for mu_d
  for (d in 1:D)
    for (t in 1:T)
      mu_d[d, t] ~ normal(mu_d_init[d, t], mu_d_sd);

  // GP priors for lambda and phi using pre-computed precision matrices
  for (k in 1:K) {
    for (n in 1:N) {
      vector[T] mean_lambda = rep_vector(dot_product(g[n], Gamma[k]), T);
      lambda[k][:, n] ~ multi_normal_prec(mean_lambda, precision_lambda[k]);
    }
    
    for (d in 1:D)
      phi[k][:, d] ~ multi_normal_prec(rep_vector(0, T), precision_phi[k]);
  }

  // Hazard Likelihood
  for (n in 1:N) {
    for (d in 1:D) {
      int is_diagnosed = 0;
      real log_surv = 0;
      
      for (t in 1:T) {
        if (!is_diagnosed) {
          real logit_hazard = mu_d[d, t];
          for (k in 1:K)
            logit_hazard += lambda[k][t, n] * phi[k][t, d];
          
          real hazard = inv_logit(logit_hazard);
          
          if (y[n, d, t] == 1) {
            target += log(hazard) + log_surv;
            is_diagnosed = 1;
          } else {
            log_surv += log1m(hazard);
          }
        }
      }
      
      if (!is_diagnosed) {
        target += log_surv;
      }
    }
  }
}

generated quantities {
  matrix[N, D] prob_disease;
  for (n in 1:N)
    for (d in 1:D) {
      real logit_p = mu_d[d, T];  // Using the last time point
      for (k in 1:K)
        logit_p += lambda[k][T, n] * phi[k][T, d];
      prob_disease[n, d] = inv_logit(logit_p);
    }
}
"
Code
# Function to format initial values for Stan
format_init_values <- function(init_values) {
  list(
    Gamma = init_values$Gamma,
    lambda = lapply(1:K, function(k) t(init_values$Lambda[,k,])),
    phi = lapply(1:K, function(k) t(init_values$Phi[k,,])),
    mu_d = init_values$mu_d
  )
}
# Create a list of initial values for each chain
n_chains <- 4
init_values_list <- replicate(n_chains, format_init_values(initial_values), simplify = FALSE)




# Prepare Stan data
stan_data <- list(
  N = dim(y)[1],
  D = dim(y)[2],
  T = dim(y)[3],
  K = K,
  P = ncol(g_i),
  g = g_i,
  y = y,
  precision_lambda = inv_kernel_lambda,
  precision_phi = inv_kernel_phi,
  mu_d_init = initial_values$mu_d,  # Your initial estimate of mu_d
  mu_d_sd = 0.1  # Adjust this value based on your confidence in mu_d_init
)

# Compile and run the model
compiled_model <- stan_model(model_code = stan_model)
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 15.0.0 (clang-1500.3.9.4)’
using SDK: ‘MacOSX14.4.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
         ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Code
fit <- sampling(compiled_model, data = stan_data, 
                iter = 2000, chains = 4,
                cores = 4,  # Adjust based on your available cores
                init = init_values_list)
Warning: There were 4000 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: The largest R-hat is 3.95, indicating chains have not mixed.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#r-hat
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#tail-ess
Code
saveRDS(fit,"~/Dropbox (Personal)/fitsample.rds")