########################################################### # Joint models for longitudinal and time-to-event data #### # Jana Vorlickova, 2019/2020 #### # Priloha k diplomove praci #### ########################################################### ############################### # PART A - data simulation #### ############################### library(MASS) library(lme4) rm(list=ls()) #set.seed(123) # for 400 #set.seed(345) # for 200 #set.seed(567) # for 100 set.seed(789) # for 600 K <- 10 # number of planned repeated measurements per subject, per outcome t.max <- 84 # maximum follow-up time # number of observations n1 <- 120 #20,40,80,120 n2 <- 300 #50,100,200,300 n3 <- 180 #30,60,120, 180 v <- rep(c(1,2,3), c(n1, n2,n3)) # parameters for the longitudinal model betas1 <- c("(Intercept)" = 120, "lek" = 0.02, "time"=-0.2) betas2 <- c("(Intercept)" = 120, "lek" = 5, "time"=0.5) betas3 <- c("(Intercept)" = 120, "lek" = -10, "time"=-0.5) # measurement error standard deviation sd <- 6 sd.int <- 0.2 sd.slope <- 0.25 # parameters for the survival model # coefficients for Cox model alpha1 <- c("lek" = 0.4) alpha2 <- c("lek" = -0.18) alpha3 <- c("lek" = -0.59) weibA <- 1.1 DFlong <- list() DFcox <- list() M <- 100 for(i in 1:M){ # group 1 ---#### # -----------#### times <- c(replicate(n1, c(0, sort(runif(K-1, 0, t.max))))) # at which time points longitudinal measurements are supposed to be taken lek <- sample(c(0,1), n1, replace = TRUE) DF <- data.frame(day = times, lek = factor(rep(lek, each = K))) X <- model.matrix(~ lek+day, data = DF) Z <- model.matrix(~ 1+day, data = DF) # design matrix for the survival model W <- cbind("(Intercept)" = 1, "lek" = lek) #simulate random effects b0 <- rnorm(n1, 0, sd.int) b1 <- rnorm(n1, 0, sd.slope) b <- cbind(b0, b1) # simulate longitudinal responses id <- rep(1:n1, each = K) eta.y <- as.vector(X %*% betas1 + rowSums(Z * b[id,])) # linear predictor y <- eta.y + rnorm(n1 * K, 0,sd) # Longitudinal data set DFlon1 <- cbind(id, y, DF) # Cox model lek <- unique(DFlon1[, c("id", "lek")]) Xalpha <- alpha1*(as.numeric(lek[,2])-1) U <- runif(n1, 0, 1) eventT <- (-log(U)*exp(5 -Xalpha))^(1/weibA) # censoring Ctimes <-pmin(runif(n1, 60, 90),84) Time <- pmin(eventT, Ctimes) event <- as.numeric(eventT <= Ctimes) # event indicator DFcox1 <- cbind(Time, event, lek, Ctimes) # Group 2 --#### # ----------#### times <- c(replicate(n2, c(0, sort(runif(K-1, 0, t.max))))) # at which time points longitudinal measurements are supposed to be taken lek <- sample(c(0,1), n2, replace = TRUE) DF <- data.frame(day = times, lek = factor(rep(lek, each = K))) X <- model.matrix(~ lek+day, data = DF) Z <- model.matrix(~ 1+day, data = DF) # design matrix for the survival model W <- cbind("(Intercept)" = 1, "lek" = lek) #simulate random effects b0 <- rnorm(n2, 0, sd.int) b1 <- rnorm(n2, 0, sd.slope) b <- cbind(b0, b1) # simulate longitudinal responses id <- rep(1:n2, each = K) eta.y <- as.vector(X %*% betas2 + rowSums(Z * b[id,])) # linear predictor y <- eta.y + rnorm(n2 * K, 0,sd) # Longitudinal data set id <- id+n1 DFlon2 <- cbind(id, y, DF) # Cox model lek <- unique(DFlon2[, c("id", "lek")]) Xalpha <- alpha2*(as.numeric(lek[,2])-1) U <- runif(n2, 0, 1) eventT <- (-log(U)*exp(5 -Xalpha))^(1/weibA) # censoring Ctimes <-pmin(runif(n2, 60, 90),84) Time <- pmin(eventT, Ctimes) event <- as.numeric(eventT <= Ctimes) # event indicator DFcox2 <- cbind(Time, event, lek, Ctimes) # Group 3 --#### # ----------#### times <- c(replicate(n3, c(0, sort(runif(K-1, 0, t.max))))) # at which time points longitudinal measurements are supposed to be taken lek <- sample(c(0,1), n3, replace = TRUE) DF <- data.frame(day = times, lek = factor(rep(lek, each = K))) X <- model.matrix(~ lek+day, data = DF) Z <- model.matrix(~ 1+day, data = DF) # design matrix for the survival model W <- cbind("(Intercept)" = 1, "lek" = lek) #simulate random effects b0 <- rnorm(n3, 0, sd.int) b1 <- rnorm(n3, 0, sd.slope) b <- cbind(b0, b1) # simulate longitudinal responses id <- rep(1:n3, each = K) eta.y <- as.vector(X %*% betas3 + rowSums(Z * b[id,])) # linear predictor y <- eta.y + rnorm(n3 * K, 0,sd) # Longitudinal data set id <- id+n1+n2 DFlon3 <- cbind(id, y, DF) # Cox model lek <- unique(DFlon3[, c("id", "lek")]) Xalpha <- alpha3*(as.numeric(lek[,2])-1) U <- runif(n3, 0, 1) eventT <- (-log(U)*exp(5 -Xalpha))^(1/weibA) # censoring Ctimes <- pmin(runif(n3, 60, 90),84) Time <- pmin(eventT, Ctimes) event <- as.numeric(eventT <= Ctimes) # event indicator DFcox3 <- cbind(Time, event, lek, Ctimes) dataLon <- rbind(DFlon1, DFlon2, DFlon3) dataCox <- rbind(DFcox1, DFcox2, DFcox3) DFcox[[i]] <- dataCox DFlong[[i]] <- dataLon } DFlong2 <- list() for(j in 1:100){ dataLon <- DFlong[[j]] tf <- dataLon$day[dataLon$id==1]