diff --git a/NAMESPACE b/NAMESPACE index 7ccade27..3e20b1a3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -108,6 +108,7 @@ export(Parameter) export(ParameterList) export(Prior) export(STAN_BLOCKS) +export(SimGroup) export(StanModel) export(StanModule) export(Surv) @@ -172,6 +173,7 @@ exportClasses(LongitudinalSteinFojo) exportClasses(Parameter) exportClasses(ParameterList) exportClasses(Prior) +exportClasses(SimGroup) exportClasses(StanModel) exportClasses(StanModule) exportClasses(SurvivalExponential) diff --git a/R/LongitudinalRandomSlope.R b/R/LongitudinalRandomSlope.R index c9ffae3a..8a500ec1 100755 --- a/R/LongitudinalRandomSlope.R +++ b/R/LongitudinalRandomSlope.R @@ -42,7 +42,7 @@ LongitudinalRandomSlope <- function( name = "Random Slope", stan = stan, parameters = ParameterList( - Parameter(name = "lm_rs_intercept", prior = intercept, size = 1), + Parameter(name = "lm_rs_intercept", prior = intercept, size = "n_studies"), Parameter(name = "lm_rs_slope_mu", prior = slope_mu, size = "n_arms"), Parameter(name = "lm_rs_slope_sigma", prior = slope_sigma, size = 1), Parameter(name = "lm_rs_sigma", prior = sigma, size = 1), diff --git a/R/simulations.R b/R/simulations.R index ead016f4..b5fa8608 100755 --- a/R/simulations.R +++ b/R/simulations.R @@ -23,10 +23,64 @@ get_timepoints <- function(x) { ) } +#' Define Simulation Group +#' +#' Specifies a simulation group to be used by [`simulate_joint_data()`]. +#' +#' @param n (`numeric`)\cr number of subjects in the group. +#' @param arm (`character`)\cr treatment arm. +#' @param study (`character`)\cr study name. +#' +#' @slot n (`numeric`)\cr See arguments. +#' @slot arm (`character`)\cr See arguments. +#' @slot study (`character`)\cr See arguments. +#' +#' @examples +#' SimGroup(n = 50, arm = "Arm-A", study = "Study-1") +#' @name SimGroup-class +#' @exportClass SimGroup +.SimGroup <- setClass( + "SimGroup", + slots = c( + n = "numeric", + arm = "character", + study = "character" + ) +) + +#' @export +#' @rdname SimGroup-class +SimGroup <- function(n, arm, study) { + .SimGroup( + n = n, + arm = arm, + study = study + ) +} + + +setValidity( + "SimGroup", + function(object) { + if (length(object@n) != 1) { + return("`n` must be a length 1 integer") + } + if (length(object@arm) != 1) { + return("`arm` must be a length 1 string") + } + if (length(object@study) != 1) { + return("`study` must be a length 1 string") + } + if (any(object@n < 1) | any(object@n %% 1 != 0)) { + return("`n` must be positive integer") + } + return(TRUE) + } +) #' Simulating Joint Longitudinal and Time-to-Event Data #' -#' @param n_arm (`numeric`)\cr numbers of patients per treatment arm. +#' @param design (`list`)\cr a list of [`SimGroup`] objects. See details. #' @param times (`numeric`)\cr time grid, e.g. specifying the days after randomization. #' @param lambda_cen (`number`)\cr rate of the exponential censoring distribution. #' @param beta_cont (`number`)\cr coefficient for the continuous covariate. @@ -37,10 +91,26 @@ get_timepoints <- function(x) { #' @param .debug (`flag`)\cr whether to enter debug mode such that the function #' would only return a subset of columns. #' +#' @details +#' +#' The `design` argument is used to specify how many distinct groups should be simulated +#' including key information such as the number of subjects within the group as well as +#' which treatment arm and study the group belongs to. The `design` argument should be a +#' list of [`SimGroup`] objects e.g. +#' ``` +#' design = list( +#' SimGroup(n = 50, study = "Study-1", arm = "Arm-A"), +#' SimGroup(n = 50, study = "Study-1", arm = "Arm-B") +#' ) +#' ``` +#' #' @returns List with simulated `lm` (longitudinal) and `os` (survival) data sets. #' @export simulate_joint_data <- function( - n_arm = c(50, 80), # Number of arms and number of subjects per arm + design = list( + SimGroup(n = 50, study = "Study-1", arm = "Arm-A"), + SimGroup(n = 50, study = "Study-1", arm = "Arm-B") + ), times = 0:2000, lambda_cen = 1 / 3, beta_cont = 0.2, @@ -50,24 +120,31 @@ simulate_joint_data <- function( .silent = FALSE, .debug = FALSE ) { - n <- sum(n_arm) + + assert( + all(vapply(design, \(x) is(x, "SimGroup"), logical(1))), + msg = "All elements of `design` must be of class `SimGroup`" + ) + + n_group <- vapply(design, function(x) x@n, numeric(1)) + arms <- vapply(design, function(x) x@arm, character(1)) + studies <- vapply(design, function(x) x@study, character(1)) + n <- sum(n_group) u_pts <- sprintf("pt_%05i", seq_len(n)) bounds <- get_timepoints(times) - ARMS <- paste0("Group-", seq_along(n_arm)) - os_baseline <- dplyr::tibble(pt = u_pts) |> dplyr::mutate(cov_cont = stats::rnorm(n)) |> dplyr::mutate(cov_cat = factor( - sample(c("A", "B", "C"), replace = TRUE, size = n), - levels = c("A", "B", "C") + sample(names(beta_cat), replace = TRUE, size = n), + levels = names(beta_cat) )) |> dplyr::mutate(log_haz_cov = .data$cov_cont * beta_cont + beta_cat[.data$cov_cat]) |> dplyr::mutate(survival = stats::runif(n)) |> dplyr::mutate(chazard_limit = -log(.data$survival)) |> dplyr::mutate(time_cen = stats::rexp(n, lambda_cen)) |> - dplyr::mutate(study = factor("Study-1")) |> - dplyr::mutate(arm = factor(rep(ARMS, times = n_arm), levels = ARMS)) |> + dplyr::mutate(arm = factor(rep(arms, times = n_group), levels = unique(arms))) |> + dplyr::mutate(study = factor(rep(studies, times = n_group), levels = unique(studies))) |> dplyr::select(!dplyr::all_of("survival")) time_dat <- expand.grid( diff --git a/R/simulations_gsf.R b/R/simulations_gsf.R index 659c0a05..dfea8392 100644 --- a/R/simulations_gsf.R +++ b/R/simulations_gsf.R @@ -81,6 +81,8 @@ sim_lm_gsf <- function( assert_that( length(unique(lm_base$study)) == length(mu_b), + is.factor(lm_base$study), + is.factor(lm_base$arm), length(mu_b) == 1, length(sigma) == 1, length(mu_s) == length(unique(lm_base$arm)), @@ -92,8 +94,8 @@ sim_lm_gsf <- function( baseline_covs <- lm_base |> dplyr::distinct(.data$pt, .data$arm, .data$study) |> - dplyr::mutate(study_idx = as.numeric(factor(as.character(.data$study)))) |> - dplyr::mutate(arm_idx = as.numeric(factor(as.character(.data$arm)))) |> + dplyr::mutate(study_idx = as.numeric(.data$study)) |> + dplyr::mutate(arm_idx = as.numeric(.data$arm)) |> dplyr::mutate(psi_b = stats::rlnorm(dplyr::n(), log(mu_b[.data$study_idx]), omega_b)) |> dplyr::mutate(psi_s = stats::rlnorm(dplyr::n(), log(mu_s[.data$arm_idx]), omega_s)) |> dplyr::mutate(psi_g = stats::rlnorm(dplyr::n(), log(mu_g[.data$arm_idx]), omega_g)) |> diff --git a/R/simulations_rs.R b/R/simulations_rs.R index d13a7e59..125f3c0d 100644 --- a/R/simulations_rs.R +++ b/R/simulations_rs.R @@ -1,8 +1,8 @@ #' Construct a Simulation Function for Longitudinal Data from Random Slope Model #' -#' @param intercept (`number`)\cr the mean baseline value. -#' @param slope_mu (`numeric`)\cr the population slope for the two treatment arms. +#' @param intercept (`number`)\cr the mean baseline value for each study. +#' @param slope_mu (`numeric`)\cr the population slope for each treatment arm. #' @param slope_sigma (`number`)\cr the random slope standard deviation. #' @param sigma (`number`)\cr the variance of the longitudinal values. #' @param link_dsld (`number`)\cr the link coefficient for the DSLD contribution. @@ -23,20 +23,29 @@ sim_lm_random_slope <- function( function(lm_base) { assert_that( - length(slope_mu) == 1 | length(slope_mu) == length(unique(lm_base$arm)), - msg = "slope_mu should either be length 1 or equal to the length of n_arm" + is.factor(lm_base$study), + is.factor(lm_base$arm) ) - if (length(slope_mu) == 1) { - slope_mu <- rep(slope_mu, length(unique(lm_base$arm))) - } + assert_that( + length(slope_mu) == length(unique(lm_base$arm)), + msg = "`length(slope_mu)` should be equal to the number of unique studies" + ) + + assert_that( + length(intercept) == length(unique(lm_base$study)), + msg = "`length(intercept)` should be equal to the number of unique studies" + ) rs_baseline <- lm_base |> - dplyr::distinct(.data$pt, .data$arm) |> + dplyr::distinct(.data$pt, .data$arm, .data$study) |> + dplyr::mutate(intercept = intercept[as.numeric(.data$study)]) |> dplyr::mutate(slope_ind = stats::rnorm( - dplyr::n(), slope_mu[as.numeric(factor(as.character(.data$arm)))], sd = slope_sigma + n = dplyr::n(), + mean = slope_mu[as.numeric(.data$arm)], + sd = slope_sigma )) |> - dplyr::select(!dplyr::any_of("arm")) + dplyr::select(!dplyr::any_of(c("arm", "study"))) lm_dat <- lm_base |> dplyr::mutate(err = stats::rnorm(dplyr::n(), 0, sigma)) |> diff --git a/R/simulations_sf.R b/R/simulations_sf.R index 6b5765fd..23cb0e03 100644 --- a/R/simulations_sf.R +++ b/R/simulations_sf.R @@ -76,6 +76,8 @@ sim_lm_sf <- function( length(unique(lm_base$study)) == length(mu_b), length(mu_b) == 1, length(sigma) == 1, + is.factor(lm_base$study), + is.factor(lm_base$arm), length(mu_s) == length(unique(lm_base$arm)), length(mu_s) == length(mu_g), length(c(omega_b, omega_s, omega_g)) == 3 @@ -83,8 +85,8 @@ sim_lm_sf <- function( baseline_covs <- lm_base |> dplyr::distinct(.data$pt, .data$arm, .data$study) |> - dplyr::mutate(study_idx = as.numeric(factor(as.character(.data$study)))) |> - dplyr::mutate(arm_idx = as.numeric(factor(as.character(.data$arm)))) |> + dplyr::mutate(study_idx = as.numeric(.data$study)) |> + dplyr::mutate(arm_idx = as.numeric(.data$arm)) |> dplyr::mutate(psi_b = stats::rlnorm(dplyr::n(), log(mu_b[.data$study_idx]), omega_b)) |> dplyr::mutate(psi_s = stats::rlnorm(dplyr::n(), log(mu_s[.data$arm_idx]), omega_s)) |> dplyr::mutate(psi_g = stats::rlnorm(dplyr::n(), log(mu_g[.data$arm_idx]), omega_g)) diff --git a/_pkgdown.yml b/_pkgdown.yml index 652d0af6..8e0c626c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -45,6 +45,7 @@ reference: - sim_os_exponential - sim_os_loglogistic - sim_os_weibull + - SimGroup - title: Prior Distributions contents: - Prior diff --git a/design/debug-gsf/compare/model.R b/design/debug-gsf/compare/model.R index 4fb41284..df696380 100644 --- a/design/debug-gsf/compare/model.R +++ b/design/debug-gsf/compare/model.R @@ -285,7 +285,9 @@ devtools::load_all(export = FALSE, path = "../../..") set.seed(4513) jlist <- simulate_joint_data( .debug = TRUE, - n_arm = c(50), + design = list( + SimGroup(50, "Arm-A", "Study-X") + ), times = seq(0, 3, by = (1/365)/2), lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/example_of_use.R b/design/example_of_use.R index 638c6d33..36c935ea 100755 --- a/design/example_of_use.R +++ b/design/example_of_use.R @@ -78,7 +78,10 @@ write_stan(jm, "local/debug.stan") ## Generate Test data with known parameters jlist <- simulate_joint_data( - n_arm = c(500, 500), + design = list( + SimGroup(500, "Arm-A", "Study-X"), + SimGroup(500, "Arm-B", "Study-X") + ), times = 1:2000, lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/tests/gsf-no-link.R b/design/tests/gsf-no-link.R index ed44c2e6..1f4093de 100644 --- a/design/tests/gsf-no-link.R +++ b/design/tests/gsf-no-link.R @@ -21,7 +21,10 @@ options("jmpost.cache_dir" = file.path("local", "models")) # set.seed(7143) # jlist <- simulate_joint_data( # .debug = TRUE, -# n_arm = c(70, 90), + # design = list( + # SimGroup(70, "Arm-A", "Study-X"), + # SimGroup(90, "Arm-B", "Study-X") + # ), # times = seq(0, 3, by = (1/365)/2), # lambda_cen = 1 / 9000, # beta_cat = c( @@ -48,7 +51,10 @@ options("jmpost.cache_dir" = file.path("local", "models")) set.seed(7143) jlist <- simulate_joint_data( .debug = TRUE, - n_arm = c(85, 100), + design = list( + SimGroup(85, "Arm-A", "Study-X"), + SimGroup(100, "Arm-B", "Study-X") + ), times = seq(0, 3, by = (1/365)/2), lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/tests/gsf-with-link-identity.R b/design/tests/gsf-with-link-identity.R index 5b2959a6..9ca917cc 100644 --- a/design/tests/gsf-with-link-identity.R +++ b/design/tests/gsf-with-link-identity.R @@ -18,7 +18,9 @@ options("jmpost.cache_dir" = file.path("local", "models")) ## Generate Test data with known parameters jlist <- simulate_joint_data( - n_arm = 200, + design = list( + SimGroup(200, "Arm-A", "Study-X") + ), times = seq(0, 1000), lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/tests/gsf-with-link.R b/design/tests/gsf-with-link.R index 983f718f..9920253e 100644 --- a/design/tests/gsf-with-link.R +++ b/design/tests/gsf-with-link.R @@ -17,7 +17,10 @@ options("jmpost.cache_dir" = file.path("local", "models")) ## Generate Test data with known parameters jlist <- simulate_joint_data( - n_arm = c(80, 80), + design = list( + SimGroup(80, "Arm-A", "Study-X"), + SimGroup(80, "Arm-B", "Study-X") + ), times = seq(0, 4, by = (1/365)/2), lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/tests/os-brier-score.R b/design/tests/os-brier-score.R index 2c4166f4..424160e1 100644 --- a/design/tests/os-brier-score.R +++ b/design/tests/os-brier-score.R @@ -7,7 +7,10 @@ options("jmpost.cache_dir" = file.path("local", "models")) library(bayesplot) jlist <- simulate_joint_data( - n_arm = c(1000, 1000), + design = list( + SimGroup(1000, "Arm-A", "Study-X"), + SimGroup(1000, "Arm-B", "Study-X") + ), times = seq(1, 1000, by = 0.5), lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/tests/os-exponential.R b/design/tests/os-exponential.R index 77ef5980..39ed78b6 100644 --- a/design/tests/os-exponential.R +++ b/design/tests/os-exponential.R @@ -7,7 +7,10 @@ options("jmpost.cache_dir" = file.path("local", "models")) library(bayesplot) jlist <- simulate_joint_data( - n_arm = c(1000, 1000), + design = list( + SimGroup(1000, "Arm-A", "Study-X"), + SimGroup(1000, "Arm-B", "Study-X") + ), times = seq(1, 1000, by = 0.5), lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/tests/os-loglogistic.R b/design/tests/os-loglogistic.R index 272c67f5..3f3b93bd 100644 --- a/design/tests/os-loglogistic.R +++ b/design/tests/os-loglogistic.R @@ -6,7 +6,10 @@ library(bayesplot) options("jmpost.cache_dir" = file.path("local", "models")) jlist <- simulate_joint_data( - n_arm = c(1000, 1000), + design = list( + SimGroup(1000, "Arm-A", "Study-X"), + SimGroup(1000, "Arm-B", "Study-X") + ), times = 1:2000, lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/tests/os-weibull.R b/design/tests/os-weibull.R index 3f90aae6..c469cb02 100644 --- a/design/tests/os-weibull.R +++ b/design/tests/os-weibull.R @@ -8,7 +8,10 @@ options("jmpost.cache_dir" = file.path("local", "models")) set.seed(6042) jlist <- simulate_joint_data( - n_arm = c(1000, 1000), + design = list( + SimGroup(1000, "Arm-A", "Study-X"), + SimGroup(1000, "Arm-B", "Study-X") + ), times = 1:2000, lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/tests/os-zero-covariates.R b/design/tests/os-zero-covariates.R index cb28d9ac..c4c185b0 100644 --- a/design/tests/os-zero-covariates.R +++ b/design/tests/os-zero-covariates.R @@ -7,7 +7,9 @@ options("jmpost.cache_dir" = file.path("local", "models")) true_lambda <- 1/100 jlist <- simulate_joint_data( - n_arm = c(500), + design = list( + SimGroup(500, "Arm-B", "Study-X") + ), times = seq(1, 1000, by = 0.5), lambda_cen = 1 / 9000, beta_cat = c( "A" = 0, "B" = 0, "C" = 0), diff --git a/design/tests/sf-no-link.R b/design/tests/sf-no-link.R index 06e4e618..c6e1d095 100644 --- a/design/tests/sf-no-link.R +++ b/design/tests/sf-no-link.R @@ -48,7 +48,10 @@ options("jmpost.cache_dir" = file.path("local", "models")) set.seed(7143) jlist <- simulate_joint_data( .debug = TRUE, - n_arm = c(85, 100), + design = list( + SimGroup(85, "Arm-A", "Study-X"), + SimGroup(100, "Arm-B", "Study-X") + ), times = seq(0, 3, by = (1/365)/2), lambda_cen = 1 / 9000, beta_cat = c( diff --git a/design/tests/sf-with-link.R b/design/tests/sf-with-link.R index c9734b5f..9a8b305a 100644 --- a/design/tests/sf-with-link.R +++ b/design/tests/sf-with-link.R @@ -15,7 +15,10 @@ options("jmpost.cache_dir" = file.path("local", "models")) ## Generate Test data with known parameters jlist <- simulate_joint_data( - n_arm = c(110, 110), + design = list( + SimGroup(110, "Arm-A", "Study-X"), + SimGroup(110, "Arm-B", "Study-X") + ), times = seq(0, 4, by = (1/365)/2), lambda_cen = 1 / 9000, beta_cat = c( diff --git a/inst/stan/lm-random-slope/link.stan b/inst/stan/lm-random-slope/link.stan index dc953b48..1c94e43f 100644 --- a/inst/stan/lm-random-slope/link.stan +++ b/inst/stan/lm-random-slope/link.stan @@ -5,6 +5,6 @@ transformed parameters { // matrix[Nind, 2] link_function_inputs; - link_function_inputs[, 1] = rep_vector(lm_rs_intercept, Nind); + link_function_inputs[, 1] = lm_rs_ind_intercept; link_function_inputs[, 2] = lm_rs_ind_rnd_slope; } diff --git a/inst/stan/lm-random-slope/model.stan b/inst/stan/lm-random-slope/model.stan index 165f4b54..7d4a0480 100755 --- a/inst/stan/lm-random-slope/model.stan +++ b/inst/stan/lm-random-slope/model.stan @@ -7,7 +7,7 @@ parameters { // Source - lm-random-slope/model.stan // - real lm_rs_intercept; + array [n_studies] real lm_rs_intercept; array [n_arms] real lm_rs_slope_mu; real lm_rs_slope_sigma; real lm_rs_sigma; @@ -19,10 +19,10 @@ transformed parameters { // // Source - lm-random-slope/model.stan // - + vector[Nind] lm_rs_ind_intercept = to_vector(lm_rs_intercept[pt_study_index]); vector[Nta_total] lm_rs_rslope_ind = to_vector(lm_rs_ind_rnd_slope[ind_index]); - vector[Nta_total] Ypred = lm_rs_intercept + lm_rs_rslope_ind .* Tobs; + vector[Nta_total] Ypred = lm_rs_ind_intercept[ind_index] + lm_rs_rslope_ind .* Tobs; log_lik += csr_matrix_times_vector( Nind, @@ -60,7 +60,7 @@ generated quantities { for (i in 1:n_pt_select_index) { int current_pt_index = pt_select_index[i]; y_fit_at_time_grid[i, ] = - lm_rs_intercept + + lm_rs_ind_intercept[current_pt_index] + lm_rs_ind_rnd_slope[current_pt_index] .* to_row_vector(lm_time_grid); } diff --git a/man/SimGroup-class.Rd b/man/SimGroup-class.Rd new file mode 100644 index 00000000..2d1f4d94 --- /dev/null +++ b/man/SimGroup-class.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulations.R +\docType{class} +\name{SimGroup-class} +\alias{SimGroup-class} +\alias{.SimGroup} +\alias{SimGroup} +\title{Define Simulation Group} +\usage{ +SimGroup(n, arm, study) +} +\arguments{ +\item{n}{(\code{numeric})\cr number of subjects in the group.} + +\item{arm}{(\code{character})\cr treatment arm.} + +\item{study}{(\code{character})\cr study name.} +} +\description{ +Specifies a simulation group to be used by \code{\link[=simulate_joint_data]{simulate_joint_data()}}. +} +\section{Slots}{ + +\describe{ +\item{\code{n}}{(\code{numeric})\cr See arguments.} + +\item{\code{arm}}{(\code{character})\cr See arguments.} + +\item{\code{study}}{(\code{character})\cr See arguments.} +}} + +\examples{ +SimGroup(n = 50, arm = "Arm-A", study = "Study-1") +} diff --git a/man/sim_lm_random_slope.Rd b/man/sim_lm_random_slope.Rd index d0c5152f..331043e5 100644 --- a/man/sim_lm_random_slope.Rd +++ b/man/sim_lm_random_slope.Rd @@ -14,9 +14,9 @@ sim_lm_random_slope( ) } \arguments{ -\item{intercept}{(\code{number})\cr the mean baseline value.} +\item{intercept}{(\code{number})\cr the mean baseline value for each study.} -\item{slope_mu}{(\code{numeric})\cr the population slope for the two treatment arms.} +\item{slope_mu}{(\code{numeric})\cr the population slope for each treatment arm.} \item{slope_sigma}{(\code{number})\cr the random slope standard deviation.} diff --git a/man/simulate_joint_data.Rd b/man/simulate_joint_data.Rd index f8d3ec1d..7e2d27d1 100644 --- a/man/simulate_joint_data.Rd +++ b/man/simulate_joint_data.Rd @@ -5,7 +5,8 @@ \title{Simulating Joint Longitudinal and Time-to-Event Data} \usage{ simulate_joint_data( - n_arm = c(50, 80), + design = list(SimGroup(n = 50, study = "Study-1", arm = "Arm-A"), SimGroup(n = 50, + study = "Study-1", arm = "Arm-B")), times = 0:2000, lambda_cen = 1/3, beta_cont = 0.2, @@ -17,7 +18,7 @@ simulate_joint_data( ) } \arguments{ -\item{n_arm}{(\code{numeric})\cr numbers of patients per treatment arm.} +\item{design}{(\code{list})\cr a list of \code{\link{SimGroup}} objects. See details.} \item{times}{(\code{numeric})\cr time grid, e.g. specifying the days after randomization.} @@ -42,3 +43,15 @@ List with simulated \code{lm} (longitudinal) and \code{os} (survival) data sets. \description{ Simulating Joint Longitudinal and Time-to-Event Data } +\details{ +The \code{design} argument is used to specify how many distinct groups should be simulated +including key information such as the number of subjects within the group as well as +which treatment arm and study the group belongs to. The \code{design} argument should be a +list of \code{\link{SimGroup}} objects e.g. + +\if{html}{\out{
}}\preformatted{design = list( + SimGroup(n = 50, study = "Study-1", arm = "Arm-A"), + SimGroup(n = 50, study = "Study-1", arm = "Arm-B") +) +}\if{html}{\out{
}} +} diff --git a/tests/testthat/_snaps/JointModelSamples.md b/tests/testthat/_snaps/JointModelSamples.md index abf042c2..df44bcdb 100644 --- a/tests/testthat/_snaps/JointModelSamples.md +++ b/tests/testthat/_snaps/JointModelSamples.md @@ -18,6 +18,7 @@ Variables: Ypred[2400] beta_os_cov[3] + lm_rs_ind_intercept[400] lm_rs_ind_rnd_slope[400] lm_rs_intercept lm_rs_rslope_ind[2400] diff --git a/tests/testthat/_snaps/survival_plot/survival-plot-no-wrap-no-ci-km-ggplot2.svg b/tests/testthat/_snaps/survival_plot/survival-plot-no-wrap-no-ci-km-ggplot2.svg index e4c6d4a6..88b5b151 100644 --- a/tests/testthat/_snaps/survival_plot/survival-plot-no-wrap-no-ci-km-ggplot2.svg +++ b/tests/testthat/_snaps/survival_plot/survival-plot-no-wrap-no-ci-km-ggplot2.svg @@ -18,7 +18,7 @@ - + @@ -309,6 +309,6 @@ A B C -survival_plot-no wrap + no ci + km + ggplot2 +survival_plot-no wrap + no ci + km + ggplot2 diff --git a/tests/testthat/_snaps/survival_plot/survival-plot-with-no-wrap-and-no-ci.svg b/tests/testthat/_snaps/survival_plot/survival-plot-with-no-wrap-and-no-ci.svg index d84e451c..69678884 100644 --- a/tests/testthat/_snaps/survival_plot/survival-plot-with-no-wrap-and-no-ci.svg +++ b/tests/testthat/_snaps/survival_plot/survival-plot-with-no-wrap-and-no-ci.svg @@ -18,7 +18,7 @@ - + diff --git a/tests/testthat/helper-example_data.R b/tests/testthat/helper-example_data.R index 1dd36a88..f27dc5d9 100644 --- a/tests/testthat/helper-example_data.R +++ b/tests/testthat/helper-example_data.R @@ -9,7 +9,10 @@ ensure_test_data_1 <- function() { set.seed(739) jlist <- simulate_joint_data( - n = c(250, 150), + design = list( + SimGroup(250, "Arm-A", "Study-X"), + SimGroup(150, "Arm-B", "Study-X") + ), times = 1:2000, lambda_cen = 1 / 9000, lm_fun = sim_lm_random_slope( diff --git a/tests/testthat/test-JointModelSamples.R b/tests/testthat/test-JointModelSamples.R index 78134bda..0144847a 100644 --- a/tests/testthat/test-JointModelSamples.R +++ b/tests/testthat/test-JointModelSamples.R @@ -3,7 +3,10 @@ test_that("smoke test for JointModelSamples", { set.seed(739) jlist <- simulate_joint_data( - n = c(250, 150), + design = list( + SimGroup(250, "Arm-A", "Study-X"), + SimGroup(150, "Arm-B", "Study-X") + ), times = 1:2000, lambda_cen = 1 / 9000, lm_fun = sim_lm_random_slope( diff --git a/tests/testthat/test-LongitudinalGSF.R b/tests/testthat/test-LongitudinalGSF.R index 5376bfe9..27a35964 100644 --- a/tests/testthat/test-LongitudinalGSF.R +++ b/tests/testthat/test-LongitudinalGSF.R @@ -59,7 +59,10 @@ test_that("Can recover known distributional parameters from a full GSF joint mod set.seed(7143) jlist <- simulate_joint_data( .debug = TRUE, - n_arm = c(80, 100), + design = list( + SimGroup(80, "Arm-A", "Study-X"), + SimGroup(100, "Arm-B", "Study-X") + ), times = seq(0, 3, by = (1 / 365) / 2), lambda_cen = 1 / 9000, beta_cat = c( @@ -138,7 +141,7 @@ test_that("Can recover known distributional parameters from a full GSF joint mod iter_warmup = 400, iter_sampling = 800, chains = 2, - refresh = 200, + refresh = 0, parallel_chains = 2 )) diff --git a/tests/testthat/test-LongitudinalQuantiles.R b/tests/testthat/test-LongitudinalQuantiles.R index d2bd8638..c3e1cc12 100644 --- a/tests/testthat/test-LongitudinalQuantiles.R +++ b/tests/testthat/test-LongitudinalQuantiles.R @@ -1,7 +1,7 @@ -ensure_test_data_1() test_that("Test that LongitudinalQuantities works as expected", { + ensure_test_data_1() expected_column_names <- c("median", "lower", "upper", "time", "group") diff --git a/tests/testthat/test-LongitudinalRandomSlope.R b/tests/testthat/test-LongitudinalRandomSlope.R index 824fc857..7be56b46 100644 --- a/tests/testthat/test-LongitudinalRandomSlope.R +++ b/tests/testthat/test-LongitudinalRandomSlope.R @@ -14,3 +14,73 @@ test_that("Print method for LongitudinalRandomSlope works as expected", { print(x) }) }) + +test_that("LongitudinalRandomSlope correctly generates an intercept per study", { + set.seed(3521) + sim_data <- simulate_joint_data( + times = seq(1, 1000) / 365, + design = list( + SimGroup(80, "Arm-A", "Study-X"), + SimGroup(120, "Arm-A", "Study-Y") + ), + lm_fun = sim_lm_random_slope( + intercept = c(50, 70), + slope_mu = 10, + slope_sigma = 0.5, + sigma = 2, + link_dsld = 0, + link_identity = 0 + ), + os_fun = sim_os_exponential(lambda = 1 / 100), + lambda_cen = 1 / 200, + .silent = TRUE + ) + + dat_lm <- sim_data$lm |> + dplyr::filter(time %in% (c(10, 100, 200, 500, 700, 900) / 365)) + + jdat <- DataJoint( + subject = DataSubject( + data = sim_data$os, + subject = "pt", + arm = "arm", + study = "study" + ), + longitudinal = DataLongitudinal( + data = dat_lm, + formula = sld ~ time + ) + ) + + jm <- JointModel( + longitudinal = LongitudinalRandomSlope( + intercept = prior_normal(60, 15), + slope_mu = prior_normal(10, 5), + slope_sigma = prior_lognormal(0.5, 1), + sigma = prior_lognormal(2, 1) + ), + link = Link() + ) + + mp <- suppressWarnings( + sampleStanModel( + jm, + data = jdat, + iter_warmup = 200, + iter_sampling = 400, + chains = 2, + refresh = 0, + parallel_chains = 2 + ) + ) + + samples <- mp@results$draws( + c("lm_rs_intercept", "lm_rs_slope_mu", "lm_rs_slope_sigma", "lm_rs_sigma"), + format = "draws_matrix" + ) + + ests <- apply(samples, 2, mean) + ests_se <- sqrt(apply(samples, 2, var)) + z_score <- (ests - c(50, 70, 10, 0.5, 2)) / ests_se + expect_true(all(abs(z_score) < qnorm(0.99))) +}) diff --git a/tests/testthat/test-LongitudinalSteinFojo.R b/tests/testthat/test-LongitudinalSteinFojo.R index 943fa404..1602b468 100644 --- a/tests/testthat/test-LongitudinalSteinFojo.R +++ b/tests/testthat/test-LongitudinalSteinFojo.R @@ -59,7 +59,10 @@ test_that("Can recover known distributional parameters from a SF joint model", { set.seed(9438) ## Generate Test data with known parameters jlist <- simulate_joint_data( - n_arm = c(120, 120), + design = list( + SimGroup(120, "Arm-A", "Study-X"), + SimGroup(120, "Arm-B", "Study-X") + ), times = seq(0, 4, by = (1 / 365) / 2), lambda_cen = 1 / 9000, beta_cat = c( diff --git a/tests/testthat/test-SurvivalExponential.R b/tests/testthat/test-SurvivalExponential.R index 4e8aa94f..d0c4e27f 100644 --- a/tests/testthat/test-SurvivalExponential.R +++ b/tests/testthat/test-SurvivalExponential.R @@ -5,7 +5,9 @@ test_that("SurvivalExponential can recover true parameter (no covariates)", { set.seed(2043) jlist <- simulate_joint_data( - n_arm = c(500), + design = list( + SimGroup(500, "Arm-A", "Study-X") + ), times = seq(1, 1000, by = 0.5), lambda_cen = 1 / 9000, beta_cat = c("A" = 0, "B" = 0, "C" = 0), @@ -58,7 +60,9 @@ test_that("SurvivalExponential can recover true parameter (including covariates) true_beta <- c(0.5, -0.2, 0.1) set.seed(20234) jlist <- simulate_joint_data( - n_arm = c(500), + design = list( + SimGroup(500, "Arm-A", "Study-X") + ), times = seq(1, 1000, by = 0.5), lambda_cen = 1 / 9000, beta_cat = c("A" = 0, "B" = true_beta[1], "C" = true_beta[2]), diff --git a/tests/testthat/test-SurvivalLoglogistic.R b/tests/testthat/test-SurvivalLoglogistic.R index 6f219e32..b91086c8 100644 --- a/tests/testthat/test-SurvivalLoglogistic.R +++ b/tests/testthat/test-SurvivalLoglogistic.R @@ -22,7 +22,9 @@ test_that("SurvivalLogLogistic can recover known values", { set.seed(837) jlist <- suppressMessages({ simulate_joint_data( - n_arm = c(300), + design = list( + SimGroup(300, "Arm-A", "Study-X") + ), times = seq(1, 2000, by = 0.5), lambda_cen = 1 / 9000, beta_cat = c("A" = 0, "B" = true_beta[1], "C" = true_beta[2]), diff --git a/tests/testthat/test-SurvivalQuantities.R b/tests/testthat/test-SurvivalQuantities.R index f238c155..7d403f85 100644 --- a/tests/testthat/test-SurvivalQuantities.R +++ b/tests/testthat/test-SurvivalQuantities.R @@ -1,8 +1,8 @@ -ensure_test_data_1() test_that("SurvivalQuantities and autoplot.SurvivalQuantities works as expected", { + ensure_test_data_1() expected_column_names <- c("median", "lower", "upper", "time", "group", "type") diff --git a/tests/testthat/test-brierScore.R b/tests/testthat/test-brierScore.R index 288da157..e8fdd2e4 100644 --- a/tests/testthat/test-brierScore.R +++ b/tests/testthat/test-brierScore.R @@ -15,7 +15,10 @@ test_that("brierScore(SurvivalQuantities) returns same results as survreg", { set.seed(11023) jlist <- simulate_joint_data( - n_arm = c(100, 150), + design = list( + SimGroup(100, "Arm-A", "Study-X"), + SimGroup(150, "Arm-B", "Study-X") + ), times = seq(1, 1000, by = 0.5), lambda_cen = 1 / 9000, beta_cat = c( @@ -58,6 +61,7 @@ test_that("brierScore(SurvivalQuantities) returns same results as survreg", { iter_sampling = 300, iter_warmup = 300, chains = 1, + refresh = 0, parallel_chains = 1 ) diff --git a/tests/testthat/test-misc_models.R b/tests/testthat/test-misc_models.R index f1479f21..75f30e60 100644 --- a/tests/testthat/test-misc_models.R +++ b/tests/testthat/test-misc_models.R @@ -16,7 +16,10 @@ test_that("Longitudinal Model doesn't print sampler rejection messages", { jlist <- simulate_joint_data( times = 1:1000, lambda_cen = 1 / 500, - n = c(200, 200), + design = list( + SimGroup(200, "Arm-A", "Study-X"), + SimGroup(200, "Arm-B", "Study-X") + ), lm_fun = sim_lm_random_slope(), os_fun = sim_os_exponential(1 / 100) ) diff --git a/tests/testthat/test-model_random_slope.R b/tests/testthat/test-model_random_slope.R index e8b89ef2..55ea7d45 100644 --- a/tests/testthat/test-model_random_slope.R +++ b/tests/testthat/test-model_random_slope.R @@ -4,7 +4,10 @@ test_that("Random Slope Model can recover known parameter values", { ## Generate data with known parameters set.seed(739) jlist <- simulate_joint_data( - n = c(250, 150), + design = list( + SimGroup(250, "Arm-A", "Study-X"), + SimGroup(150, "Arm-B", "Study-X") + ), times = 1:2000, lambda_cen = 1 / 9000, lm_fun = sim_lm_random_slope( diff --git a/tests/testthat/test-model_random_slope_2chain.R b/tests/testthat/test-model_random_slope_2chain.R index 0e5e31b4..aebaf608 100644 --- a/tests/testthat/test-model_random_slope_2chain.R +++ b/tests/testthat/test-model_random_slope_2chain.R @@ -8,7 +8,10 @@ test_that("Can recover known distribution parameters from random slope model whe set.seed(3251) jlist <- simulate_joint_data( - n = c(150, 150), + design = list( + SimGroup(150, "Arm-A", "Study-X"), + SimGroup(150, "Arm-B", "Study-X") + ), times = 1:2000, lambda_cen = 1 / 9000, beta_cat = c( diff --git a/tests/testthat/test-simulations.R b/tests/testthat/test-simulations.R index 8a580072..b833e91a 100644 --- a/tests/testthat/test-simulations.R +++ b/tests/testthat/test-simulations.R @@ -6,8 +6,8 @@ test_that("sim_lm_gsf works as expected", { df <- data.frame( pt = rep(c("1", "2", "3"), each = 5), time = rep(1:5, 3), - arm = rep(c("A", "B"), c(1, 2) * 5), - study = "study1" + arm = factor(rep(c("A", "B"), c(1, 2) * 5)), + study = factor("study1") ) result <- expect_silent(sim_lm_gsf( sigma = 0.003, @@ -40,8 +40,8 @@ test_that("sim_lm_random_slope works as expected", { df <- data.frame( pt = rep(c("1", "2", "3"), each = 5), time = rep(1:5, 3), - arm = rep(c("A", "B"), c(1, 2) * 5), - study = "study1" + arm = factor(rep(c("A", "B"), c(1, 2) * 5)), + study = factor("study1") ) result <- expect_silent(sim_lm_random_slope( intercept = 50, @@ -68,7 +68,10 @@ test_that("simulate_joint_data works as expected", { set.seed(5433) times <- seq(0, 4, by = (1 / 365) / 2) result <- expect_silent(simulate_joint_data( - n_arm = c(80, 80), + design = list( + SimGroup(50, "Arm-A", "Study-X"), + SimGroup(80, "Arm-B", "Study-X") + ), times = times, lambda_cen = 1 / 9000, beta_cat = c( @@ -93,7 +96,8 @@ test_that("simulate_joint_data works as expected", { os_fun = sim_os_weibull( lambda = 1 / 200 * 365, gamma = 0.97 - ) + ), + .silent = TRUE )) expect_type(result, "list") expect_s3_class(result$os, "tbl_df") @@ -101,7 +105,7 @@ test_that("simulate_joint_data works as expected", { names(result$os), c("pt", "time", "event", "cov_cont", "cov_cat", "study", "arm") ) - expect_equal(nrow(result$os), 80 + 80) + expect_equal(nrow(result$os), 50 + 80) expect_s3_class(result$lm, "tbl_df") expect_identical( names(result$lm), @@ -117,7 +121,9 @@ test_that("simulate_joint_data leads to valid DataJoint with almost only default set.seed(321) sim_data <- simulate_joint_data( lm_fun = sim_lm_random_slope(), - os_fun = sim_os_exponential(lambda = 1 / 100) + os_fun = sim_os_exponential(lambda = 1 / 100), + lambda_cen = 1 / 200, + .silent = TRUE ) os_data <- sim_data$os long_data <- sim_data$lm |> @@ -147,7 +153,10 @@ test_that("simulate_joint_data leads to valid DataJoint with almost only default test_that("sim_os_exponential creates a dataset with the correct parameter", { set.seed(321) sim_data <- simulate_joint_data( - n_arm = c(200, 200), + design = list( + SimGroup(200, "Arm-A", "Study-X"), + SimGroup(200, "Arm-B", "Study-X") + ), times = seq(1, 1000), beta_cont = 0, beta_cat = c("A" = 0, "B" = 0, "C" = 0), @@ -171,7 +180,10 @@ test_that("sim_os_weibull creates a dataset with the correct parameter", { lambda_real <- 1 / 100 gamma_real <- 0.9 sim_data <- simulate_joint_data( - n_arm = c(200, 200), + design = list( + SimGroup(200, "Arm-A", "Study-X"), + SimGroup(200, "Arm-B", "Study-X") + ), times = seq(1, 1000), beta_cont = 0.2, beta_cat = c("A" = 0, "B" = -0.6, "C" = 0.6), @@ -204,3 +216,61 @@ test_that("sim_os_weibull creates a dataset with the correct parameter", { z_score <- (log_scale_obs - log_scale_real) / log_scale_se expect_true(abs(z_score) < qnorm(0.9)) }) + + +test_that("simulate_joint_data correctly generates an intercept per study", { + set.seed(3521) + sim_data <- simulate_joint_data( + times = seq(1, 1000) / 365, + design = list( + SimGroup(200, "Arm-A", "Study-X"), + SimGroup(200, "Arm-A", "Study-Y") + ), + lm_fun = sim_lm_random_slope( + intercept = c(50, 70), + slope_mu = 10, + slope_sigma = 0.5, + sigma = 2, + link_dsld = 0, + link_identity = 0 + ), + os_fun = sim_os_exponential(lambda = 1 / 100), + lambda_cen = 1 / 200, + .silent = TRUE + ) + + dat_lm <- sim_data$lm |> + dplyr::filter(time %in% (c(10, 100, 200, 500, 700, 900) / 365)) + + mod <- lme4::lmer( + dat = dat_lm, + formula = sld ~ 0 + study + time + (0 + time | pt), + ) + + ests <- lme4::fixef(mod) + ests_se <- sqrt(diag(as.matrix(vcov(mod)))) + z_score <- (ests - c(50, 70, 10)) / ests_se + expect_true(all(abs(z_score) < qnorm(0.99))) +}) + + +test_that("SimGroup() works as expected", { + sim_group <- SimGroup(100, "Arm-A", "Study-X") + expect_s4_class(sim_group, "SimGroup") + expect_equal(sim_group@n, 100) + expect_equal(sim_group@arm, "Arm-A") + expect_equal(sim_group@study, "Study-X") + + expect_error( + SimGroup(c(100, 200), "Arm-A", "Study-X"), + regexp = "`n` must be a length 1 integer" + ) + expect_error( + SimGroup(100, c("Arm-A", "Arm-b"), "Study-X"), + regexp = "`arm` must be a length 1 string" + ) + expect_error( + SimGroup(100, "Arm-A", c("Study-X", "Study-Z")), + regexp = "`study` must be a length 1 string" + ) +}) diff --git a/vignettes/model_fitting.Rmd b/vignettes/model_fitting.Rmd index a71b2d48..db6b56e8 100644 --- a/vignettes/model_fitting.Rmd +++ b/vignettes/model_fitting.Rmd @@ -128,7 +128,10 @@ So let's run the code for that: ```{r simulate_data} set.seed(129) sim_data <- simulate_joint_data( - n_arm = c(50, 50), + design = list( + SimGroup(50, "Arm-A", "Study-X"), + SimGroup(50, "Arm-B", "Study-X") + ), times = 1:2000, lambda_cen = 1 / 9000, beta_cat = c( diff --git a/vignettes/statistical-specification.Rmd b/vignettes/statistical-specification.Rmd index 59f6ad57..6d77e3d4 100644 --- a/vignettes/statistical-specification.Rmd +++ b/vignettes/statistical-specification.Rmd @@ -74,17 +74,19 @@ Where: ## Random-Slope Model $$ -y_{ij} = \mu_0 + s_i t_{ij} + \epsilon_{ij} +y_{ij} = \mu_{l(i)} + s_i t_{ij} + \epsilon_{ij} $$ where: - $y_{ij}$ is the tumour size for subject $i$ at timepoint $j$ -- $\mu_0$ is the intercept -- $s_i$ is the random slope for subject $i$ with $s_i \sim N(\mu_{sk}, \sigma_s)$ -- $\mu_{sk}$ is the mean for the random slope within treatment arm $k$ +- $\mu_{l(i)}$ is the intercept for subject $i$ +- $s_i$ is the random slope for subject $i$ with $s_i \sim N(\mu_{sk(i)}, \sigma_s)$ +- $\mu_{sk(i)}$ is the mean for the random slope within treatment arm $k(i)$ - $\sigma_s$ is the variance term for the slopes - $\epsilon_{ij}$ is the error term with $\epsilon_{ij} \sim N(0, \sigma)$ +* $k(i)$ is the treatment arm index for subject $i$ +* $l(i)$ is the study index for subject $i$ ### Derivative of the SLD Trajectory Link