Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simulation function supports multiple studies #278

Merged
merged 4 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ export(Parameter)
export(ParameterList)
export(Prior)
export(STAN_BLOCKS)
export(SimGroup)
export(StanModel)
export(StanModule)
export(Surv)
Expand Down Expand Up @@ -172,6 +173,7 @@ exportClasses(LongitudinalSteinFojo)
exportClasses(Parameter)
exportClasses(ParameterList)
exportClasses(Prior)
exportClasses(SimGroup)
exportClasses(StanModel)
exportClasses(StanModule)
exportClasses(SurvivalExponential)
Expand Down
2 changes: 1 addition & 1 deletion R/LongitudinalRandomSlope.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ LongitudinalRandomSlope <- function(
name = "Random Slope",
stan = stan,
parameters = ParameterList(
Parameter(name = "lm_rs_intercept", prior = intercept, size = 1),
Parameter(name = "lm_rs_intercept", prior = intercept, size = "n_studies"),
Parameter(name = "lm_rs_slope_mu", prior = slope_mu, size = "n_arms"),
Parameter(name = "lm_rs_slope_sigma", prior = slope_sigma, size = 1),
Parameter(name = "lm_rs_sigma", prior = sigma, size = 1),
Expand Down
95 changes: 86 additions & 9 deletions R/simulations.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,64 @@ get_timepoints <- function(x) {
)
}

#' Define Simulation Group
#'
#' Specifies a simulation group to be used by [`simulate_joint_data()`].
#'
#' @param n (`numeric`)\cr number of subjects in the group.
#' @param arm (`character`)\cr treatment arm.
#' @param study (`character`)\cr study name.
#'
#' @slot n (`numeric`)\cr See arguments.
#' @slot arm (`character`)\cr See arguments.
#' @slot study (`character`)\cr See arguments.
#'
#' @examples
#' SimGroup(n = 50, arm = "Arm-A", study = "Study-1")
#' @name SimGroup-class
#' @exportClass SimGroup
.SimGroup <- setClass(
"SimGroup",
slots = c(
n = "numeric",
study = "character",
arm = "character"
gowerc marked this conversation as resolved.
Show resolved Hide resolved
)
)

#' @export
#' @rdname SimGroup-class
SimGroup <- function(n, arm, study) {
.SimGroup(
n = n,
arm = arm,
study = study
)
}


setValidity(
"SimGroup",
function(object) {
if (length(object@n) != 1) {
return("`n` must be a length 1 integer")
}
if (length(object@arm) != 1) {
return("`arm` must be a length 1 string")
}
if (length(object@study) != 1) {
return("`study` must be a length 1 string")
}
if (any(object@n < 1) | any(object@n %% 1 != 0)) {
return("`n` must be positive integer")
}
return(TRUE)
}
)

#' Simulating Joint Longitudinal and Time-to-Event Data
#'
#' @param n_arm (`numeric`)\cr numbers of patients per treatment arm.
#' @param design (`list`)\cr a list of [`SimGroup`] objects. See details.
#' @param times (`numeric`)\cr time grid, e.g. specifying the days after randomization.
#' @param lambda_cen (`number`)\cr rate of the exponential censoring distribution.
#' @param beta_cont (`number`)\cr coefficient for the continuous covariate.
Expand All @@ -37,10 +91,26 @@ get_timepoints <- function(x) {
#' @param .debug (`flag`)\cr whether to enter debug mode such that the function
#' would only return a subset of columns.
#'
#' @details
#'
#' The `design` argument is used to specify how many distinct groups should be simulated
#' including key information such as the number of subjects within the group as well as
#' which treatment arm and study the group belongs to. The `design` argument should be a
#' list of [`SimGroup`] objects e.g.
#' ```
#' design = list(
#' SimGroup(n = 50, study = "Study-1", arm = "Arm-A"),
#' SimGroup(n = 50, study = "Study-1", arm = "Arm-B")
#' )
#' ```
#'
#' @returns List with simulated `lm` (longitudinal) and `os` (survival) data sets.
#' @export
simulate_joint_data <- function(
n_arm = c(50, 80), # Number of arms and number of subjects per arm
design = list(
SimGroup(n = 50, study = "Study-1", arm = "Arm-A"),
SimGroup(n = 50, study = "Study-1", arm = "Arm-B")
),
times = 0:2000,
lambda_cen = 1 / 3,
beta_cont = 0.2,
Expand All @@ -50,24 +120,31 @@ simulate_joint_data <- function(
.silent = FALSE,
.debug = FALSE
) {
n <- sum(n_arm)

assert(
all(vapply(design, \(x) is(x, "SimGroup"), logical(1))),
msg = "All elements of `design` must be of class `SimGroup`"
)

n_group <- vapply(design, function(x) x@n, numeric(1))
arms <- vapply(design, function(x) x@arm, character(1))
studies <- vapply(design, function(x) x@study, character(1))
n <- sum(n_group)
u_pts <- sprintf("pt_%05i", seq_len(n))
bounds <- get_timepoints(times)

ARMS <- paste0("Group-", seq_along(n_arm))

os_baseline <- dplyr::tibble(pt = u_pts) |>
dplyr::mutate(cov_cont = stats::rnorm(n)) |>
dplyr::mutate(cov_cat = factor(
sample(c("A", "B", "C"), replace = TRUE, size = n),
levels = c("A", "B", "C")
sample(names(beta_cat), replace = TRUE, size = n),
levels = names(beta_cat)
)) |>
dplyr::mutate(log_haz_cov = .data$cov_cont * beta_cont + beta_cat[.data$cov_cat]) |>
dplyr::mutate(survival = stats::runif(n)) |>
dplyr::mutate(chazard_limit = -log(.data$survival)) |>
dplyr::mutate(time_cen = stats::rexp(n, lambda_cen)) |>
dplyr::mutate(study = factor("Study-1")) |>
dplyr::mutate(arm = factor(rep(ARMS, times = n_arm), levels = ARMS)) |>
dplyr::mutate(arm = factor(rep(arms, times = n_group), levels = unique(arms))) |>
dplyr::mutate(study = factor(rep(studies, times = n_group), levels = unique(studies))) |>
dplyr::select(!dplyr::all_of("survival"))

time_dat <- expand.grid(
Expand Down
6 changes: 4 additions & 2 deletions R/simulations_gsf.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ sim_lm_gsf <- function(

assert_that(
length(unique(lm_base$study)) == length(mu_b),
is.factor(lm_base$study),
is.factor(lm_base$arm),
length(mu_b) == 1,
length(sigma) == 1,
length(mu_s) == length(unique(lm_base$arm)),
Expand All @@ -92,8 +94,8 @@ sim_lm_gsf <- function(

baseline_covs <- lm_base |>
dplyr::distinct(.data$pt, .data$arm, .data$study) |>
dplyr::mutate(study_idx = as.numeric(factor(as.character(.data$study)))) |>
dplyr::mutate(arm_idx = as.numeric(factor(as.character(.data$arm)))) |>
dplyr::mutate(study_idx = as.numeric(.data$study)) |>
dplyr::mutate(arm_idx = as.numeric(.data$arm)) |>
dplyr::mutate(psi_b = stats::rlnorm(dplyr::n(), log(mu_b[.data$study_idx]), omega_b)) |>
dplyr::mutate(psi_s = stats::rlnorm(dplyr::n(), log(mu_s[.data$arm_idx]), omega_s)) |>
dplyr::mutate(psi_g = stats::rlnorm(dplyr::n(), log(mu_g[.data$arm_idx]), omega_g)) |>
Expand Down
29 changes: 19 additions & 10 deletions R/simulations_rs.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

#' Construct a Simulation Function for Longitudinal Data from Random Slope Model
#'
#' @param intercept (`number`)\cr the mean baseline value.
#' @param slope_mu (`numeric`)\cr the population slope for the two treatment arms.
#' @param intercept (`number`)\cr the mean baseline value for each study.
#' @param slope_mu (`numeric`)\cr the population slope for each treatment arm.
#' @param slope_sigma (`number`)\cr the random slope standard deviation.
#' @param sigma (`number`)\cr the variance of the longitudinal values.
#' @param link_dsld (`number`)\cr the link coefficient for the DSLD contribution.
Expand All @@ -23,20 +23,29 @@ sim_lm_random_slope <- function(
function(lm_base) {

assert_that(
length(slope_mu) == 1 | length(slope_mu) == length(unique(lm_base$arm)),
msg = "slope_mu should either be length 1 or equal to the length of n_arm"
is.factor(lm_base$study),
is.factor(lm_base$arm)
)

if (length(slope_mu) == 1) {
slope_mu <- rep(slope_mu, length(unique(lm_base$arm)))
}
assert_that(
length(slope_mu) == length(unique(lm_base$arm)),
msg = "`length(slope_mu)` should be equal to the number of unique studies"
)

assert_that(
length(intercept) == length(unique(lm_base$study)),
msg = "`length(intercept)` should be equal to the number of unique studies"
)

rs_baseline <- lm_base |>
dplyr::distinct(.data$pt, .data$arm) |>
dplyr::distinct(.data$pt, .data$arm, .data$study) |>
dplyr::mutate(intercept = intercept[as.numeric(.data$study)]) |>
dplyr::mutate(slope_ind = stats::rnorm(
dplyr::n(), slope_mu[as.numeric(factor(as.character(.data$arm)))], sd = slope_sigma
n = dplyr::n(),
mean = slope_mu[as.numeric(.data$arm)],
sd = slope_sigma
)) |>
dplyr::select(!dplyr::any_of("arm"))
dplyr::select(!dplyr::any_of(c("arm", "study")))

lm_dat <- lm_base |>
dplyr::mutate(err = stats::rnorm(dplyr::n(), 0, sigma)) |>
Expand Down
6 changes: 4 additions & 2 deletions R/simulations_sf.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,17 @@ sim_lm_sf <- function(
length(unique(lm_base$study)) == length(mu_b),
length(mu_b) == 1,
length(sigma) == 1,
is.factor(lm_base$study),
is.factor(lm_base$arm),
length(mu_s) == length(unique(lm_base$arm)),
length(mu_s) == length(mu_g),
length(c(omega_b, omega_s, omega_g)) == 3
)

baseline_covs <- lm_base |>
dplyr::distinct(.data$pt, .data$arm, .data$study) |>
dplyr::mutate(study_idx = as.numeric(factor(as.character(.data$study)))) |>
dplyr::mutate(arm_idx = as.numeric(factor(as.character(.data$arm)))) |>
dplyr::mutate(study_idx = as.numeric(.data$study)) |>
dplyr::mutate(arm_idx = as.numeric(.data$arm)) |>
dplyr::mutate(psi_b = stats::rlnorm(dplyr::n(), log(mu_b[.data$study_idx]), omega_b)) |>
dplyr::mutate(psi_s = stats::rlnorm(dplyr::n(), log(mu_s[.data$arm_idx]), omega_s)) |>
dplyr::mutate(psi_g = stats::rlnorm(dplyr::n(), log(mu_g[.data$arm_idx]), omega_g))
Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ reference:
- sim_os_exponential
- sim_os_loglogistic
- sim_os_weibull
- SimGroup
- title: Prior Distributions
contents:
- Prior
Expand Down
4 changes: 3 additions & 1 deletion design/debug-gsf/compare/model.R
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,9 @@ devtools::load_all(export = FALSE, path = "../../..")
set.seed(4513)
jlist <- simulate_joint_data(
.debug = TRUE,
n_arm = c(50),
design = list(
SimGroup(50, "Arm-A", "Study-X")
),
times = seq(0, 3, by = (1/365)/2),
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
5 changes: 4 additions & 1 deletion design/example_of_use.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ write_stan(jm, "local/debug.stan")

## Generate Test data with known parameters
jlist <- simulate_joint_data(
n_arm = c(500, 500),
design = list(
SimGroup(500, "Arm-A", "Study-X"),
SimGroup(500, "Arm-B", "Study-X")
),
times = 1:2000,
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
10 changes: 8 additions & 2 deletions design/tests/gsf-no-link.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ options("jmpost.cache_dir" = file.path("local", "models"))
# set.seed(7143)
# jlist <- simulate_joint_data(
# .debug = TRUE,
# n_arm = c(70, 90),
# design = list(
# SimGroup(70, "Arm-A", "Study-X"),
# SimGroup(90, "Arm-B", "Study-X")
# ),
# times = seq(0, 3, by = (1/365)/2),
# lambda_cen = 1 / 9000,
# beta_cat = c(
Expand All @@ -48,7 +51,10 @@ options("jmpost.cache_dir" = file.path("local", "models"))
set.seed(7143)
jlist <- simulate_joint_data(
.debug = TRUE,
n_arm = c(85, 100),
design = list(
SimGroup(85, "Arm-A", "Study-X"),
SimGroup(100, "Arm-B", "Study-X")
),
times = seq(0, 3, by = (1/365)/2),
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
4 changes: 3 additions & 1 deletion design/tests/gsf-with-link-identity.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ options("jmpost.cache_dir" = file.path("local", "models"))

## Generate Test data with known parameters
jlist <- simulate_joint_data(
n_arm = 200,
design = list(
SimGroup(200, "Arm-A", "Study-X")
),
times = seq(0, 1000),
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
5 changes: 4 additions & 1 deletion design/tests/gsf-with-link.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ options("jmpost.cache_dir" = file.path("local", "models"))

## Generate Test data with known parameters
jlist <- simulate_joint_data(
n_arm = c(80, 80),
design = list(
SimGroup(80, "Arm-A", "Study-X"),
SimGroup(80, "Arm-B", "Study-X")
),
times = seq(0, 4, by = (1/365)/2),
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
5 changes: 4 additions & 1 deletion design/tests/os-brier-score.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ options("jmpost.cache_dir" = file.path("local", "models"))
library(bayesplot)

jlist <- simulate_joint_data(
n_arm = c(1000, 1000),
design = list(
SimGroup(1000, "Arm-A", "Study-X"),
SimGroup(1000, "Arm-B", "Study-X")
),
times = seq(1, 1000, by = 0.5),
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
5 changes: 4 additions & 1 deletion design/tests/os-exponential.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ options("jmpost.cache_dir" = file.path("local", "models"))
library(bayesplot)

jlist <- simulate_joint_data(
n_arm = c(1000, 1000),
design = list(
SimGroup(1000, "Arm-A", "Study-X"),
SimGroup(1000, "Arm-B", "Study-X")
),
times = seq(1, 1000, by = 0.5),
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
5 changes: 4 additions & 1 deletion design/tests/os-loglogistic.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ library(bayesplot)
options("jmpost.cache_dir" = file.path("local", "models"))

jlist <- simulate_joint_data(
n_arm = c(1000, 1000),
design = list(
SimGroup(1000, "Arm-A", "Study-X"),
SimGroup(1000, "Arm-B", "Study-X")
),
times = 1:2000,
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
5 changes: 4 additions & 1 deletion design/tests/os-weibull.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ options("jmpost.cache_dir" = file.path("local", "models"))
set.seed(6042)

jlist <- simulate_joint_data(
n_arm = c(1000, 1000),
design = list(
SimGroup(1000, "Arm-A", "Study-X"),
SimGroup(1000, "Arm-B", "Study-X")
),
times = 1:2000,
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
4 changes: 3 additions & 1 deletion design/tests/os-zero-covariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ options("jmpost.cache_dir" = file.path("local", "models"))
true_lambda <- 1/100

jlist <- simulate_joint_data(
n_arm = c(500),
design = list(
SimGroup(500, "Arm-B", "Study-X")
),
times = seq(1, 1000, by = 0.5),
lambda_cen = 1 / 9000,
beta_cat = c( "A" = 0, "B" = 0, "C" = 0),
Expand Down
5 changes: 4 additions & 1 deletion design/tests/sf-no-link.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ options("jmpost.cache_dir" = file.path("local", "models"))
set.seed(7143)
jlist <- simulate_joint_data(
.debug = TRUE,
n_arm = c(85, 100),
design = list(
SimGroup(85, "Arm-A", "Study-X"),
SimGroup(100, "Arm-B", "Study-X")
),
times = seq(0, 3, by = (1/365)/2),
lambda_cen = 1 / 9000,
beta_cat = c(
Expand Down
Loading
Loading