From 0c73341f33e295253bd1a715d170ded788e8a033 Mon Sep 17 00:00:00 2001 From: Chris Knoll Date: Mon, 23 Sep 2024 00:15:20 -0400 Subject: [PATCH 1/2] Added implementation of 'combination cohorts'. Added simple test. --- NAMESPACE | 5 + R/CohortConstruction.R | 40 +-- R/CombinedCohort.R | 250 ++++++++++++++++++ R/CombinedCohortQueryBuilder.R | 16 ++ R/SerializeUtils.R | 69 +++++ R/Subsets.R | 20 -- .../CombinedCohortDefinition.sql | 16 ++ .../combinedCohorts/unionCombine.sql | 14 + man/CombinedCohortDef.Rd | 103 ++++++++ man/CombinedCohortOp.Rd | 113 ++++++++ man/addCombinedCohort.Rd | 26 ++ man/createCombinedCohortDef.Rd | 18 ++ man/createCombinedCohortOp.Rd | 16 ++ tests/testthat/test-CombinationCohorts.R | 66 +++++ 14 files changed, 734 insertions(+), 38 deletions(-) create mode 100644 R/CombinedCohort.R create mode 100644 R/CombinedCohortQueryBuilder.R create mode 100644 R/SerializeUtils.R create mode 100644 inst/sql/sql_server/combinedCohorts/CombinedCohortDefinition.sql create mode 100644 inst/sql/sql_server/combinedCohorts/unionCombine.sql create mode 100644 man/CombinedCohortDef.Rd create mode 100644 man/CombinedCohortOp.Rd create mode 100644 man/addCombinedCohort.Rd create mode 100644 man/createCombinedCohortDef.Rd create mode 100644 man/createCombinedCohortOp.Rd create mode 100644 tests/testthat/test-CombinationCohorts.R diff --git a/NAMESPACE b/NAMESPACE index 62124bb..17fb2f8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,16 +2,21 @@ export(CohortSubsetDefinition) export(CohortSubsetOperator) +export(CombinedCohortDef) +export(CombinedCohortOp) export(DemographicSubsetOperator) export(LimitSubsetOperator) export(SubsetCohortWindow) export(SubsetOperator) export(addCohortSubsetDefinition) +export(addCombinedCohort) export(checkAndFixCohortDefinitionSetDataTypes) export(computeChecksum) export(createCohortSubset) export(createCohortSubsetDefinition) export(createCohortTables) +export(createCombinedCohortDef) +export(createCombinedCohortOp) export(createDemographicSubset) export(createEmptyCohortDefinitionSet) export(createEmptyNegativeControlOutcomeCohortSet) diff --git a/R/CohortConstruction.R b/R/CohortConstruction.R index e27d222..062ecc1 100644 --- a/R/CohortConstruction.R +++ b/R/CohortConstruction.R @@ -106,24 +106,28 @@ generateCohortSet <- function(connectionDetails = NULL, if (incremental) { recordKeepingFile <- file.path(incrementalFolder, "GeneratedCohorts.csv") - if (isTRUE(attr(cohortDefinitionSet, "hasSubsetDefinitions"))) { - cohortDefinitionSet$checksum <- "" - for (i in 1:nrow(cohortDefinitionSet)) { + cohortDefinitionSet$checksum <- "" + for (i in 1:nrow(cohortDefinitionSet)) { + if (isTRUE(attr(cohortDefinitionSet, "hasSubsetDefinitions"))) { # This implementation supports recursive definitions (subsetting subsets) because the subsets have to be added in order - if (cohortDefinitionSet$subsetParent[i] != cohortDefinitionSet$cohortId[i]) { + if (cohortDefinitionSet$isSubset[i] && cohortDefinitionSet$subsetParent[i] != cohortDefinitionSet$cohortId[i]) { j <- which(cohortDefinitionSet$cohortId == cohortDefinitionSet$subsetParent[i]) cohortDefinitionSet$checksum[i] <- computeChecksum(paste( cohortDefinitionSet$sql[j], cohortDefinitionSet$sql[i] )) - } else { - cohortDefinitionSet$checksum[i] <- computeChecksum(cohortDefinitionSet$sql[i]) } + } else if (isTRUE(attr(cohortDefinitionSet, "hasCombinedCohorts"))) { + dependantCohortIds <- as.integer(strsplit(cohortDefinitionSet$dependentCohorts[i])) + dependentCohortIdx <- which(cohortDefinitionSet$cohortId %in% dependantCohortIds) + cohortDefinitionSet$checksum[i] <- + computeChecksum(paste0(c(cohortDefinitionSet$sql[dependentCohortIdx], cohortDefinitionSet$sql[i]), collapse = "")) + } else { + cohortDefinitionSet$checksum <- computeChecksum(cohortDefinitionSet$sql) } - } else { - cohortDefinitionSet$checksum <- computeChecksum(cohortDefinitionSet$sql) } } + # Create the cluster # DEV NOTE :: running subsets in a multiprocess setup will not work with subsets that subset other subsets # To resolve this issue we need to execute the dependency tree. @@ -145,7 +149,7 @@ generateCohortSet <- function(connectionDetails = NULL, dplyr::select("cohortId") %>% dplyr::pull() } - + # Apply the generation operation to the cluster cohortsGenerated <- ParallelLogger::clusterApply( cluster, @@ -253,7 +257,15 @@ generateCohort <- function(cohortId = NULL, rlang::inform(paste0(i, "/", nrow(cohortDefinitionSet), "- Generating cohort: ", cohortName, " (id = ", cohortId, ")")) sql <- cohortDefinitionSet$sql[i] - if (!isSubset) { + if (isSubset) { + sql <- SqlRender::render( + sql = sql, + cdm_database_schema = cdmDatabaseSchema, + cohort_table = cohortTableNames$cohortTable, + cohort_database_schema = cohortDatabaseSchema, + warnOnMissingParameters = FALSE + ) + } else { # combined cohorts apply same paramaters as standard cohort generation sql <- SqlRender::render( sql = sql, cdm_database_schema = cdmDatabaseSchema, @@ -269,14 +281,6 @@ generateCohort <- function(cohortId = NULL, results_database_schema.cohort_censor_stats = paste(cohortDatabaseSchema, cohortTableNames$cohortCensorStatsTable, sep = "."), warnOnMissingParameters = FALSE ) - } else { - sql <- SqlRender::render( - sql = sql, - cdm_database_schema = cdmDatabaseSchema, - cohort_table = cohortTableNames$cohortTable, - cohort_database_schema = cohortDatabaseSchema, - warnOnMissingParameters = FALSE - ) } sql <- SqlRender::translate( sql = sql, diff --git a/R/CombinedCohort.R b/R/CombinedCohort.R new file mode 100644 index 0000000..65c4b42 --- /dev/null +++ b/R/CombinedCohort.R @@ -0,0 +1,250 @@ +# CombinedCohortOp ------------- +#' @title A Combined cohort operation used to UNION or INTERSECT cohorts. Note: only UNION supported. +#' @export +#' @description +#' Defines a UNION or INTERSECT on a set of cohorts. +CombinedCohortOp <- R6::R6Class( + classname = "CombinedCohortOp", + private = list( + .targetCohortIds = NULL, + .opType = "union" + ), + active = list( + #' @field targetCohortIds The list of cohorts to apply in this group. + targetCohortIds = function(targetCohortIds) { + if (missing(targetCohortIds)) { + return(private$.targetCohortIds) + } + checkmate::assertList(as.list(targetCohortIds), types="numeric", min.len = 0) + private$.targetCohortIds <- targetCohortIds + return(self) + }, + #' @field opType The group operation, either 'union' or 'intersect' + opType = function(opType) { + if (missing(opType)) { + return(private$.opType) + } + checkmate::assertChoice(opType, c("union")) + private$.opType <- opType + return(self) + } + ), + public = list( + #' @description + #' creates a new instance, using the provided data param if provided. + #' @param data the data (as a json string or list) to initialize with + initialize = function(data = list()) { + dataList <- .convertJSON(data) + + if ("targetCohortIds" %in% names (dataList)) self$targetCohortIds <- dataList$targetCohortIds + if ("opType" %in% names (dataList)) self$opType <- dataList$opType + }, + #' @description List representation of object + toList = function() { + .removeEmpty(list( + targetCohortIds = .toJsonArray(private$.targetCohortIds), + opType = jsonlite::unbox(private$.opType) + )) + }, + #' To JSON + #' @description json serialized representation of object + toJSON = function() { + .toJSON(self$toList()) + }, + + #' Is Equal to + #' @description Compare CombinedCohortGroup to another + #' @param other CombinedCohortGroup instance + isEqualTo = function(other) { + checkmate::assertR6(other, "CombinedCohortGroup") + return(other$toJSON() == self$toJSON()) + }, + #' getDependentCohortIds + #' @description Gets the dependent cohortIds from this operation + getDependentCohortIds = function() { + if (length(private$.targetCohortIds) == 0) return(c()) + return(private$.targetCohortIds) + } + ) +) + +# CombinedCohortDef ------------- +#' @title A CombinedCohortDef +#' @export +#' @description +#' Defines the ID, name and operation to produce a new cohort. +CombinedCohortDef <- R6::R6Class( + classname = "CombinedCohortDef", + private = list( + .cohortId = NA, + .cohortName = NA, + .expression = NULL + ), + active = list( + #' @field cohortId The list of cohorts to apply in this group. + cohortId = function(cohortId) { + if (missing(cohortId)) { + return(private$.cohortId) + } + checkmate::assertInt(cohortId) + private$.cohortId <- cohortId + return(self) + }, + #' @field cohortName the name given to this outcome definition + cohortName = function(cohortName) { + if (missing(cohortName)) { + private$.cohortName + } else { + # check type + checkmate::assertCharacter(cohortName) + private$.cohortName <- cohortName + self + } + }, + #' @field expression The combo operator that is the root of the definition. + expression = function(expression) { + if (missing(expression)) { + return(private$.expression) + } + checkmate::assertClass(expression, classes="CombinedCohortOp") + private$.expression <- expression + return(self) + } + ), + public = list( + #' @description + #' creates a new instance, using the provided data param if provided. + #' @param data the data (as a json string or list) to initialize with + initialize = function(data = list()) { + dataList <- .convertJSON(data) + + if ("cohortId" %in% names (dataList)) self$targetCohortIds <- dataList$cohortId + if ("cohortName" %in% names (dataList)) self$cohortName <- dataList$cohortName + if ("expression" %in% names (dataList)) self$expression<- CohortGenerator::CombinedCohortOp$new(dataList$expression) + }, + #' @description List representation of object + toList = function() { + .removeEmpty(list( + cohortId = jsonlite::unbox(private$.cohortId), + cohortName = jsonlite::unbox(private$.cohortName), + expression = .r6ToListOrNA(private$.expression) + )) + }, + #' To JSON + #' @description json serialized representation of object + toJSON = function() { + .toJSON(self$toList()) + }, + + #' Is Equal to + #' @description Compare CombinedCohortDef to another + #' @param other CombinedCohortDef instance + isEqualTo = function(other) { + checkmate::assertR6(other, "CombinedCohortDef") + return(other$toJSON() == self$toJSON()) + } + ) +) + +### Factory Functions + +#' Create CombinedCohortOp instance +#' @description +#' A factory function to create CombinedCohortOp +#' @export +#' @param targetCohortIds list of target cohort IDs to combine in this operation +#' @param opType The op type of this cohort combination, can only be 'union' +createCombinedCohortOp <- function(targetCohortIds, opType) { + + cohortOp <- CombinedCohortOp$new() + if (!missing(targetCohortIds)) cohortOp$targetCohortIds <- targetCohortIds + if (!missing(opType)) cohortOp$opType <- opType + + return (cohortOp); +} + +#' Defines a combined cohort using combined cohort operations +#' @description +#' Creates an instance of CombinedCohortDef with the provided cohortId, cohortName and the combine operator expression +#' @export +#' @param cohortId The output cohort id from applying the combine expression. +#' @param cohortName The output cohort name +#' @param expression The combine operator that will yield the final cohort. +createCombinedCohortDef <- function(cohortId, cohortName, expression) { + + cohortDef <- CombinedCohortDef$new(); + if (!missing(cohortId)) cohortDef$cohortId <- cohortId; + if (!missing(cohortName)) cohortDef$cohortName <- cohortName; + if (!missing(expression)) cohortDef$expression <- expression; + + return (cohortDef); +} + + + +#' Add cohort subset definition to a cohort definition set +#' @description +#' Given a subset definition and cohort definition set, this function returns a modified cohortDefinitionSet +#' That contains cohorts that's have parent's contained within the base cohortDefinitionSet +#' +#' Also adds the columns subsetParent and isSubset that denote if the cohort is a subset and what the parent definition +#' is. +#' @export +#' @param cohortDefinitionSet data.frame that conforms to CohortDefinitionSet +#' @param combinedCohortDefiniton CombinedCohortDefinition instance +#' @param overwriteExisting Overwrite existing subset definition of the same definitionId if present +addCombinedCohort <- function(cohortDefinitionSet, + combinedCohortDefiniton, + overwriteExisting = FALSE) { + checkmate::assertTRUE(isCohortDefinitionSet(cohortDefinitionSet)) + checkmate::assertR6(combinedCohortDefiniton, "CombinedCohortDef") + checkmate::assertTRUE(!is.null(combinedCohortDefiniton$expression)) + + if (!"dependentCohorts" %in% colnames(cohortDefinitionSet)) { + cohortDefinitionSet$dependentCohorts <- "" + } + + if (!"isCombinedCohort" %in% colnames(cohortDefinitionSet)) { + cohortDefinitionSet$isCombinedCohort <- FALSE + } + + dependentCohortIds <- combinedCohortDefiniton$expression$getDependentCohortIds() + if (length(dependentCohortIds) > 0) { + checkmate::assertSubset(dependentCohortIds, cohortDefinitionSet$cohortId) + } + + # TODO: going to just fail if the cohortId exists for now, later we can implement overwrite + if (!overwriteExisting && nrow(cohortDefinitionSet %>% dplyr::filter(.data$cohortId == combinedCohortDefiniton$cohortId)) > 0) { + stop("The specified cohortId for this combined cohort already exists in the cohort definition set") + } else { + # remove this definition from the set + cohortDefinitionSet <- cohortDefinitionSet %>% filter(cohortId != combinedCohortDefiniton$cohortId) + } + + defSql <- SqlRender::readSql(system.file("sql", "sql_server", "combinedCohorts", "CombinedCohortDefinition.sql", package = "CohortGenerator")) + + queryBuilder <- CombinedCohortQueryBuilder$new() + combinationQuery <- queryBuilder$buildQuery(combinedCohortDefiniton) + + defSql <- SqlRender::render(defSql, + output_cohort_id = combinedCohortDefiniton$cohortId, + combined_cohort_query = combinationQuery) + + cohortDefinitionSet <- + dplyr::bind_rows( + cohortDefinitionSet, + data.frame( + cohortId = combinedCohortDefiniton$cohortId, + cohortName = combinedCohortDefiniton$cohortName, + sql = defSql, + json = as.character(combinedCohortDefiniton$toJSON()), + isCombinedCohort = TRUE, + dependentCohorts = paste0(dependentCohortIds, collapse = ",") + ) + ) + + attr(cohortDefinitionSet, "hasCombinedCohorts") <- TRUE + + return(cohortDefinitionSet) +} + diff --git a/R/CombinedCohortQueryBuilder.R b/R/CombinedCohortQueryBuilder.R new file mode 100644 index 0000000..9d9d7f0 --- /dev/null +++ b/R/CombinedCohortQueryBuilder.R @@ -0,0 +1,16 @@ +CombinedCohortQueryBuilder <- R6::R6Class( + classname = "CombinedCohortQueryBuilder", + private = list( + ), + public = list( + buildQuery = function(combinedCohortDefiniton) { + checkmate::assertR6(combinedCohortDefiniton, "CombinedCohortDef") + sql <- SqlRender::readSql(system.file("sql", "sql_server", "combinedCohorts", "unionCombine.sql", package = "CohortGenerator")) + sql <- SqlRender::render(sql, + target_cohort_ids = combinedCohortDefiniton$expression$targetCohortIds, + warnOnMissingParameters = TRUE + ) + return(sql) + } + ) +) diff --git a/R/SerializeUtils.R b/R/SerializeUtils.R new file mode 100644 index 0000000..cf19602 --- /dev/null +++ b/R/SerializeUtils.R @@ -0,0 +1,69 @@ +.loadJson <- function(definition, simplifyVector = FALSE, simplifyDataFrame = FALSE, ...) { + if (is.character(definition)) { + definition <- jsonlite::fromJSON(definition, + simplifyVector = simplifyVector, + simplifyDataFrame = simplifyDataFrame, + ... + ) + } + + if (!is.list(definition)) { + stop("Cannot instanitate object invalid type ", class(definition)) + } + definition +} + +.toJSON <- function(obj) { + jsonlite::toJSON(obj, pretty = TRUE) +} + +# For R6 classes that have a toList() function, this function handles unboxing of logicals and NAs, lists of +# R6 classes, or just a single R6 class. +.r6ToListOrNA <- function(x) { + if (length(x) == 0) { + return(invisible(list())) + } else if (is.logical(x) && is.na(x)) { + return(invisible(jsonlite::unbox(NA))) + } else if (checkmate::testList(x)) { + return(invisible(lapply(x, function(item) { item$toList() }))) + } else { + return(invisible(x$toList())) + } +} + +# Applies unbox to scalars and null values, and unlists lists > 0. +.toJsonArray <- function(x) { + if (checkmate::testScalarNA(x) || checkmate::testNull(x)) { + return(jsonlite::unbox(NA)) + } else if (length(x) > 0) { + return(unlist(x)) + }else { + return(list()) + } +} + +# removes any na elements from list +.removeEmpty <- function(x) { + Filter(Negate(anyNA),x) +} + +# Converts list or json into well-formed, empty-removed list. +.convertJSON <- function(data) { + if (checkmate::testString(data)) { + return(.removeEmpty(.nullToNa(jsonlite::fromJSON(data, simplifyDataFrame = FALSE)))) + } else if (checkmate::testList(data)) { + return(.removeEmpty(data)) + } else { + stop("Error: Attempting to initalize R6 class witn non-list or non-string") + } +} + +# Converts null values to NA to serlize json properly +.nullToNa <- function(obj) { + if (is.list(obj)) { + obj <- lapply(obj, function(x) if (is.null(x)) NA else x) + obj <- lapply(obj, .nullToNa) + } + return(obj) +} + diff --git a/R/Subsets.R b/R/Subsets.R index 7bbcc5a..324e5b8 100644 --- a/R/Subsets.R +++ b/R/Subsets.R @@ -14,26 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -.loadJson <- function(definition, simplifyVector = FALSE, simplifyDataFrame = FALSE, ...) { - if (is.character(definition)) { - definition <- jsonlite::fromJSON(definition, - simplifyVector = simplifyVector, - simplifyDataFrame = simplifyDataFrame, - ... - ) - } - - if (!is.list(definition)) { - stop("Cannot instanitate object invalid type ", class(definition)) - } - definition -} - -.toJSON <- function(obj) { - jsonlite::toJSON(obj, pretty = TRUE) -} - - # SubsetCohortWindow ------------- #' @title Time Window For Cohort Subset Operator #' @export diff --git a/inst/sql/sql_server/combinedCohorts/CombinedCohortDefinition.sql b/inst/sql/sql_server/combinedCohorts/CombinedCohortDefinition.sql new file mode 100644 index 0000000..ad1bd9e --- /dev/null +++ b/inst/sql/sql_server/combinedCohorts/CombinedCohortDefinition.sql @@ -0,0 +1,16 @@ +select subject_id, cohort_start_date, cohort_end_date +INTO #combined_cohort +FROM ( + @combined_cohort_query +) Q; + + +INSERT INTO @target_database_schema.@target_cohort_table +SELECT + @output_cohort_id as cohort_definition_id, + subject_id, + cohort_start_date, + cohort_end_date +FROM #combined_cohort; + +DROP TABLE #combined_cohort; diff --git a/inst/sql/sql_server/combinedCohorts/unionCombine.sql b/inst/sql/sql_server/combinedCohorts/unionCombine.sql new file mode 100644 index 0000000..eb41e12 --- /dev/null +++ b/inst/sql/sql_server/combinedCohorts/unionCombine.sql @@ -0,0 +1,14 @@ +select subject_id, min(cohort_start_date) as cohort_start_date, max(cohort_end_date) as cohort_end_date +from ( + select subject_id, cohort_start_date, cohort_end_date, sum(is_start) over (partition by subject_id order by cohort_start_date, is_start desc rows unbounded preceding) group_idx + from ( + select subject_id, cohort_start_date, cohort_end_date, + case when max(cohort_end_date) over (partition by subject_id order by cohort_start_date rows between unbounded preceding and 1 preceding) >= cohort_start_date then 0 else 1 end is_start + from ( + select subject_id, cohort_start_date, cohort_end_date + from @target_database_schema.@target_cohort_table + WHERE cohort_definition_id in (@target_cohort_ids) + ) CR + ) ST +) GR +group by subject_id, group_idx \ No newline at end of file diff --git a/man/CombinedCohortDef.Rd b/man/CombinedCohortDef.Rd new file mode 100644 index 0000000..ae61ed9 --- /dev/null +++ b/man/CombinedCohortDef.Rd @@ -0,0 +1,103 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CombinedCohort.R +\name{CombinedCohortDef} +\alias{CombinedCohortDef} +\title{A CombinedCohortDef} +\description{ +Defines the ID, name and operation to produce a new cohort. +} +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{cohortId}}{The list of cohorts to apply in this group.} + +\item{\code{cohortName}}{the name given to this outcome definition} + +\item{\code{expression}}{The combo operator that is the root of the definition.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-CombinedCohortDef-new}{\code{CombinedCohortDef$new()}} +\item \href{#method-CombinedCohortDef-toList}{\code{CombinedCohortDef$toList()}} +\item \href{#method-CombinedCohortDef-toJSON}{\code{CombinedCohortDef$toJSON()}} +\item \href{#method-CombinedCohortDef-isEqualTo}{\code{CombinedCohortDef$isEqualTo()}} +\item \href{#method-CombinedCohortDef-clone}{\code{CombinedCohortDef$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortDef-new}{}}} +\subsection{Method \code{new()}}{ +creates a new instance, using the provided data param if provided. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortDef$new(data = list())}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{data}}{the data (as a json string or list) to initialize with} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortDef-toList}{}}} +\subsection{Method \code{toList()}}{ +List representation of object +To JSON +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortDef$toList()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortDef-toJSON}{}}} +\subsection{Method \code{toJSON()}}{ +json serialized representation of object +Is Equal to +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortDef$toJSON()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortDef-isEqualTo}{}}} +\subsection{Method \code{isEqualTo()}}{ +Compare CombinedCohortDef to another +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortDef$isEqualTo(other)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{other}}{CombinedCohortDef instance} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortDef-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortDef$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/CombinedCohortOp.Rd b/man/CombinedCohortOp.Rd new file mode 100644 index 0000000..8faea20 --- /dev/null +++ b/man/CombinedCohortOp.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CombinedCohort.R +\name{CombinedCohortOp} +\alias{CombinedCohortOp} +\title{A Combined cohort operation used to UNION or INTERSECT cohorts. Note: only UNION supported.} +\description{ +Defines a UNION or INTERSECT on a set of cohorts. +} +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{targetCohortIds}}{The list of cohorts to apply in this group.} + +\item{\code{opType}}{The group operation, either 'union' or 'intersect'} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-CombinedCohortOp-new}{\code{CombinedCohortOp$new()}} +\item \href{#method-CombinedCohortOp-toList}{\code{CombinedCohortOp$toList()}} +\item \href{#method-CombinedCohortOp-toJSON}{\code{CombinedCohortOp$toJSON()}} +\item \href{#method-CombinedCohortOp-isEqualTo}{\code{CombinedCohortOp$isEqualTo()}} +\item \href{#method-CombinedCohortOp-getDependentCohortIds}{\code{CombinedCohortOp$getDependentCohortIds()}} +\item \href{#method-CombinedCohortOp-clone}{\code{CombinedCohortOp$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortOp-new}{}}} +\subsection{Method \code{new()}}{ +creates a new instance, using the provided data param if provided. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortOp$new(data = list())}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{data}}{the data (as a json string or list) to initialize with} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortOp-toList}{}}} +\subsection{Method \code{toList()}}{ +List representation of object +To JSON +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortOp$toList()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortOp-toJSON}{}}} +\subsection{Method \code{toJSON()}}{ +json serialized representation of object +Is Equal to +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortOp$toJSON()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortOp-isEqualTo}{}}} +\subsection{Method \code{isEqualTo()}}{ +Compare CombinedCohortGroup to another +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortOp$isEqualTo(other)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{other}}{CombinedCohortGroup instance +getDependentCohortIds} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortOp-getDependentCohortIds}{}}} +\subsection{Method \code{getDependentCohortIds()}}{ +Gets the dependent cohortIds from this operation +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortOp$getDependentCohortIds()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-CombinedCohortOp-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{CombinedCohortOp$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/addCombinedCohort.Rd b/man/addCombinedCohort.Rd new file mode 100644 index 0000000..b3e5f85 --- /dev/null +++ b/man/addCombinedCohort.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CombinedCohort.R +\name{addCombinedCohort} +\alias{addCombinedCohort} +\title{Add cohort subset definition to a cohort definition set} +\usage{ +addCombinedCohort( + cohortDefinitionSet, + combinedCohortDefiniton, + overwriteExisting = FALSE +) +} +\arguments{ +\item{cohortDefinitionSet}{data.frame that conforms to CohortDefinitionSet} + +\item{combinedCohortDefiniton}{CombinedCohortDefinition instance} + +\item{overwriteExisting}{Overwrite existing subset definition of the same definitionId if present} +} +\description{ +Given a subset definition and cohort definition set, this function returns a modified cohortDefinitionSet +That contains cohorts that's have parent's contained within the base cohortDefinitionSet + +Also adds the columns subsetParent and isSubset that denote if the cohort is a subset and what the parent definition +is. +} diff --git a/man/createCombinedCohortDef.Rd b/man/createCombinedCohortDef.Rd new file mode 100644 index 0000000..c7bfff4 --- /dev/null +++ b/man/createCombinedCohortDef.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CombinedCohort.R +\name{createCombinedCohortDef} +\alias{createCombinedCohortDef} +\title{Defines a combined cohort using combined cohort operations} +\usage{ +createCombinedCohortDef(cohortId, cohortName, expression) +} +\arguments{ +\item{cohortId}{The output cohort id from applying the combine expression.} + +\item{cohortName}{The output cohort name} + +\item{expression}{The combine operator that will yield the final cohort.} +} +\description{ +Creates an instance of CombinedCohortDef with the provided cohortId, cohortName and the combine operator expression +} diff --git a/man/createCombinedCohortOp.Rd b/man/createCombinedCohortOp.Rd new file mode 100644 index 0000000..3d829eb --- /dev/null +++ b/man/createCombinedCohortOp.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CombinedCohort.R +\name{createCombinedCohortOp} +\alias{createCombinedCohortOp} +\title{Create CombinedCohortOp instance} +\usage{ +createCombinedCohortOp(targetCohortIds, opType) +} +\arguments{ +\item{targetCohortIds}{list of target cohort IDs to combine in this operation} + +\item{opType}{The op type of this cohort combination, can only be 'union'} +} +\description{ +A factory function to create CombinedCohortOp +} diff --git a/tests/testthat/test-CombinationCohorts.R b/tests/testthat/test-CombinationCohorts.R new file mode 100644 index 0000000..a106471 --- /dev/null +++ b/tests/testthat/test-CombinationCohorts.R @@ -0,0 +1,66 @@ +test_that("combination cohort generation", { + + recordKeepingFolder = file.path(outputFolder, "RecordKeeping") + + createCohortSql <- function (subjectId, cohortStartDate, cohortEndDate) { + templateSql = +"insert into @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date) + values (@target_cohort_id, @subject_id, CAST('@cohort_start_date' AS DATE), CAST('@cohort_end_date' AS DATE));" + return (SqlRender::render(templateSql, + subject_id = subjectId, + cohort_start_date = cohortStartDate, + cohort_end_date = cohortEndDate)) + } + + testCohortDefinitionSet <- data.frame(cohortId = c(50:52), + cohortName = paste0('Cohort ',c(50:52)), + sql=c( + createCohortSql(1, "20160101", "20160301"), + createCohortSql(1, "20160201", "20160401"), + createCohortSql(1, "20160301", "20160501") + ), + json=c("{}","{}","{}")) + + cohortDefinitionSet <- rbind(CohortGenerator::createEmptyCohortDefinitionSet(), testCohortDefinitionSet) + + #build cohort and combined cohort defs + + combinedCohortOp <- CohortGenerator::createCombinedCohortOp(targetCohortIds = c(50,51,52), opType = "union") + combinedCohortDef <- CohortGenerator::createCombinedCohortDef(cohortId = 53, cohortName="Cohort 50,51,52 combo", expression = combinedCohortOp) + + cohortDefinitionSet <- cohortDefinitionSet %>% + CohortGenerator::addCombinedCohort(combinedCohortDef) + + cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable = "combinedCohorts_cohort") + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + on.exit(DatabaseConnector::disconnect(connection)) + + CohortGenerator::createCohortTables( + connection = connection, + cohortDatabaseSchema = "main", + cohortTableNames = cohortTableNames + ) + + cohortsGenerated <- CohortGenerator::generateCohortSet( + connection = connection, + cdmDatabaseSchema = "main", + cohortDatabaseSchema = "main", + cohortTableNames = cohortTableNames, + cohortDefinitionSet = cohortDefinitionSet, + incremental = FALSE, + incrementalFolder = recordKeepingFolder + ) + + # how to test + resultSql <- "select cohort_definition_id, subject_id, cohort_start_date, cohort_end_date FROM main.@cohort_table where cohort_definition_id = 53" + resultSql <- SqlRender::render(resultSql, cohort_table = cohortTableNames$cohortTable) + resultsDF <- DatabaseConnector::querySql(connection = connection, SqlRender::translate(resultSql, targetDialect = "sqlite")) + + expect_true(nrow(resultsDF) == 1) + expect_true(resultsDF %>% dplyr::select("COHORT_START_DATE") %>% dplyr::pull() == "2016-01-01") + expect_true(resultsDF %>% dplyr::select("COHORT_END_DATE") %>% dplyr::pull() == "2016-05-01") + + unlink(recordKeepingFolder, recursive = TRUE) + +}) \ No newline at end of file From d0734ee1d80472e30f43938040e11396be32727e Mon Sep 17 00:00:00 2001 From: Chris Knoll Date: Mon, 23 Sep 2024 10:33:11 -0400 Subject: [PATCH 2/2] Address git action warnings/errors. --- R/CombinedCohort.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/CombinedCohort.R b/R/CombinedCohort.R index 65c4b42..4e1d918 100644 --- a/R/CombinedCohort.R +++ b/R/CombinedCohort.R @@ -213,12 +213,11 @@ addCombinedCohort <- function(cohortDefinitionSet, checkmate::assertSubset(dependentCohortIds, cohortDefinitionSet$cohortId) } - # TODO: going to just fail if the cohortId exists for now, later we can implement overwrite if (!overwriteExisting && nrow(cohortDefinitionSet %>% dplyr::filter(.data$cohortId == combinedCohortDefiniton$cohortId)) > 0) { stop("The specified cohortId for this combined cohort already exists in the cohort definition set") } else { # remove this definition from the set - cohortDefinitionSet <- cohortDefinitionSet %>% filter(cohortId != combinedCohortDefiniton$cohortId) + cohortDefinitionSet <- cohortDefinitionSet %>% dplyr::filter(.data$cohortId != combinedCohortDefiniton$cohortId) } defSql <- SqlRender::readSql(system.file("sql", "sql_server", "combinedCohorts", "CombinedCohortDefinition.sql", package = "CohortGenerator"))