From 0c73341f33e295253bd1a715d170ded788e8a033 Mon Sep 17 00:00:00 2001
From: Chris Knoll <cknoll@ohdsi.org>
Date: Mon, 23 Sep 2024 00:15:20 -0400
Subject: [PATCH 1/2] Added implementation of 'combination cohorts'. Added
 simple test.

---
 NAMESPACE                                     |   5 +
 R/CohortConstruction.R                        |  40 +--
 R/CombinedCohort.R                            | 250 ++++++++++++++++++
 R/CombinedCohortQueryBuilder.R                |  16 ++
 R/SerializeUtils.R                            |  69 +++++
 R/Subsets.R                                   |  20 --
 .../CombinedCohortDefinition.sql              |  16 ++
 .../combinedCohorts/unionCombine.sql          |  14 +
 man/CombinedCohortDef.Rd                      | 103 ++++++++
 man/CombinedCohortOp.Rd                       | 113 ++++++++
 man/addCombinedCohort.Rd                      |  26 ++
 man/createCombinedCohortDef.Rd                |  18 ++
 man/createCombinedCohortOp.Rd                 |  16 ++
 tests/testthat/test-CombinationCohorts.R      |  66 +++++
 14 files changed, 734 insertions(+), 38 deletions(-)
 create mode 100644 R/CombinedCohort.R
 create mode 100644 R/CombinedCohortQueryBuilder.R
 create mode 100644 R/SerializeUtils.R
 create mode 100644 inst/sql/sql_server/combinedCohorts/CombinedCohortDefinition.sql
 create mode 100644 inst/sql/sql_server/combinedCohorts/unionCombine.sql
 create mode 100644 man/CombinedCohortDef.Rd
 create mode 100644 man/CombinedCohortOp.Rd
 create mode 100644 man/addCombinedCohort.Rd
 create mode 100644 man/createCombinedCohortDef.Rd
 create mode 100644 man/createCombinedCohortOp.Rd
 create mode 100644 tests/testthat/test-CombinationCohorts.R

diff --git a/NAMESPACE b/NAMESPACE
index 62124bb..17fb2f8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,16 +2,21 @@
 
 export(CohortSubsetDefinition)
 export(CohortSubsetOperator)
+export(CombinedCohortDef)
+export(CombinedCohortOp)
 export(DemographicSubsetOperator)
 export(LimitSubsetOperator)
 export(SubsetCohortWindow)
 export(SubsetOperator)
 export(addCohortSubsetDefinition)
+export(addCombinedCohort)
 export(checkAndFixCohortDefinitionSetDataTypes)
 export(computeChecksum)
 export(createCohortSubset)
 export(createCohortSubsetDefinition)
 export(createCohortTables)
+export(createCombinedCohortDef)
+export(createCombinedCohortOp)
 export(createDemographicSubset)
 export(createEmptyCohortDefinitionSet)
 export(createEmptyNegativeControlOutcomeCohortSet)
diff --git a/R/CohortConstruction.R b/R/CohortConstruction.R
index e27d222..062ecc1 100644
--- a/R/CohortConstruction.R
+++ b/R/CohortConstruction.R
@@ -106,24 +106,28 @@ generateCohortSet <- function(connectionDetails = NULL,
   if (incremental) {
     recordKeepingFile <- file.path(incrementalFolder, "GeneratedCohorts.csv")
 
-    if (isTRUE(attr(cohortDefinitionSet, "hasSubsetDefinitions"))) {
-      cohortDefinitionSet$checksum <- ""
-      for (i in 1:nrow(cohortDefinitionSet)) {
+    cohortDefinitionSet$checksum <- ""
+    for (i in 1:nrow(cohortDefinitionSet)) {
+      if (isTRUE(attr(cohortDefinitionSet, "hasSubsetDefinitions"))) {
         # This implementation supports recursive definitions (subsetting subsets) because the subsets have to be added in order
-        if (cohortDefinitionSet$subsetParent[i] != cohortDefinitionSet$cohortId[i]) {
+        if (cohortDefinitionSet$isSubset[i] && cohortDefinitionSet$subsetParent[i] != cohortDefinitionSet$cohortId[i]) {
           j <- which(cohortDefinitionSet$cohortId == cohortDefinitionSet$subsetParent[i])
           cohortDefinitionSet$checksum[i] <- computeChecksum(paste(
             cohortDefinitionSet$sql[j],
             cohortDefinitionSet$sql[i]
           ))
-        } else {
-          cohortDefinitionSet$checksum[i] <- computeChecksum(cohortDefinitionSet$sql[i])
         }
+      } else if (isTRUE(attr(cohortDefinitionSet, "hasCombinedCohorts"))) {
+        dependantCohortIds <- as.integer(strsplit(cohortDefinitionSet$dependentCohorts[i]))
+        dependentCohortIdx <- which(cohortDefinitionSet$cohortId %in% dependantCohortIds)
+        cohortDefinitionSet$checksum[i] <- 
+          computeChecksum(paste0(c(cohortDefinitionSet$sql[dependentCohortIdx], cohortDefinitionSet$sql[i]), collapse = ""))
+      } else {
+        cohortDefinitionSet$checksum <- computeChecksum(cohortDefinitionSet$sql)
       }
-    } else {
-      cohortDefinitionSet$checksum <- computeChecksum(cohortDefinitionSet$sql)
     }
   }
+  
   # Create the cluster
   # DEV NOTE :: running subsets in a multiprocess setup will not work with subsets that subset other subsets
   # To resolve this issue we need to execute the dependency tree.
@@ -145,7 +149,7 @@ generateCohortSet <- function(connectionDetails = NULL,
       dplyr::select("cohortId") %>%
       dplyr::pull()
   }
-
+  
   # Apply the generation operation to the cluster
   cohortsGenerated <- ParallelLogger::clusterApply(
     cluster,
@@ -253,7 +257,15 @@ generateCohort <- function(cohortId = NULL,
     rlang::inform(paste0(i, "/", nrow(cohortDefinitionSet), "- Generating cohort: ", cohortName, " (id = ", cohortId, ")"))
     sql <- cohortDefinitionSet$sql[i]
 
-    if (!isSubset) {
+    if (isSubset) {
+      sql <- SqlRender::render(
+        sql = sql,
+        cdm_database_schema = cdmDatabaseSchema,
+        cohort_table = cohortTableNames$cohortTable,
+        cohort_database_schema = cohortDatabaseSchema,
+        warnOnMissingParameters = FALSE
+      )
+    } else {  # combined cohorts apply same paramaters as standard cohort generation
       sql <- SqlRender::render(
         sql = sql,
         cdm_database_schema = cdmDatabaseSchema,
@@ -269,14 +281,6 @@ generateCohort <- function(cohortId = NULL,
         results_database_schema.cohort_censor_stats = paste(cohortDatabaseSchema, cohortTableNames$cohortCensorStatsTable, sep = "."),
         warnOnMissingParameters = FALSE
       )
-    } else {
-      sql <- SqlRender::render(
-        sql = sql,
-        cdm_database_schema = cdmDatabaseSchema,
-        cohort_table = cohortTableNames$cohortTable,
-        cohort_database_schema = cohortDatabaseSchema,
-        warnOnMissingParameters = FALSE
-      )
     }
     sql <- SqlRender::translate(
       sql = sql,
diff --git a/R/CombinedCohort.R b/R/CombinedCohort.R
new file mode 100644
index 0000000..65c4b42
--- /dev/null
+++ b/R/CombinedCohort.R
@@ -0,0 +1,250 @@
+# CombinedCohortOp -------------
+#' @title A Combined cohort operation used to UNION or INTERSECT cohorts.  Note: only UNION supported.
+#' @export
+#' @description
+#' Defines a UNION or INTERSECT on a set of cohorts.
+CombinedCohortOp <- R6::R6Class(
+  classname = "CombinedCohortOp",
+  private = list(
+    .targetCohortIds = NULL,
+    .opType = "union"
+  ),
+  active = list(
+    #' @field targetCohortIds The list of cohorts to apply in this group.
+    targetCohortIds = function(targetCohortIds) {
+      if (missing(targetCohortIds)) {
+        return(private$.targetCohortIds)
+      }
+      checkmate::assertList(as.list(targetCohortIds), types="numeric", min.len = 0)
+      private$.targetCohortIds <- targetCohortIds
+      return(self)
+    },
+    #' @field opType The group operation, either 'union' or 'intersect'
+    opType = function(opType) {
+      if (missing(opType)) {
+        return(private$.opType)
+      }
+      checkmate::assertChoice(opType, c("union"))
+      private$.opType <- opType
+      return(self)
+    }
+  ),
+  public = list(
+    #' @description
+    #' creates a new instance, using the provided data param if provided.
+    #' @param data the data (as a json string or list) to initialize with
+    initialize = function(data = list()) {
+      dataList <- .convertJSON(data)
+      
+      if ("targetCohortIds" %in% names (dataList)) self$targetCohortIds <- dataList$targetCohortIds
+      if ("opType" %in% names (dataList)) self$opType <- dataList$opType
+    },
+    #' @description List representation of object
+    toList = function() {
+      .removeEmpty(list(
+        targetCohortIds = .toJsonArray(private$.targetCohortIds),
+        opType = jsonlite::unbox(private$.opType)
+      ))      
+    },
+    #' To JSON
+    #' @description json serialized representation of object
+    toJSON = function() {
+      .toJSON(self$toList())
+    },
+    
+    #' Is Equal to
+    #' @description Compare CombinedCohortGroup to another
+    #' @param other CombinedCohortGroup instance
+    isEqualTo = function(other) {
+      checkmate::assertR6(other, "CombinedCohortGroup")
+      return(other$toJSON() == self$toJSON())
+    },
+    #' getDependentCohortIds
+    #' @description Gets the dependent cohortIds from this operation
+    getDependentCohortIds = function() {
+      if (length(private$.targetCohortIds) == 0) return(c())
+      return(private$.targetCohortIds)
+    }
+  )
+)
+
+# CombinedCohortDef -------------
+#' @title A CombinedCohortDef
+#' @export
+#' @description
+#' Defines the ID, name and operation to produce a new cohort.
+CombinedCohortDef <- R6::R6Class(
+  classname = "CombinedCohortDef",
+  private = list(
+    .cohortId = NA,
+    .cohortName = NA,
+    .expression = NULL
+  ),
+  active = list(
+    #' @field cohortId The list of cohorts to apply in this group.
+    cohortId = function(cohortId) {
+      if (missing(cohortId)) {
+        return(private$.cohortId)
+      }
+      checkmate::assertInt(cohortId)
+      private$.cohortId <- cohortId
+      return(self)
+    },
+    #' @field cohortName the name given to this outcome definition
+    cohortName = function(cohortName) {
+      if (missing(cohortName)) {
+        private$.cohortName
+      } else {
+        # check type
+        checkmate::assertCharacter(cohortName)
+        private$.cohortName <- cohortName
+        self
+      }
+    },    
+    #' @field expression The combo operator that is the root of the definition.
+    expression = function(expression) {
+      if (missing(expression)) {
+        return(private$.expression)
+      }
+      checkmate::assertClass(expression, classes="CombinedCohortOp")
+      private$.expression <- expression
+      return(self)
+    }
+  ),
+  public = list(
+    #' @description
+    #' creates a new instance, using the provided data param if provided.
+    #' @param data the data (as a json string or list) to initialize with
+    initialize = function(data = list()) {
+      dataList <- .convertJSON(data)
+      
+      if ("cohortId" %in% names (dataList)) self$targetCohortIds <- dataList$cohortId
+      if ("cohortName" %in% names (dataList)) self$cohortName <- dataList$cohortName
+      if ("expression" %in% names (dataList)) self$expression<- CohortGenerator::CombinedCohortOp$new(dataList$expression)
+    },
+    #' @description List representation of object
+    toList = function() {
+      .removeEmpty(list(
+        cohortId = jsonlite::unbox(private$.cohortId),
+        cohortName = jsonlite::unbox(private$.cohortName),
+        expression = .r6ToListOrNA(private$.expression)
+      ))      
+    },
+    #' To JSON
+    #' @description json serialized representation of object
+    toJSON = function() {
+      .toJSON(self$toList())
+    },
+    
+    #' Is Equal to
+    #' @description Compare CombinedCohortDef to another
+    #' @param other CombinedCohortDef instance
+    isEqualTo = function(other) {
+      checkmate::assertR6(other, "CombinedCohortDef")
+      return(other$toJSON() == self$toJSON())
+    }
+  )
+)
+
+### Factory Functions
+
+#' Create CombinedCohortOp instance
+#' @description
+#' A factory function to create CombinedCohortOp
+#' @export
+#' @param targetCohortIds           list of target cohort IDs to combine in this operation
+#' @param opType                    The op type of this cohort combination, can only be 'union'
+createCombinedCohortOp <- function(targetCohortIds, opType) {
+  
+  cohortOp <- CombinedCohortOp$new()
+  if (!missing(targetCohortIds)) cohortOp$targetCohortIds <- targetCohortIds
+  if (!missing(opType)) cohortOp$opType <- opType
+
+  return (cohortOp);
+}
+
+#' Defines a combined cohort using combined cohort operations
+#' @description
+#' Creates an instance of CombinedCohortDef with the provided cohortId, cohortName and the combine operator expression
+#' @export
+#' @param cohortId       The output cohort id from applying the combine expression.
+#' @param cohortName     The output cohort name
+#' @param expression     The combine operator that will yield the final cohort.
+createCombinedCohortDef <- function(cohortId, cohortName, expression) {
+  
+  cohortDef <- CombinedCohortDef$new();
+  if (!missing(cohortId)) cohortDef$cohortId <- cohortId;
+  if (!missing(cohortName)) cohortDef$cohortName <- cohortName;
+  if (!missing(expression)) cohortDef$expression <- expression;
+  
+  return (cohortDef);
+}
+
+
+
+#' Add cohort subset definition to a cohort definition set
+#' @description
+#' Given a subset definition and cohort definition set, this function returns a modified cohortDefinitionSet
+#' That contains cohorts that's have parent's contained within the base cohortDefinitionSet
+#'
+#' Also adds the columns subsetParent and isSubset that denote if the cohort is a subset and what the parent definition
+#' is.
+#' @export
+#' @param cohortDefinitionSet       data.frame that conforms to CohortDefinitionSet
+#' @param combinedCohortDefiniton   CombinedCohortDefinition instance
+#' @param overwriteExisting         Overwrite existing subset definition of the same definitionId if present
+addCombinedCohort <- function(cohortDefinitionSet,
+                                      combinedCohortDefiniton,
+                                      overwriteExisting = FALSE) {
+  checkmate::assertTRUE(isCohortDefinitionSet(cohortDefinitionSet))
+  checkmate::assertR6(combinedCohortDefiniton, "CombinedCohortDef")
+  checkmate::assertTRUE(!is.null(combinedCohortDefiniton$expression))
+  
+  if (!"dependentCohorts" %in% colnames(cohortDefinitionSet)) {
+    cohortDefinitionSet$dependentCohorts <- ""
+  }
+  
+  if (!"isCombinedCohort" %in% colnames(cohortDefinitionSet)) {
+    cohortDefinitionSet$isCombinedCohort <- FALSE
+  }
+  
+  dependentCohortIds <- combinedCohortDefiniton$expression$getDependentCohortIds()
+  if (length(dependentCohortIds) > 0) {
+    checkmate::assertSubset(dependentCohortIds, cohortDefinitionSet$cohortId)
+  }
+
+  # TODO: going to just fail if the cohortId exists for now, later we can implement overwrite
+  if (!overwriteExisting && nrow(cohortDefinitionSet %>% dplyr::filter(.data$cohortId == combinedCohortDefiniton$cohortId)) > 0) {
+    stop("The specified cohortId for this combined cohort already exists in the cohort definition set")
+  } else {
+    # remove this definition from the set
+    cohortDefinitionSet <- cohortDefinitionSet %>% filter(cohortId != combinedCohortDefiniton$cohortId)
+  }
+
+  defSql <- SqlRender::readSql(system.file("sql", "sql_server", "combinedCohorts", "CombinedCohortDefinition.sql", package = "CohortGenerator"))
+  
+  queryBuilder <- CombinedCohortQueryBuilder$new()
+  combinationQuery <- queryBuilder$buildQuery(combinedCohortDefiniton)
+  
+  defSql <- SqlRender::render(defSql, 
+                              output_cohort_id = combinedCohortDefiniton$cohortId,
+                              combined_cohort_query = combinationQuery)
+  
+  cohortDefinitionSet <-
+    dplyr::bind_rows(
+      cohortDefinitionSet,
+      data.frame(
+        cohortId = combinedCohortDefiniton$cohortId,
+        cohortName = combinedCohortDefiniton$cohortName,
+        sql = defSql,
+        json = as.character(combinedCohortDefiniton$toJSON()),
+        isCombinedCohort = TRUE,
+        dependentCohorts = paste0(dependentCohortIds, collapse = ",")
+      )
+    )
+  
+  attr(cohortDefinitionSet, "hasCombinedCohorts") <- TRUE
+  
+  return(cohortDefinitionSet)
+}
+
diff --git a/R/CombinedCohortQueryBuilder.R b/R/CombinedCohortQueryBuilder.R
new file mode 100644
index 0000000..9d9d7f0
--- /dev/null
+++ b/R/CombinedCohortQueryBuilder.R
@@ -0,0 +1,16 @@
+CombinedCohortQueryBuilder <- R6::R6Class(
+  classname = "CombinedCohortQueryBuilder",
+  private = list(
+  ),
+  public = list(
+    buildQuery = function(combinedCohortDefiniton) {
+      checkmate::assertR6(combinedCohortDefiniton, "CombinedCohortDef")
+      sql <- SqlRender::readSql(system.file("sql", "sql_server", "combinedCohorts", "unionCombine.sql", package = "CohortGenerator"))
+      sql <- SqlRender::render(sql,
+                               target_cohort_ids = combinedCohortDefiniton$expression$targetCohortIds,
+                               warnOnMissingParameters = TRUE
+      )
+      return(sql)
+    }
+  )
+)
diff --git a/R/SerializeUtils.R b/R/SerializeUtils.R
new file mode 100644
index 0000000..cf19602
--- /dev/null
+++ b/R/SerializeUtils.R
@@ -0,0 +1,69 @@
+.loadJson <- function(definition, simplifyVector = FALSE, simplifyDataFrame = FALSE, ...) {
+  if (is.character(definition)) {
+    definition <- jsonlite::fromJSON(definition,
+                                     simplifyVector = simplifyVector,
+                                     simplifyDataFrame = simplifyDataFrame,
+                                     ...
+    )
+  }
+  
+  if (!is.list(definition)) {
+    stop("Cannot instanitate object invalid type ", class(definition))
+  }
+  definition
+}
+
+.toJSON <- function(obj) {
+  jsonlite::toJSON(obj, pretty = TRUE)
+}
+
+# For R6 classes that have a toList() function, this function handles unboxing of logicals and NAs, lists of
+# R6 classes, or just a single R6 class.
+.r6ToListOrNA <- function(x) {
+  if (length(x) == 0) {
+    return(invisible(list()))
+  } else if (is.logical(x) && is.na(x)) {
+    return(invisible(jsonlite::unbox(NA)))
+  } else if (checkmate::testList(x)) {
+    return(invisible(lapply(x, function(item) { item$toList() })))
+  } else {
+    return(invisible(x$toList()))
+  }
+}
+
+# Applies unbox to scalars and null values, and unlists lists > 0.
+.toJsonArray <- function(x) {
+  if (checkmate::testScalarNA(x) || checkmate::testNull(x)) {
+    return(jsonlite::unbox(NA))
+  } else if (length(x) > 0) {
+    return(unlist(x))
+  }else {
+    return(list())
+  }
+}
+
+# removes any na elements from list
+.removeEmpty <- function(x) {
+  Filter(Negate(anyNA),x)
+}
+
+# Converts list or json into well-formed, empty-removed list.
+.convertJSON <- function(data) {
+  if (checkmate::testString(data)) {
+    return(.removeEmpty(.nullToNa(jsonlite::fromJSON(data, simplifyDataFrame = FALSE))))
+  } else if (checkmate::testList(data)) {
+    return(.removeEmpty(data))
+  } else {
+    stop("Error: Attempting to initalize R6 class witn non-list or non-string")
+  }
+}
+
+# Converts null values to NA to serlize json properly
+.nullToNa <- function(obj) {
+  if (is.list(obj)) {
+    obj <- lapply(obj, function(x) if (is.null(x)) NA else x)
+    obj <- lapply(obj, .nullToNa)
+  }
+  return(obj)
+}
+
diff --git a/R/Subsets.R b/R/Subsets.R
index 7bbcc5a..324e5b8 100644
--- a/R/Subsets.R
+++ b/R/Subsets.R
@@ -14,26 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-.loadJson <- function(definition, simplifyVector = FALSE, simplifyDataFrame = FALSE, ...) {
-  if (is.character(definition)) {
-    definition <- jsonlite::fromJSON(definition,
-      simplifyVector = simplifyVector,
-      simplifyDataFrame = simplifyDataFrame,
-      ...
-    )
-  }
-
-  if (!is.list(definition)) {
-    stop("Cannot instanitate object invalid type ", class(definition))
-  }
-  definition
-}
-
-.toJSON <- function(obj) {
-  jsonlite::toJSON(obj, pretty = TRUE)
-}
-
-
 # SubsetCohortWindow -------------
 #' @title Time Window For Cohort Subset Operator
 #' @export
diff --git a/inst/sql/sql_server/combinedCohorts/CombinedCohortDefinition.sql b/inst/sql/sql_server/combinedCohorts/CombinedCohortDefinition.sql
new file mode 100644
index 0000000..ad1bd9e
--- /dev/null
+++ b/inst/sql/sql_server/combinedCohorts/CombinedCohortDefinition.sql
@@ -0,0 +1,16 @@
+select subject_id, cohort_start_date, cohort_end_date
+INTO #combined_cohort
+FROM (
+  @combined_cohort_query
+) Q;
+
+
+INSERT INTO @target_database_schema.@target_cohort_table
+SELECT
+    @output_cohort_id as cohort_definition_id,
+    subject_id,
+    cohort_start_date,
+    cohort_end_date
+FROM #combined_cohort;
+
+DROP TABLE #combined_cohort;
diff --git a/inst/sql/sql_server/combinedCohorts/unionCombine.sql b/inst/sql/sql_server/combinedCohorts/unionCombine.sql
new file mode 100644
index 0000000..eb41e12
--- /dev/null
+++ b/inst/sql/sql_server/combinedCohorts/unionCombine.sql
@@ -0,0 +1,14 @@
+select subject_id, min(cohort_start_date) as cohort_start_date, max(cohort_end_date) as cohort_end_date
+from (
+  select subject_id, cohort_start_date, cohort_end_date, sum(is_start) over (partition by subject_id order by cohort_start_date, is_start desc rows unbounded preceding) group_idx
+  from (
+    select subject_id, cohort_start_date, cohort_end_date, 
+    case when max(cohort_end_date) over (partition by subject_id order by cohort_start_date rows between unbounded preceding and 1 preceding) >= cohort_start_date then 0 else 1 end is_start
+    from (
+      select subject_id, cohort_start_date, cohort_end_date
+      from @target_database_schema.@target_cohort_table
+      WHERE cohort_definition_id in (@target_cohort_ids)
+    ) CR
+  ) ST
+) GR
+group by subject_id, group_idx
\ No newline at end of file
diff --git a/man/CombinedCohortDef.Rd b/man/CombinedCohortDef.Rd
new file mode 100644
index 0000000..ae61ed9
--- /dev/null
+++ b/man/CombinedCohortDef.Rd
@@ -0,0 +1,103 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CombinedCohort.R
+\name{CombinedCohortDef}
+\alias{CombinedCohortDef}
+\title{A CombinedCohortDef}
+\description{
+Defines the ID, name and operation to produce a new cohort.
+}
+\section{Active bindings}{
+\if{html}{\out{<div class="r6-active-bindings">}}
+\describe{
+\item{\code{cohortId}}{The list of cohorts to apply in this group.}
+
+\item{\code{cohortName}}{the name given to this outcome definition}
+
+\item{\code{expression}}{The combo operator that is the root of the definition.}
+}
+\if{html}{\out{</div>}}
+}
+\section{Methods}{
+\subsection{Public methods}{
+\itemize{
+\item \href{#method-CombinedCohortDef-new}{\code{CombinedCohortDef$new()}}
+\item \href{#method-CombinedCohortDef-toList}{\code{CombinedCohortDef$toList()}}
+\item \href{#method-CombinedCohortDef-toJSON}{\code{CombinedCohortDef$toJSON()}}
+\item \href{#method-CombinedCohortDef-isEqualTo}{\code{CombinedCohortDef$isEqualTo()}}
+\item \href{#method-CombinedCohortDef-clone}{\code{CombinedCohortDef$clone()}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortDef-new"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortDef-new}{}}}
+\subsection{Method \code{new()}}{
+creates a new instance, using the provided data param if provided.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortDef$new(data = list())}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{data}}{the data (as a json string or list) to initialize with}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortDef-toList"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortDef-toList}{}}}
+\subsection{Method \code{toList()}}{
+List representation of object
+To JSON
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortDef$toList()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortDef-toJSON"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortDef-toJSON}{}}}
+\subsection{Method \code{toJSON()}}{
+json serialized representation of object
+Is Equal to
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortDef$toJSON()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortDef-isEqualTo"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortDef-isEqualTo}{}}}
+\subsection{Method \code{isEqualTo()}}{
+Compare CombinedCohortDef to another
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortDef$isEqualTo(other)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{other}}{CombinedCohortDef instance}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortDef-clone"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortDef-clone}{}}}
+\subsection{Method \code{clone()}}{
+The objects of this class are cloneable with this method.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortDef$clone(deep = FALSE)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{deep}}{Whether to make a deep clone.}
+}
+\if{html}{\out{</div>}}
+}
+}
+}
diff --git a/man/CombinedCohortOp.Rd b/man/CombinedCohortOp.Rd
new file mode 100644
index 0000000..8faea20
--- /dev/null
+++ b/man/CombinedCohortOp.Rd
@@ -0,0 +1,113 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CombinedCohort.R
+\name{CombinedCohortOp}
+\alias{CombinedCohortOp}
+\title{A Combined cohort operation used to UNION or INTERSECT cohorts.  Note: only UNION supported.}
+\description{
+Defines a UNION or INTERSECT on a set of cohorts.
+}
+\section{Active bindings}{
+\if{html}{\out{<div class="r6-active-bindings">}}
+\describe{
+\item{\code{targetCohortIds}}{The list of cohorts to apply in this group.}
+
+\item{\code{opType}}{The group operation, either 'union' or 'intersect'}
+}
+\if{html}{\out{</div>}}
+}
+\section{Methods}{
+\subsection{Public methods}{
+\itemize{
+\item \href{#method-CombinedCohortOp-new}{\code{CombinedCohortOp$new()}}
+\item \href{#method-CombinedCohortOp-toList}{\code{CombinedCohortOp$toList()}}
+\item \href{#method-CombinedCohortOp-toJSON}{\code{CombinedCohortOp$toJSON()}}
+\item \href{#method-CombinedCohortOp-isEqualTo}{\code{CombinedCohortOp$isEqualTo()}}
+\item \href{#method-CombinedCohortOp-getDependentCohortIds}{\code{CombinedCohortOp$getDependentCohortIds()}}
+\item \href{#method-CombinedCohortOp-clone}{\code{CombinedCohortOp$clone()}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortOp-new"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortOp-new}{}}}
+\subsection{Method \code{new()}}{
+creates a new instance, using the provided data param if provided.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortOp$new(data = list())}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{data}}{the data (as a json string or list) to initialize with}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortOp-toList"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortOp-toList}{}}}
+\subsection{Method \code{toList()}}{
+List representation of object
+To JSON
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortOp$toList()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortOp-toJSON"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortOp-toJSON}{}}}
+\subsection{Method \code{toJSON()}}{
+json serialized representation of object
+Is Equal to
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortOp$toJSON()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortOp-isEqualTo"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortOp-isEqualTo}{}}}
+\subsection{Method \code{isEqualTo()}}{
+Compare CombinedCohortGroup to another
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortOp$isEqualTo(other)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{other}}{CombinedCohortGroup instance
+getDependentCohortIds}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortOp-getDependentCohortIds"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortOp-getDependentCohortIds}{}}}
+\subsection{Method \code{getDependentCohortIds()}}{
+Gets the dependent cohortIds from this operation
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortOp$getDependentCohortIds()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CombinedCohortOp-clone"></a>}}
+\if{latex}{\out{\hypertarget{method-CombinedCohortOp-clone}{}}}
+\subsection{Method \code{clone()}}{
+The objects of this class are cloneable with this method.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CombinedCohortOp$clone(deep = FALSE)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{deep}}{Whether to make a deep clone.}
+}
+\if{html}{\out{</div>}}
+}
+}
+}
diff --git a/man/addCombinedCohort.Rd b/man/addCombinedCohort.Rd
new file mode 100644
index 0000000..b3e5f85
--- /dev/null
+++ b/man/addCombinedCohort.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CombinedCohort.R
+\name{addCombinedCohort}
+\alias{addCombinedCohort}
+\title{Add cohort subset definition to a cohort definition set}
+\usage{
+addCombinedCohort(
+  cohortDefinitionSet,
+  combinedCohortDefiniton,
+  overwriteExisting = FALSE
+)
+}
+\arguments{
+\item{cohortDefinitionSet}{data.frame that conforms to CohortDefinitionSet}
+
+\item{combinedCohortDefiniton}{CombinedCohortDefinition instance}
+
+\item{overwriteExisting}{Overwrite existing subset definition of the same definitionId if present}
+}
+\description{
+Given a subset definition and cohort definition set, this function returns a modified cohortDefinitionSet
+That contains cohorts that's have parent's contained within the base cohortDefinitionSet
+
+Also adds the columns subsetParent and isSubset that denote if the cohort is a subset and what the parent definition
+is.
+}
diff --git a/man/createCombinedCohortDef.Rd b/man/createCombinedCohortDef.Rd
new file mode 100644
index 0000000..c7bfff4
--- /dev/null
+++ b/man/createCombinedCohortDef.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CombinedCohort.R
+\name{createCombinedCohortDef}
+\alias{createCombinedCohortDef}
+\title{Defines a combined cohort using combined cohort operations}
+\usage{
+createCombinedCohortDef(cohortId, cohortName, expression)
+}
+\arguments{
+\item{cohortId}{The output cohort id from applying the combine expression.}
+
+\item{cohortName}{The output cohort name}
+
+\item{expression}{The combine operator that will yield the final cohort.}
+}
+\description{
+Creates an instance of CombinedCohortDef with the provided cohortId, cohortName and the combine operator expression
+}
diff --git a/man/createCombinedCohortOp.Rd b/man/createCombinedCohortOp.Rd
new file mode 100644
index 0000000..3d829eb
--- /dev/null
+++ b/man/createCombinedCohortOp.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CombinedCohort.R
+\name{createCombinedCohortOp}
+\alias{createCombinedCohortOp}
+\title{Create CombinedCohortOp instance}
+\usage{
+createCombinedCohortOp(targetCohortIds, opType)
+}
+\arguments{
+\item{targetCohortIds}{list of target cohort IDs to combine in this operation}
+
+\item{opType}{The op type of this cohort combination, can only be 'union'}
+}
+\description{
+A factory function to create CombinedCohortOp
+}
diff --git a/tests/testthat/test-CombinationCohorts.R b/tests/testthat/test-CombinationCohorts.R
new file mode 100644
index 0000000..a106471
--- /dev/null
+++ b/tests/testthat/test-CombinationCohorts.R
@@ -0,0 +1,66 @@
+test_that("combination cohort generation", {
+  
+  recordKeepingFolder = file.path(outputFolder, "RecordKeeping")
+  
+  createCohortSql <- function (subjectId, cohortStartDate, cohortEndDate) {
+    templateSql = 
+"insert into @target_database_schema.@target_cohort_table (cohort_definition_id, subject_id, cohort_start_date, cohort_end_date)
+  values (@target_cohort_id, @subject_id, CAST('@cohort_start_date' AS DATE), CAST('@cohort_end_date' AS DATE));"
+    return (SqlRender::render(templateSql, 
+                              subject_id = subjectId, 
+                              cohort_start_date = cohortStartDate, 
+                              cohort_end_date = cohortEndDate))
+  }
+  
+  testCohortDefinitionSet <- data.frame(cohortId = c(50:52),
+                                        cohortName = paste0('Cohort ',c(50:52)),
+                                        sql=c(
+                                          createCohortSql(1, "20160101", "20160301"),
+                                          createCohortSql(1, "20160201", "20160401"),
+                                          createCohortSql(1, "20160301", "20160501")
+                                        ),
+                                        json=c("{}","{}","{}"))
+  
+  cohortDefinitionSet <- rbind(CohortGenerator::createEmptyCohortDefinitionSet(), testCohortDefinitionSet)
+
+  #build cohort and combined cohort defs
+  
+  combinedCohortOp <- CohortGenerator::createCombinedCohortOp(targetCohortIds = c(50,51,52), opType = "union")
+  combinedCohortDef <- CohortGenerator::createCombinedCohortDef(cohortId = 53, cohortName="Cohort 50,51,52 combo", expression = combinedCohortOp)
+
+  cohortDefinitionSet <- cohortDefinitionSet %>%
+    CohortGenerator::addCombinedCohort(combinedCohortDef)
+  
+  cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable = "combinedCohorts_cohort")
+  
+  connection <- DatabaseConnector::connect(connectionDetails = connectionDetails)
+  on.exit(DatabaseConnector::disconnect(connection))
+  
+  CohortGenerator::createCohortTables(
+    connection = connection,
+    cohortDatabaseSchema = "main",
+    cohortTableNames = cohortTableNames
+  )
+
+  cohortsGenerated <- CohortGenerator::generateCohortSet(
+    connection = connection,
+    cdmDatabaseSchema = "main",
+    cohortDatabaseSchema = "main",
+    cohortTableNames = cohortTableNames,
+    cohortDefinitionSet = cohortDefinitionSet,
+    incremental = FALSE,
+    incrementalFolder = recordKeepingFolder
+  )
+
+  # how to test
+  resultSql <- "select cohort_definition_id, subject_id, cohort_start_date, cohort_end_date FROM main.@cohort_table where cohort_definition_id = 53"
+  resultSql <- SqlRender::render(resultSql, cohort_table = cohortTableNames$cohortTable) 
+  resultsDF <- DatabaseConnector::querySql(connection = connection, SqlRender::translate(resultSql, targetDialect = "sqlite"))
+  
+  expect_true(nrow(resultsDF) == 1)
+  expect_true(resultsDF %>% dplyr::select("COHORT_START_DATE") %>% dplyr::pull() == "2016-01-01")
+  expect_true(resultsDF %>% dplyr::select("COHORT_END_DATE") %>% dplyr::pull() == "2016-05-01")
+  
+  unlink(recordKeepingFolder, recursive = TRUE)
+  
+})
\ No newline at end of file

From d0734ee1d80472e30f43938040e11396be32727e Mon Sep 17 00:00:00 2001
From: Chris Knoll <cknoll@ohdsi.org>
Date: Mon, 23 Sep 2024 10:33:11 -0400
Subject: [PATCH 2/2] Address git action warnings/errors.

---
 R/CombinedCohort.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/CombinedCohort.R b/R/CombinedCohort.R
index 65c4b42..4e1d918 100644
--- a/R/CombinedCohort.R
+++ b/R/CombinedCohort.R
@@ -213,12 +213,11 @@ addCombinedCohort <- function(cohortDefinitionSet,
     checkmate::assertSubset(dependentCohortIds, cohortDefinitionSet$cohortId)
   }
 
-  # TODO: going to just fail if the cohortId exists for now, later we can implement overwrite
   if (!overwriteExisting && nrow(cohortDefinitionSet %>% dplyr::filter(.data$cohortId == combinedCohortDefiniton$cohortId)) > 0) {
     stop("The specified cohortId for this combined cohort already exists in the cohort definition set")
   } else {
     # remove this definition from the set
-    cohortDefinitionSet <- cohortDefinitionSet %>% filter(cohortId != combinedCohortDefiniton$cohortId)
+    cohortDefinitionSet <- cohortDefinitionSet %>% dplyr::filter(.data$cohortId != combinedCohortDefiniton$cohortId)
   }
 
   defSql <- SqlRender::readSql(system.file("sql", "sql_server", "combinedCohorts", "CombinedCohortDefinition.sql", package = "CohortGenerator"))