Fix bugs; revise DB test to use new runCohortGeneration function

OHDSI · Jun 26, 2024 · f263788 · f263788
1 parent e8ab94d
commit f263788
Show file tree

Hide file tree

Showing 6 changed files with 72 additions and 65 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -49,8 +49,9 @@ export(uploadResults)
 export(writeCsv)
 import(DatabaseConnector)
 import(R6)
-importFrom(dplyr,"%>%")
+import(dplyr)
 importFrom(grDevices,rgb)
+importFrom(methods,is)
 importFrom(rlang,':=')
 importFrom(rlang,.data)
 importFrom(stats,aggregate)

diff --git a/R/CohortGenerator.R b/R/CohortGenerator.R
@@ -21,10 +21,10 @@
 #' @import R6
 #' @import dplyr
 #' @importFrom grDevices rgb
+#' @importFrom methods is
 #' @importFrom stats aggregate setNames
 #' @importFrom utils write.csv install.packages menu packageVersion sessionInfo
 #' @importFrom rlang .data ':='
-#' @importFrom methods is
 NULL
 
 # Add custom assertions

diff --git a/R/Export.R b/R/Export.R
@@ -140,12 +140,6 @@ exportCohortDefinitionSet <- function(outputFolder, cohortDefinitionSet = NULL)
   cohortDefinitions <- createEmptyResult("cg_cohort_definition")
   cohortSubsets <- createEmptyResult("cg_cohort_subset_definition")
   if (!is.null(cohortDefinitionSet)) {
-    # Massage and save the cohort definition set
-    colsToRename <- c("cohortId", "cohortName", "sql", "json")
-    colInd <- which(names(cohortDefinitionSet) %in% colsToRename)
-    cohortDefinitions <- cohortDefinitionSet
-    names(cohortDefinitions)[colInd] <- c("cohortDefinitionId", "cohortName", "sqlCommand", "json")
-    cohortDefinitions$description <- ""
     cdsCohortSubsets <- getSubsetDefinitions(cohortDefinitionSet)
     if (length(cdsCohortSubsets) > 0) {
       for (i in seq_along(cdsCohortSubsets)) {
@@ -155,7 +149,33 @@ exportCohortDefinitionSet <- function(outputFolder, cohortDefinitionSet = NULL)
                                  json = as.character(cdsCohortSubsets[[i]]$toJSON())
                                ))
       }
+    } else {
+      # NOTE: In this case the cohortDefinitionSet has no subsets defined
+      # and so we need to add the additional columns that are defined
+      # in the function: addCohortSubsetDefinition. To do this, 
+      # we'll construct a copy of the cohortDefinitionSet with a single
+      # subset to get the proper structure and filter it to the
+      # cohorts of interest.
+      cdsCopy <- cohortDefinitionSet %>% 
+        addCohortSubsetDefinition(
+          cohortSubsetDefintion = createCohortSubsetDefinition(
+            definitionId = 1, 
+            name="empty", 
+            subsetOperators = list(
+              createDemographicSubset()
+            )
+          )
+        ) %>% dplyr::filter(cohortId == cohortDefinitionSet$cohortId)
+      cohortDefinitionSet <- cdsCopy
+    }
+    # Massage and save the cohort definition set
+    colsToRename <- c("cohortId", "cohortName", "sql", "json")
+    colInd <- which(names(cohortDefinitionSet) %in% colsToRename)
+    names(cohortDefinitionSet)[colInd] <- c("cohortDefinitionId", "cohortName", "sqlCommand", "json")
+    if (! "description" %in% names(cohortDefinitionSet)) {
+      cohortDefinitionSet$description <- ""
     }
+    cohortDefinitions <- cohortDefinitionSet[,intersect(names(cohortDefinitions), names(cohortDefinitionSet))]
   }
   writeCsv(
     x = cohortDefinitions,

diff --git a/R/RunCohortGeneration.R b/R/RunCohortGeneration.R
@@ -240,8 +240,8 @@ generateAndExportCohorts <- function(connection,
   )
 
   # Export the cohort definition set
-  #rlang::inform("Saving cohort definition set")
-  #exportCohortDefinitionSet(outputFolder, cohortDefinitionSet)  
+  rlang::inform("Saving cohort definition set")
+  exportCohortDefinitionSet(outputFolder, cohortDefinitionSet)
 }
 
 generateAndExportNegativeControls <- function(connection,

diff --git a/tests/testthat/test-RunCohortGeneration.R b/tests/testthat/test-RunCohortGeneration.R
@@ -47,11 +47,19 @@ test_that("Call runCohortGeneration happy path", {
     file = system.file("csv", "resultsDataModelSpecification.csv", package = "CohortGenerator")
   )
   expectedFileList <- paste0(unique(spec$tableName), ".csv")
-  diffs <- setdiff(basename(list.files(testOutputFolder)), expectedFileList)
+  diffs <- setdiff(expectedFileList, basename(list.files(testOutputFolder)))
+  expect_true(length(diffs) == 0)
 
-  # NOTE: The only difference between the specification and the
-  # output folder is expected to be the resultsDataModelSpecification.csv
-  expect_true(all(diffs == "resultsDataModelSpecification.csv"))
+  # Make sure that each output file contains the same columns as defined
+  # in the specification
+  for (i in seq_along(expectedFileList)) {
+    data <- readCsv(
+      file = file.path(testOutputFolder, expectedFileList[i])
+    )
+    tbl <- tools::file_path_sans_ext(expectedFileList[i])
+    emptyResult <- CohortGenerator:::createEmptyResult(tbl)
+    expect_equal(!!sort(names(data)), !!sort(names(emptyResult)))
+  }
 
   # Make sure that the output that specifies a database ID has the correct
   # value included

diff --git a/tests/testthat/test-dbms-platforms.R b/tests/testthat/test-dbms-platforms.R
@@ -1,19 +1,16 @@
 testPlatform <- function(dbmsDetails) {
   cohortTableNames <- getCohortTableNames(cohortTable = dbmsDetails$cohortTable)
+  platformOutputFolder <- file.path(outputFolder, dbmsDetails$connectionDetails$dbms)
   on.exit({
     dropCohortStatsTables(
       connectionDetails = dbmsDetails$connectionDetails,
       cohortDatabaseSchema = dbmsDetails$cohortDatabaseSchema,
       cohortTableNames = cohortTableNames,
       dropCohortTable = TRUE
     )
+    unlink(platformOutputFolder, recursive = TRUE)
   })
-
-  createCohortTables(
-    connectionDetails = dbmsDetails$connectionDetails,
-    cohortDatabaseSchema = dbmsDetails$cohortDatabaseSchema,
-    cohortTableNames = cohortTableNames
-  )
+
   cohortsWithStats <- getCohortDefinitionSet(
     settingsFileName = "testdata/name/Cohorts.csv",
     jsonFolder = "testdata/name/cohorts",
@@ -23,47 +20,6 @@ testPlatform <- function(dbmsDetails) {
     packageName = "CohortGenerator",
     verbose = FALSE
   )
-
-  cohortsGenerated <- generateCohortSet(
-    connectionDetails = dbmsDetails$connectionDetails,
-    cdmDatabaseSchema = dbmsDetails$cdmDatabaseSchema,
-    cohortDatabaseSchema = dbmsDetails$cohortDatabaseSchema,
-    cohortTableNames = cohortTableNames,
-    cohortDefinitionSet = cohortsWithStats,
-    incremental = TRUE,
-    incrementalFolder = file.path(outputFolder, "RecordKeeping", dbmsDetails$connectionDetails$dbms)
-  )
-  expect_equal(nrow(cohortsGenerated), nrow(cohortsWithStats))
-
-  # Get the cohort counts
-  cohortCounts <- getCohortCounts(
-    connectionDetails = dbmsDetails$connectionDetails,
-    cohortDatabaseSchema = dbmsDetails$cohortDatabaseSchema,
-    cohortTable = cohortTableNames$cohortTable,
-    databaseId = dbmsDetails$dbmsPlatform,
-    cohortDefinitionSet = cohortsWithStats
-  )
-  expect_equal(nrow(cohortsGenerated), nrow(cohortCounts))
-
-  # Insert the inclusion rule names before exporting the stats tables
-  insertInclusionRuleNames(
-    connectionDetails = dbmsDetails$connectionDetails,
-    cohortDefinitionSet = cohortsWithStats,
-    cohortDatabaseSchema = dbmsDetails$cohortDatabaseSchema,
-    cohortInclusionTable = cohortTableNames$cohortInclusionTable
-  )
-
-  exportCohortStatsTables(
-    connectionDetails = dbmsDetails$connectionDetails,
-    cohortTableNames = cohortTableNames,
-    cohortDatabaseSchema = dbmsDetails$cohortDatabaseSchema,
-    cohortStatisticsFolder = file.path(outputFolder, dbmsDetails$dbmsPlatform),
-    snakeCaseToCamelCase = FALSE,
-    fileNamesInSnakeCase = TRUE,
-    incremental = TRUE,
-    databaseId = dbmsDetails$dbmsPlatform
-  )
-
   subsetOperations <- list(
     createCohortSubset(
       cohortIds = 2,
@@ -91,16 +47,38 @@ testPlatform <- function(dbmsDetails) {
     subsetOperators = subsetOperations
   )
   cohortsWithSubsets <- addCohortSubsetDefinition(cohortsWithStats, subsetDef)
-  cohortsGenerated <- generateCohortSet(
+
+  ncSet <- getNegativeControlOutcomeCohortsForTest()
+
+  runCohortGeneration(
     connectionDetails = dbmsDetails$connectionDetails,
     cdmDatabaseSchema = dbmsDetails$cdmDatabaseSchema,
     cohortDatabaseSchema = dbmsDetails$cohortDatabaseSchema,
     cohortTableNames = cohortTableNames,
     cohortDefinitionSet = cohortsWithSubsets,
-    incremental = TRUE,
-    incrementalFolder = file.path(outputFolder, "RecordKeeping", dbmsDetails$connectionDetails$dbms)
+    negativeControlOutcomeCohortSet = ncSet,
+    occurrenceType = "first",
+    detectOnDescendants = TRUE,
+    outputFolder = platformOutputFolder,
+    databaseId = dbmsDetails$connectionDetails$dbms,
+    incremental = F
+  )
+
+  # Check the output to verify the generation worked properly
+  cohortsGenerated <- readCsv(
+    file = file.path(outputFolder, "cg_cohort_generation.csv")
+  )
+  expect_equal(nrow(cohortsGenerated), (nrow(cohortDefintionSet) + nrow(cohortsWithSubsets)))
+
+  cohortCounts <- readCsv(
+    file = file.path(outputFolder, "cg_cohort_count.csv")
+  )
+  expect_equal(nrow(cohortsGenerated), nrow(cohortCounts))
+
+  ncCohortCounts <- readCsv(
+    file = file.path(outputFolder, "cg_cohort_count_neg_ctrl.csv")
   )
-  expect_equal(nrow(cohortsGenerated), nrow(cohortsWithSubsets))
+  expect_equal(nrow(ncSet), nrow(ncCohortCounts))
 }
 
 # This file contains platform specific tests