diff --git a/R/TemplateImplementations.R b/R/TemplateImplementations.R
index 8db2b31..f463afb 100644
--- a/R/TemplateImplementations.R
+++ b/R/TemplateImplementations.R
@@ -73,7 +73,7 @@
 #' @export
 createRxNormCohortTemplateDefinition <- function(indentifierExpression = "concept_id * 1000",
                                                  cdmDatabaseSchema,
-                                                 rxNormTable = "cohort_rx_norm_ref_table",
+                                                 rxNormTable = "cohort_rx_norm_ref",
                                                  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
                                                  cohortDatabaseSchema,
                                                  priorObservationPeriod = 365,
@@ -163,7 +163,7 @@ createRxNormCohortTemplateDefinition <- function(indentifierExpression = "concep
 #' @export
 createAtcCohortTemplateDefinition <- function(indentifierExpression = "concept_id * 1000 + 4",
                                               cdmDatabaseSchema,
-                                              atcTable = "cohort_atc_table",
+                                              atcTable = "cohort_atc_ref",
                                               tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
                                               cohortDatabaseSchema,
                                               priorObservationPeriod = 365,
@@ -192,4 +192,105 @@ createAtcCohortTemplateDefinition <- function(indentifierExpression = "concept_i
                                        requireConnectionRefs = TRUE)
 
   return(invisible(def))
+}
+
+
+# Reference function for the SNOMED condition template.
+# Renders and executes templates/snomed/references.sql, then queries the conditions reference
+# table for the cohort ids and names, which are returned as the template's cohort references.
+# NOTE(review): includeDescendants is accepted but is not used anywhere in this function.
+.snomedTemplateRefFun <- function(connection,
+                                  cohortDatabaseSchema,
+                                  vocabularyDatabaseSchema,
+                                  tempEmulationSchema,
+                                  conditionsTable,
+                                  includeDescendants,
+                                  indentifierExpression) {
+  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = file.path("templates", "snomed", "references.sql"),
+                                           dbms = DatabaseConnector::dbms(connection),
+                                           packageName = utils::packageName(),
+                                           identifier_expression = indentifierExpression,
+                                           cohort_database_schema = cohortDatabaseSchema,
+                                           tempEmulationSchema = tempEmulationSchema,
+                                           conditions_table = conditionsTable,
+                                           vocabulary_database_schema = vocabularyDatabaseSchema)
+  DatabaseConnector::executeSql(connection, sql)
+
+  # Read back from the table named by conditionsTable; references.sql writes the name to cohort_definition_name
+  sql <- "SELECT cohort_definition_id as cohort_id, cohort_definition_name as cohort_name FROM @cohort_database_schema.@conditions_table;"
+  references <- DatabaseConnector::renderTranslateQuerySql(connection = connection,
+                                                           sql = sql,
+                                                           cohort_database_schema = cohortDatabaseSchema,
+                                                           snakeCaseToCamelCase = TRUE,
+                                                           conditions_table = conditionsTable)
+  return(references)
+}
+
+# Execute function for the SNOMED condition template.
+# Renders templates/snomed/definition.sql for the dbms of the connection and executes it.
+.createSnomeCohorts <- function(connection,
+                                cdmDatabaseSchema,
+                                cohortDatabaseSchema,
+                                cohortTableNames,
+                                vocabularyDatabaseSchema,
+                                tempEmulationSchema,
+                                conditionsTable,
+                                priorObservationPeriod = 365) {
+  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = file.path("templates", "snomed", "definition.sql"),
+                                           dbms = DatabaseConnector::dbms(connection),
+                                           packageName = utils::packageName(),
+                                           tempEmulationSchema = tempEmulationSchema,
+                                           conditions_table = conditionsTable,
+                                           cohort_table = cohortTableNames$cohortTable,
+                                           prior_observation_period = priorObservationPeriod,
+                                           vocabulary_database_schema = vocabularyDatabaseSchema,
+                                           cohort_database_schema = cohortDatabaseSchema,
+                                           cdm_database_schema = cdmDatabaseSchema)
+
+  DatabaseConnector::executeSql(connection, sql)
+}
+
+#' Create SNOMED cohort Template Definition
+#' @description
+#' Template cohort definition for all OHDSI standard conditions
+#' This cohort will use the vocabulary tables to automatically generate a set of cohorts that have the
+#' cohortId = conceptId * 1000, note that this can be customised with the "identifierExpression" if you are using this
+#' with other cohorts you may wish to change this to allow uniqueness
+#' @param indentifierExpression an expression for setting the cohort id for the resulting cohort.
Must produce unique ids
+#' @param conditionsTable reference table to store condition cohorts
+#' @param priorObservationPeriod (optional) required prior observation period for individuals
+#' @param vocabularyDatabaseSchema schema holding the vocabulary tables, defaults to cdmDatabaseSchema
+#' @inheritParams generateCohortSet
+#' @returns a CohortTemplateDefinition instance
+#' @export
+createSnomedCohortTemplateDefinition <- function(indentifierExpression = "concept_id * 1000",
+                                                 cdmDatabaseSchema,
+                                                 conditionsTable = "cohort_conditions_ref",
+                                                 tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
+                                                 cohortDatabaseSchema,
+                                                 priorObservationPeriod = 365,
+                                                 vocabularyDatabaseSchema = cdmDatabaseSchema) {
+  # Arguments handed to createCohortTemplateDefintion together with executeFun (.createSnomeCohorts)
+  executeArgs <- list(
+    vocabularyDatabaseSchema = vocabularyDatabaseSchema,
+    priorObservationPeriod = priorObservationPeriod,
+    conditionsTable = conditionsTable,
+    tempEmulationSchema = tempEmulationSchema
+  )
+  # Arguments handed to createCohortTemplateDefintion together with templateRefFun (.snomedTemplateRefFun)
+  templateRefArgs <- list(
+    cohortDatabaseSchema = cohortDatabaseSchema,
+    vocabularyDatabaseSchema = vocabularyDatabaseSchema,
+    indentifierExpression = indentifierExpression,
+    conditionsTable = conditionsTable,
+    tempEmulationSchema = tempEmulationSchema
+  )
+
+  def <- createCohortTemplateDefintion(name = "All SNOMED Conditions",
+                                       templateRefFun = .snomedTemplateRefFun,
+                                       executeFun = .createSnomeCohorts,
+                                       templateRefArgs = templateRefArgs,
+                                       executeArgs = executeArgs,
+                                       requireConnectionRefs = TRUE)
+  return(invisible(def))
 }
\ No newline at end of file
diff --git a/inst/sql/sql_server/templates/snomed/definition.sql b/inst/sql/sql_server/templates/snomed/definition.sql
new file mode 100644
index 0000000..f2f8b6f
--- /dev/null
+++ b/inst/sql/sql_server/templates/snomed/definition.sql
@@ -0,0 +1,107 @@
+{DEFAULT @require_visit_occurence=FALSE}
+{DEFAULT @visit_occurrence_ids = 9201} -- INPATIENT VISIT
+{DEFAULT @require_second_diagnosis = FALSE}
+
+-- Generates one cohort per condition concept that has a row in the conditions reference table.
+-- Cohort entry is the earliest matching condition occurrence, counting descendant concepts as matches.
+-- NOTE(review): the prior_observation_period value supplied by the R caller is not referenced
+-- anywhere in this script - confirm whether an observation period restriction is still to be added.
+
+DROP TABLE IF EXISTS #concept_ancestor_grp;
+
+--HINT DISTRIBUTE_ON_KEY(descendant_concept_id)
+select
+    ca1.ancestor_concept_id
+    , ca1.descendant_concept_id
+into #concept_ancestor_grp
+from @vocabulary_database_schema.concept_ancestor ca1
+inner join
+(
+    select
+        c1.concept_id
+        , c1.concept_name
+        , c1.vocabulary_id
+        , c1.domain_id
+    from @vocabulary_database_schema.concept c1
+    inner join @vocabulary_database_schema.concept_ancestor ca1
+        on ca1.ancestor_concept_id = 441840 -- clinical finding
+        and c1.concept_id = ca1.descendant_concept_id
+    where
+    (
+        ca1.min_levels_of_separation > 2
+        or c1.concept_id in (433736, 433595, 441408, 72404, 192671, 137977, 434621, 437312, 439847, 4171917, 438555, 4299449, 375258, 76784, 40483532, 4145627, 434157, 433778, 258449, 313878)
+    )
+    -- NOTE: this set could be improved to exclude more irrelevant/useless cohorts but has been used in REWARD
+    and c1.concept_name not like '%finding'
+    and c1.concept_name not like 'disorder of%'
+    and c1.concept_name not like 'finding of%'
+    and c1.concept_name not like 'disease of%'
+    and c1.concept_name not like 'injury of%'
+    and c1.concept_name not like '%by site'
+    and c1.concept_name not like '%by body site'
+    and c1.concept_name not like '%by mechanism'
+    and c1.concept_name not like '%of body region'
+    and c1.concept_name not like '%of anatomical site'
+    and c1.concept_name not like '%of specific body structure%'
+    and c1.domain_id = 'Condition'
+) t1 on ca1.ancestor_concept_id = t1.concept_id
+-- keep only concepts that are present in the conditions reference table
+inner join @cohort_database_schema.@conditions_table ocr ON (
+    ocr.concept_id = ca1.ancestor_concept_id
+)
+;
+
+--incident outcomes - the person must also have a matching condition recorded against a visit (limited to visit_occurrence_ids when require_visit_occurence is set)
+insert into @cohort_database_schema.@cohort_table
+(
+    cohort_definition_id
+    , subject_id
+    , cohort_start_date
+    , cohort_end_date
+)
+select
+    ocr.cohort_definition_id
+    , t1.person_id as subject_id
+    , t1.cohort_start_date
+    , t1.cohort_start_date as cohort_end_date
+from
+(
+    select
+        co1.person_id
+        , ca1.ancestor_concept_id
+        , min(co1.condition_start_date) as cohort_start_date
+    from @cdm_database_schema.condition_occurrence co1
+    inner join #concept_ancestor_grp ca1
+        on co1.condition_concept_id = ca1.descendant_concept_id
+    group by
+        co1.person_id
+        , ca1.ancestor_concept_id
+) t1
+inner join @cohort_database_schema.@conditions_table ocr ON (
+    ocr.concept_id = t1.ancestor_concept_id
+)
+inner join
+(
+    select
+        co1.person_id
+        , ca1.ancestor_concept_id
+        , min(vo1.visit_start_date) as cohort_start_date
+    from @cdm_database_schema.condition_occurrence co1
+    inner join @cdm_database_schema.visit_occurrence vo1
+        on co1.person_Id = vo1.person_id
+        and co1.visit_occurrence_id = vo1.visit_occurrence_id
+        {@require_visit_occurence} ? { and visit_concept_id IN (@visit_occurrence_ids)}
+    inner join #concept_ancestor_grp ca1
+        on co1.condition_concept_id = ca1.descendant_concept_id
+    group by
+        co1.person_id
+        , ca1.ancestor_concept_id
+) t2
+    on t1.person_id = t2.person_id
+    and t1.ancestor_concept_id = t2.ancestor_concept_id
+-- NOTE(review): t2 exposes no confirmed_date column, so this filter cannot work if it is ever enabled
+    {@require_second_diagnosis} ? {where t2.cohort_start_date < t2.confirmed_date}
+;
+
+TRUNCATE TABLE #concept_ancestor_grp;
+DROP TABLE #concept_ancestor_grp;
\ No newline at end of file
diff --git a/inst/sql/sql_server/templates/snomed/references.sql b/inst/sql/sql_server/templates/snomed/references.sql
new file mode 100644
index 0000000..475144b
--- /dev/null
+++ b/inst/sql/sql_server/templates/snomed/references.sql
@@ -0,0 +1,64 @@
+{DEFAULT @require_second_diagnosis = FALSE}
+
+-- Create outcome cohort definitions
+-- NOTE(review): this script only inserts, it assumes the conditions reference table already exists and is empty - confirm
+DROP TABLE IF EXISTS #cpt_anc_grp;
+
+select
+    ca1.ancestor_concept_id
+    , ca1.descendant_concept_id
+into #cpt_anc_grp
+from @vocabulary_database_schema.concept_ancestor ca1
+inner join
+(
+    select
+        c1.concept_id
+        , c1.concept_name
+        , c1.vocabulary_id
+        , c1.domain_id
+    from @vocabulary_database_schema.concept c1
+    inner join @vocabulary_database_schema.concept_ancestor ca1
+        on ca1.ancestor_concept_id = 441840 /* clinical finding */
+        and c1.concept_id = ca1.descendant_concept_id
+    where
+    (
+        ca1.min_levels_of_separation > 2
+        or c1.concept_id in (433736, 433595, 441408, 72404, 192671, 137977, 434621, 437312, 439847, 4171917, 438555, 4299449, 375258, 76784, 40483532, 4145627, 434157, 433778, 258449, 313878)
+    )
+    and c1.concept_name not like '%finding'
+    and c1.concept_name not like 'disorder of%'
+    and c1.concept_name not like 'finding of%'
+    and c1.concept_name not like 'disease of%'
+    and c1.concept_name not like 'injury of%'
+    and c1.concept_name not like '%by site'
+    and c1.concept_name not like '%by body site'
+    and c1.concept_name not like '%by mechanism'
+    and c1.concept_name not like '%of body region'
+    and c1.concept_name not like '%of anatomical site'
+    and c1.concept_name not like '%of specific body structure%'
+    and c1.domain_id = 'Condition'
+) t1
+    on ca1.ancestor_concept_id = t1.concept_id
+;
+
+--outcomes not requiring a hospitalization
+INSERT INTO @cohort_database_schema.@conditions_table
+(   cohort_definition_id,
+    cohort_definition_name
+    , short_name
+    , concept_id
+)
+select
+    DISTINCT
+    @identifier_expression as cohort_definition_id,
+    'outcome of ' + c1.concept_name + ' - first occurence of diagnosis' {@require_second_diagnosis} ? {+ ' with 2 diagnosis codes '} as cohort_definition_name
+    , ' outcome of ' + c1.concept_name {@require_second_diagnosis} ? {+ ' requiring 2 DX'} as short_name
+    , c1.concept_id as concept_id
+from
+#cpt_anc_grp ca1
+inner join @vocabulary_database_schema.concept c1
+    on ca1.ancestor_concept_id = c1.concept_id
+;
+
+TRUNCATE TABLE #cpt_anc_grp;
+DROP TABLE #cpt_anc_grp;
\ No newline at end of file
diff --git a/vignettes/UsingTemplateCohorts.Rmd b/vignettes/UsingTemplateCohorts.Rmd
new file mode 100644
index 0000000..0e2c554
--- /dev/null
+++ b/vignettes/UsingTemplateCohorts.Rmd
@@ -0,0 +1,57 @@
+---
+title: "Using Template Cohorts"
+author: "James P.
Gilbert" +date: "`r Sys.Date()`" +output: + pdf_document: + toc: yes + html_document: + number_sections: yes + toc: yes +vignette: > + %\VignetteIndexEntry{Using Template Cohorts} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + +# Introduction +This guide intends to demonstrate the usage of template cohorts within the Cohort Generator package. +This can provide a convenient approach to computing large sets of features. +While this is possible through the use of custom scripts, doing so will often require one-off approaches to integrating +references within studies or other OHDSI packages, greatly limiting their reproducibility. + +The principle behind this implementation is that, for all intents and purposes, cohorts created via "bulk" operations +should be treated no differently to cohorts created through circe definitions. + +## Limitations of this approach +For the design of reliable, reusable Phenotype Algorithms, we strongly advise the usage of circe based approaches. +Although a circe based approach may be less efficient than pure SQL, defining cohorts in pure SQL can greatly limit the +reproducibility and replicability of studies using these cohorts. + +# Basic templates + +## Drug ingredient cohorts + +- Example code +- ATC ingredients + +## SNOMED condition cohorts + +- Example code + +# Creating custom cohort templates + +Creating custom cohort templates can be useful for generating large sets of cohorts, utilizing vocabularies. +This requires a good understanding of OHDSI standard vocabularies and the OMOP Common Data Model. + +In this example, we generate cohorts based on procedure codes using the Healthcare Common Procedure Coding System (HCPCS). +To simplify computation we will use only blood based procedures. +To do this we require a function that has two steps: + +1. 
**Creating references for cohorts**: all cohorts used within Cohort Generator require certain properties to allow +usage in other tools. These references may come from the vocabulary used by the CDM, or they may be defined via +other means. + +2. **Creating cohort logic in SQL**: This requires careful consideration of how cohorts interact within the CDM.