From 04d02a650f5a74320ccaa4712a66b489bf0c9571 Mon Sep 17 00:00:00 2001
From: Evanette Burrows
Date: Fri, 29 Dec 2023 12:26:47 -0500
Subject: [PATCH 01/10] Add support for loading synthea v3.1 and v3.2
---
R/CreateSyntheaTables.r | 8 +-
R/LoadSyntheaTables.r | 10 +-
.../v310/create_synthea_tables.sql | 326 ++++++++++++++++++
.../v320/create_synthea_tables.sql | 326 ++++++++++++++++++
4 files changed, 666 insertions(+), 4 deletions(-)
create mode 100644 inst/sql/sql_server/synthea_version/v310/create_synthea_tables.sql
create mode 100644 inst/sql/sql_server/synthea_version/v320/create_synthea_tables.sql
diff --git a/R/CreateSyntheaTables.r b/R/CreateSyntheaTables.r
index 173c740..df7e522 100644
--- a/R/CreateSyntheaTables.r
+++ b/R/CreateSyntheaTables.r
@@ -10,7 +10,7 @@
#' Server, this should specify both the database and the schema,
#' so for example 'cdm_instance.dbo'.
#' @param syntheaVersion The version of Synthea used to generate the csv files.
-#' Currently "2.7.0" and "3.0.0" are is supported.
+#' Currently "2.7.0", "3.0.0", "3.1.0", and "3.2.0" are supported.
#'
#'@export
@@ -25,8 +25,12 @@ CreateSyntheaTables <-
sqlFilePath <- "synthea_version/v270"
else if (syntheaVersion == "3.0.0")
sqlFilePath <- "synthea_version/v300"
+ else if (syntheaVersion == "3.1.0")
+ sqlFilePath <- "synthea_version/v310"
+ else if (syntheaVersion == "3.2.0")
+ sqlFilePath <- "synthea_version/v320"
else
- stop("Invalid synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.")
+ stop("Invalid synthea version specified. Currently \"2.7.0\", \"3.0.0\", \"3.1.0\" and \"3.2.0\" are supported.")
sqlFilename <- paste0(sqlFilePath, "/", "create_synthea_tables.sql")
diff --git a/R/LoadSyntheaTables.r b/R/LoadSyntheaTables.r
index ca356bb..699df3c 100644
--- a/R/LoadSyntheaTables.r
+++ b/R/LoadSyntheaTables.r
@@ -11,7 +11,7 @@
#' \cr\code{./run_synthea -p 1000}
#'
#' You can enable csv records in src/main/resources/synthea.properties by setting exporter.csv.export = true.
-#' As of the time of this writing the csv files can be found at synthe/output/csv.
+#' As of the time of this writing the csv files can be found at synthea/output/csv.
#' For more details: \href{https://github.com/synthetichealth/synthea/wiki/Basic-Setup-and-Running}{Synthea Basic Setup}
#'
#' @param connectionDetails An R object of type\cr\code{connectionDetails} created using the
@@ -35,7 +35,7 @@ LoadSyntheaTables <-
{
csvList <- list.files(syntheaFileLoc, pattern = "*.csv")
-
+
conn <- DatabaseConnector::connect(connectionDetails)
for (csv in csvList) {
@@ -60,6 +60,12 @@ LoadSyntheaTables <-
if ("DATE" %in% colnames(syntheaTable))
syntheaTable$DATE <-
as.Date(syntheaTable$DATE, format = "%Y-%m-%d")
+ if ("START_DATE" %in% colnames(syntheaTable))
+ syntheaTable$START_DATE <-
+ as.Date(syntheaTable$START_DATE, format = "%Y-%m-%d")
+ if ("END_DATE" %in% colnames(syntheaTable))
+ syntheaTable$END_DATE <-
+ as.Date(syntheaTable$END_DATE, format = "%Y-%m-%d")
if ("BIRTHDATE" %in% colnames(syntheaTable))
syntheaTable$BIRTHDATE <-
as.Date(syntheaTable$BIRTHDATE, format = "%Y-%m-%d")
diff --git a/inst/sql/sql_server/synthea_version/v310/create_synthea_tables.sql b/inst/sql/sql_server/synthea_version/v310/create_synthea_tables.sql
new file mode 100644
index 0000000..984c16b
--- /dev/null
+++ b/inst/sql/sql_server/synthea_version/v310/create_synthea_tables.sql
@@ -0,0 +1,326 @@
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.allergies (
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+system varchar(255),
+description varchar(255),
+"type" varchar(255),
+category varchar(255),
+reaction1 varchar(255),
+description1 varchar(255),
+severity1 varchar(255),
+reaction2 varchar(255),
+description2 varchar(255),
+severity2 varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.careplans (
+id varchar(1000),
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255),
+reasoncode varchar(255),
+reasondescription varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.conditions (
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.encounters (
+id varchar(1000),
+start date,
+stop date,
+patient varchar(1000),
+organization varchar(1000),
+provider varchar(1000),
+payer varchar(1000),
+encounterclass varchar(1000),
+code varchar(100),
+description varchar(255),
+base_encounter_cost numeric,
+total_claim_cost numeric,
+payer_coverage numeric,
+reasoncode varchar(100),
+reasondescription varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.immunizations (
+"date" date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255),
+base_cost numeric
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.imaging_studies (
+id varchar(1000),
+"date" date,
+patient varchar(1000),
+encounter varchar(1000),
+series_uid varchar(1000),
+bodysite_code varchar(100),
+bodysite_description varchar(255),
+modality_code varchar(100),
+modality_description varchar(255),
+instance_uid varchar(1000),
+SOP_code varchar(100),
+SOP_description varchar(255),
+procedure_code varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.medications (
+start date,
+stop date,
+patient varchar(1000),
+payer varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(1000),
+base_cost numeric,
+payer_coverage numeric,
+dispenses int,
+totalcost numeric,
+reasoncode varchar(100),
+reasondescription varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.observations (
+"date" date,
+patient varchar(1000),
+encounter varchar(1000),
+category varchar(1000),
+code varchar(100),
+description varchar(255),
+value varchar(1000),
+units varchar(100),
+"type" varchar(100)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.organizations (
+id varchar(1000),
+"name" varchar(1000),
+address varchar(1000),
+city varchar(100),
+state varchar(100),
+zip varchar(100),
+lat numeric,
+lon numeric,
+phone varchar(100),
+revenue numeric,
+utilization varchar(100)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.patients (
+id varchar(1000),
+birthdate date,
+deathdate date,
+ssn varchar(100),
+drivers varchar(100),
+passport varchar(100),
+prefix varchar(100),
+first varchar(100),
+last varchar(100),
+suffix varchar(100),
+maiden varchar(100),
+marital varchar(100),
+race varchar(100),
+ethnicity varchar(100),
+gender varchar(100),
+birthplace varchar(100),
+address varchar(100),
+city varchar(100),
+state varchar(100),
+county varchar(100),
+fips varchar(100),
+zip varchar(100),
+lat numeric,
+lon numeric,
+healthcare_expenses numeric,
+healthcare_coverage numeric,
+income int
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.procedures (
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255),
+base_cost numeric,
+reasoncode varchar(1000),
+reasondescription varchar(1000)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.providers (
+id varchar(1000),
+organization varchar(1000),
+"name" varchar(100),
+gender varchar(100),
+speciality varchar(100),
+address varchar(255),
+city varchar(100),
+state varchar(100),
+zip varchar(100),
+lat numeric,
+lon numeric,
+encounters int,
+"procedures" int
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.devices (
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255),
+udi varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.claims (
+ id varchar(1000),
+ patientid varchar(1000),
+ providerid varchar(1000),
+ primarypatientinsuranceid varchar(1000),
+ secondarypatientinsuranceid varchar(1000),
+ departmentid varchar(1000),
+ patientdepartmentid varchar(1000),
+ diagnosis1 varchar(1000),
+ diagnosis2 varchar(1000),
+ diagnosis3 varchar(1000),
+ diagnosis4 varchar(1000),
+ diagnosis5 varchar(1000),
+ diagnosis6 varchar(1000),
+ diagnosis7 varchar(1000),
+ diagnosis8 varchar(1000),
+ referringproviderid varchar(1000),
+ appointmentid varchar(1000),
+ currentillnessdate date,
+ servicedate date,
+ supervisingproviderid varchar(1000),
+ status1 varchar(1000),
+ status2 varchar(1000),
+ statusp varchar(1000),
+ outstanding1 numeric,
+ outstanding2 numeric,
+ outstandingp numeric,
+ lastbilleddate1 date,
+ lastbilleddate2 date,
+ lastbilleddatep date,
+ healthcareclaimtypeid1 numeric,
+ healthcareclaimtypeid2 numeric
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.claims_transactions (
+ id varchar(1000),
+ claimid varchar(1000),
+ chargeid numeric,
+ patientid varchar(1000),
+ "type" varchar(1000),
+ amount numeric,
+ method varchar(1000),
+ fromdate date,
+ todate date,
+ placeofservice varchar(1000),
+ procedurecode varchar(1000),
+ modifier1 varchar(1000),
+ modifier2 varchar(1000),
+ diagnosisref1 numeric,
+ diagnosisref2 numeric,
+ diagnosisref3 numeric,
+ diagnosisref4 numeric,
+ units numeric,
+ departmentid numeric,
+ notes varchar(1000),
+ unitamount numeric,
+ transferoutid numeric,
+ transfertype varchar(1000),
+ payments numeric,
+ adjustments numeric,
+ transfers numeric,
+ outstanding numeric,
+ appointmentid varchar(1000),
+ linenote varchar(1000),
+ patientinsuranceid varchar(1000),
+ feescheduleid numeric,
+ providerid varchar(1000),
+ supervisingproviderid varchar(1000)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.payer_transitions (
+ patient varchar(1000),
+ memberid varchar(1000),
+ start_date date,
+ end_date date,
+ payer varchar(1000),
+ secondary_payer varchar(1000),
+ plan_ownership varchar(1000),
+ owner_name varchar(1000)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.payers (
+ id varchar(1000),
+ "name" varchar(1000),
+ ownership varchar(1000) NULL, -- sized explicitly: a bare varchar is 1 character in SQL Server DDL
+ address varchar(1000),
+ city varchar(1000),
+ state_headquartered varchar(1000),
+ zip varchar(1000),
+ phone varchar(1000),
+ amount_covered numeric,
+ amount_uncovered numeric,
+ revenue numeric,
+ covered_encounters numeric,
+ uncovered_encounters numeric,
+ covered_medications numeric,
+ uncovered_medications numeric,
+ covered_procedures numeric,
+ uncovered_procedures numeric,
+ covered_immunizations numeric,
+ uncovered_immunizations numeric,
+ unique_customers numeric,
+ qols_avg numeric,
+ member_months numeric
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.supplies (
+ "date" date,
+ patient varchar(1000),
+ encounter varchar(1000),
+ code varchar(1000),
+ description varchar(1000),
+ quantity numeric
+);
+
diff --git a/inst/sql/sql_server/synthea_version/v320/create_synthea_tables.sql b/inst/sql/sql_server/synthea_version/v320/create_synthea_tables.sql
new file mode 100644
index 0000000..984c16b
--- /dev/null
+++ b/inst/sql/sql_server/synthea_version/v320/create_synthea_tables.sql
@@ -0,0 +1,326 @@
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.allergies (
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+system varchar(255),
+description varchar(255),
+"type" varchar(255),
+category varchar(255),
+reaction1 varchar(255),
+description1 varchar(255),
+severity1 varchar(255),
+reaction2 varchar(255),
+description2 varchar(255),
+severity2 varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.careplans (
+id varchar(1000),
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255),
+reasoncode varchar(255),
+reasondescription varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.conditions (
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.encounters (
+id varchar(1000),
+start date,
+stop date,
+patient varchar(1000),
+organization varchar(1000),
+provider varchar(1000),
+payer varchar(1000),
+encounterclass varchar(1000),
+code varchar(100),
+description varchar(255),
+base_encounter_cost numeric,
+total_claim_cost numeric,
+payer_coverage numeric,
+reasoncode varchar(100),
+reasondescription varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.immunizations (
+"date" date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255),
+base_cost numeric
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.imaging_studies (
+id varchar(1000),
+"date" date,
+patient varchar(1000),
+encounter varchar(1000),
+series_uid varchar(1000),
+bodysite_code varchar(100),
+bodysite_description varchar(255),
+modality_code varchar(100),
+modality_description varchar(255),
+instance_uid varchar(1000),
+SOP_code varchar(100),
+SOP_description varchar(255),
+procedure_code varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.medications (
+start date,
+stop date,
+patient varchar(1000),
+payer varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(1000),
+base_cost numeric,
+payer_coverage numeric,
+dispenses int,
+totalcost numeric,
+reasoncode varchar(100),
+reasondescription varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.observations (
+"date" date,
+patient varchar(1000),
+encounter varchar(1000),
+category varchar(1000),
+code varchar(100),
+description varchar(255),
+value varchar(1000),
+units varchar(100),
+"type" varchar(100)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.organizations (
+id varchar(1000),
+"name" varchar(1000),
+address varchar(1000),
+city varchar(100),
+state varchar(100),
+zip varchar(100),
+lat numeric,
+lon numeric,
+phone varchar(100),
+revenue numeric,
+utilization varchar(100)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.patients (
+id varchar(1000),
+birthdate date,
+deathdate date,
+ssn varchar(100),
+drivers varchar(100),
+passport varchar(100),
+prefix varchar(100),
+first varchar(100),
+last varchar(100),
+suffix varchar(100),
+maiden varchar(100),
+marital varchar(100),
+race varchar(100),
+ethnicity varchar(100),
+gender varchar(100),
+birthplace varchar(100),
+address varchar(100),
+city varchar(100),
+state varchar(100),
+county varchar(100),
+fips varchar(100),
+zip varchar(100),
+lat numeric,
+lon numeric,
+healthcare_expenses numeric,
+healthcare_coverage numeric,
+income int
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.procedures (
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255),
+base_cost numeric,
+reasoncode varchar(1000),
+reasondescription varchar(1000)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.providers (
+id varchar(1000),
+organization varchar(1000),
+"name" varchar(100),
+gender varchar(100),
+speciality varchar(100),
+address varchar(255),
+city varchar(100),
+state varchar(100),
+zip varchar(100),
+lat numeric,
+lon numeric,
+encounters int,
+"procedures" int
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.devices (
+start date,
+stop date,
+patient varchar(1000),
+encounter varchar(1000),
+code varchar(100),
+description varchar(255),
+udi varchar(255)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.claims (
+ id varchar(1000),
+ patientid varchar(1000),
+ providerid varchar(1000),
+ primarypatientinsuranceid varchar(1000),
+ secondarypatientinsuranceid varchar(1000),
+ departmentid varchar(1000),
+ patientdepartmentid varchar(1000),
+ diagnosis1 varchar(1000),
+ diagnosis2 varchar(1000),
+ diagnosis3 varchar(1000),
+ diagnosis4 varchar(1000),
+ diagnosis5 varchar(1000),
+ diagnosis6 varchar(1000),
+ diagnosis7 varchar(1000),
+ diagnosis8 varchar(1000),
+ referringproviderid varchar(1000),
+ appointmentid varchar(1000),
+ currentillnessdate date,
+ servicedate date,
+ supervisingproviderid varchar(1000),
+ status1 varchar(1000),
+ status2 varchar(1000),
+ statusp varchar(1000),
+ outstanding1 numeric,
+ outstanding2 numeric,
+ outstandingp numeric,
+ lastbilleddate1 date,
+ lastbilleddate2 date,
+ lastbilleddatep date,
+ healthcareclaimtypeid1 numeric,
+ healthcareclaimtypeid2 numeric
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.claims_transactions (
+ id varchar(1000),
+ claimid varchar(1000),
+ chargeid numeric,
+ patientid varchar(1000),
+ "type" varchar(1000),
+ amount numeric,
+ method varchar(1000),
+ fromdate date,
+ todate date,
+ placeofservice varchar(1000),
+ procedurecode varchar(1000),
+ modifier1 varchar(1000),
+ modifier2 varchar(1000),
+ diagnosisref1 numeric,
+ diagnosisref2 numeric,
+ diagnosisref3 numeric,
+ diagnosisref4 numeric,
+ units numeric,
+ departmentid numeric,
+ notes varchar(1000),
+ unitamount numeric,
+ transferoutid numeric,
+ transfertype varchar(1000),
+ payments numeric,
+ adjustments numeric,
+ transfers numeric,
+ outstanding numeric,
+ appointmentid varchar(1000),
+ linenote varchar(1000),
+ patientinsuranceid varchar(1000),
+ feescheduleid numeric,
+ providerid varchar(1000),
+ supervisingproviderid varchar(1000)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.payer_transitions (
+ patient varchar(1000),
+ memberid varchar(1000),
+ start_date date,
+ end_date date,
+ payer varchar(1000),
+ secondary_payer varchar(1000),
+ plan_ownership varchar(1000),
+ owner_name varchar(1000)
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.payers (
+ id varchar(1000),
+ "name" varchar(1000),
+ ownership varchar(1000) NULL, -- sized explicitly: a bare varchar is 1 character in SQL Server DDL
+ address varchar(1000),
+ city varchar(1000),
+ state_headquartered varchar(1000),
+ zip varchar(1000),
+ phone varchar(1000),
+ amount_covered numeric,
+ amount_uncovered numeric,
+ revenue numeric,
+ covered_encounters numeric,
+ uncovered_encounters numeric,
+ covered_medications numeric,
+ uncovered_medications numeric,
+ covered_procedures numeric,
+ uncovered_procedures numeric,
+ covered_immunizations numeric,
+ uncovered_immunizations numeric,
+ unique_customers numeric,
+ qols_avg numeric,
+ member_months numeric
+);
+
+--HINT DISTRIBUTE_ON_RANDOM
+create table @synthea_schema.supplies (
+ "date" date,
+ patient varchar(1000),
+ encounter varchar(1000),
+ code varchar(1000),
+ description varchar(1000),
+ quantity numeric
+);
+
From d064f84d15e3adbb29447cdeac30abe29996932f Mon Sep 17 00:00:00 2001
From: Evanette Burrows
Date: Fri, 29 Dec 2023 12:32:52 -0500
Subject: [PATCH 02/10] Update package and documentation
---
R/CreateMapAndRollupTables.r | 7 +++----
docs/pkgdown.yml | 2 +-
docs/reference/CreateMapAndRollupTables.html | 6 +-----
docs/reference/CreateSyntheaTables.html | 2 +-
docs/reference/LoadSyntheaTables.html | 2 +-
man/CreateMapAndRollupTables.Rd | 4 +---
man/CreateSyntheaTables.Rd | 2 +-
man/LoadSyntheaTables.Rd | 2 +-
man/createExtraIndices.Rd | 2 +-
9 files changed, 11 insertions(+), 18 deletions(-)
diff --git a/R/CreateMapAndRollupTables.r b/R/CreateMapAndRollupTables.r
index 77378c7..71ac7d6 100644
--- a/R/CreateMapAndRollupTables.r
+++ b/R/CreateMapAndRollupTables.r
@@ -19,12 +19,11 @@
#' so for example 'cdm_instance.dbo'.
#' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4".
#' @param syntheaVersion The version of Synthea used to generate the csv files.
-#' Currently "2.7.0" and "3.0.0" are supported.
+#' Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database.
#' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea.
#' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI
#' @param cdmSourceDescription The description of the source data. Default is generic Synthea description.
-#' @param createIndices A boolean that determines whether or not to create indices on CDM tables before the ETL.
#' @param sqlOnly A boolean that determines whether or not to perform the load or generate SQL scripts. Default is FALSE.
#'
#'@export
@@ -51,10 +50,10 @@ CreateMapAndRollupTables <- function(connectionDetails,
stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".")
}
- supportedSyntheaVersions <- c("2.7.0", "3.0.0")
+ supportedSyntheaVersions <- c("2.7.0", "3.0.0","3.1.0","3.2.0")
if (!(syntheaVersion %in% supportedSyntheaVersions))
- stop("Invalid Synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.")
+ stop("Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported.")
# Create Vocabulary mapping tables
CreateVocabMapTables(connectionDetails, cdmSchema, cdmVersion, sqlOnly)
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index 6c848e0..77fc03b 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -16,5 +16,5 @@ articles:
provider: provider.html
visit_detail: visit_detail.html
visit_occurrence: visit_occurrence.html
-last_built: 2023-12-28T20:36Z
+last_built: 2023-12-29T17:31Z
diff --git a/docs/reference/CreateMapAndRollupTables.html b/docs/reference/CreateMapAndRollupTables.html
index 9e9612b..c6e21be 100644
--- a/docs/reference/CreateMapAndRollupTables.html
+++ b/docs/reference/CreateMapAndRollupTables.html
@@ -138,7 +138,7 @@ Arguments
syntheaVersion
The version of Synthea used to generate the csv files.
-Currently "2.7.0" and "3.0.0" are supported.
+Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
cdmSourceName
@@ -160,10 +160,6 @@ Arguments
sqlOnly
A boolean that determines whether or not to perform the load or generate SQL scripts. Default is FALSE.
-
-createIndices
-A boolean that determines whether or not to create indices on CDM tables before the ETL.
-
Details
diff --git a/docs/reference/CreateSyntheaTables.html b/docs/reference/CreateSyntheaTables.html
index 3af41c0..dc14d45 100644
--- a/docs/reference/CreateSyntheaTables.html
+++ b/docs/reference/CreateSyntheaTables.html
@@ -113,7 +113,7 @@
Arguments
syntheaVersion
The version of Synthea used to generate the csv files.
-Currently "2.7.0" and "3.0.0" are is supported.
+Currently "2.7.0", "3.0.0", "3.1.0", and "3.2.0" are supported.
diff --git a/docs/reference/LoadSyntheaTables.html b/docs/reference/LoadSyntheaTables.html
index fd6c9c7..a366282 100644
--- a/docs/reference/LoadSyntheaTables.html
+++ b/docs/reference/LoadSyntheaTables.html
@@ -134,7 +134,7 @@ Details
./gradlew build check test
./run_synthea -p 1000
You can enable csv records in src/main/resources/synthea.properties by setting exporter.csv.export = true.
- As of the time of this writing the csv files can be found at synthe/output/csv.
+ As of the time of this writing the csv files can be found at synthea/output/csv.
For more details: Synthea Basic Setup
diff --git a/man/CreateMapAndRollupTables.Rd b/man/CreateMapAndRollupTables.Rd
index aa0f651..b4732da 100644
--- a/man/CreateMapAndRollupTables.Rd
+++ b/man/CreateMapAndRollupTables.Rd
@@ -36,7 +36,7 @@ so for example 'cdm_instance.dbo'.}
\item{cdmVersion}{The version of your CDM. Currently "5.3" and "5.4".}
\item{syntheaVersion}{The version of Synthea used to generate the csv files.
-Currently "2.7.0" and "3.0.0" are supported.}
+Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.}
\item{cdmSourceName}{The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database.}
@@ -47,8 +47,6 @@ Currently "2.7.0" and "3.0.0" are supported.}
\item{cdmSourceDescription}{The description of the source data. Default is generic Synthea description.}
\item{sqlOnly}{A boolean that determines whether or not to perform the load or generate SQL scripts. Default is FALSE.}
-
-\item{createIndices}{A boolean that determines whether or not to create indices on CDM tables before the ETL.}
}
\description{
This function creates the vocabulary mapping and visit roll-up intermediate tables from created
diff --git a/man/CreateSyntheaTables.Rd b/man/CreateSyntheaTables.Rd
index 62382d7..03a4d72 100644
--- a/man/CreateSyntheaTables.Rd
+++ b/man/CreateSyntheaTables.Rd
@@ -17,7 +17,7 @@ Server, this should specify both the database and the schema,
so for example 'cdm_instance.dbo'.}
\item{syntheaVersion}{The version of Synthea used to generate the csv files.
-Currently "2.7.0" and "3.0.0" are is supported.}
+Currently "2.7.0", "3.0.0", "3.1.0", and "3.2.0" are supported.}
}
\description{
This function creates all Synthea tables.
diff --git a/man/LoadSyntheaTables.Rd b/man/LoadSyntheaTables.Rd
index 7c22fe6..e5c9107 100644
--- a/man/LoadSyntheaTables.Rd
+++ b/man/LoadSyntheaTables.Rd
@@ -38,6 +38,6 @@ This function assumes \cr\code{createSyntheaTables()} has already been run. Add
\cr\code{./run_synthea -p 1000}
You can enable csv records in src/main/resources/synthea.properties by setting exporter.csv.export = true.
- As of the time of this writing the csv files can be found at synthe/output/csv.
+ As of the time of this writing the csv files can be found at synthea/output/csv.
For more details: \href{https://github.com/synthetichealth/synthea/wiki/Basic-Setup-and-Running}{Synthea Basic Setup}
}
diff --git a/man/createExtraIndices.Rd b/man/createExtraIndices.Rd
index 158d861..f003b4e 100644
--- a/man/createExtraIndices.Rd
+++ b/man/createExtraIndices.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CreateExtraIndices.R
+% Please edit documentation in R/createExtraIndices.R
\name{CreateExtraIndices}
\alias{CreateExtraIndices}
\title{Create Optional Extra Indices for ETL Performance}
From 0e377d2bf3ee438b3fa672b5b595fe3072e060c1 Mon Sep 17 00:00:00 2001
From: Evanette Burrows
Date: Fri, 29 Dec 2023 14:03:40 -0500
Subject: [PATCH 03/10] Add synthea version to source description
Resolves https://github.com/OHDSI/ETL-Synthea/issues/168
---
R/LoadEventTables.r | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/R/LoadEventTables.r b/R/LoadEventTables.r
index a381fa9..491caba 100644
--- a/R/LoadEventTables.r
+++ b/R/LoadEventTables.r
@@ -233,7 +233,7 @@ LoadEventTables <- function(connectionDetails,
cdm_source_name = cdmSourceName,
cdm_source_abbreviation = cdmSourceAbbreviation,
cdm_holder = cdmHolder,
- source_description = cdmSourceDescription
+ source_description = paste0("Synthea version: ", syntheaVersion, " ", cdmSourceDescription) # paste0: paste() would add its own " " separator and double the spaces
)
runStep(sql, fileQuery)
From 4d46e237fa3cc8c073d9328e86bb5079675e0144 Mon Sep 17 00:00:00 2001
From: Evanette Burrows
Date: Fri, 29 Dec 2023 14:25:25 -0500
Subject: [PATCH 04/10] Update load scripts and documentation
---
R/LoadEventTables.r | 8 ++++----
.../cdm_version/v531/insert_payer_plan_period.sql | 7 ++++++-
.../cdm_version/v540/insert_payer_plan_period.sql | 5 +++++
vignettes/Payer_plan_period.Rmd | 4 ++--
4 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/R/LoadEventTables.r b/R/LoadEventTables.r
index 491caba..568e07a 100644
--- a/R/LoadEventTables.r
+++ b/R/LoadEventTables.r
@@ -18,7 +18,7 @@
#' so for example 'cdm_instance.dbo'.
#' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4".
#' @param syntheaVersion The version of Synthea used to generate the csv files.
-#' Currently "2.7.0" and "3.0.0" are supported.
+#' Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database.
#' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea.
#' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI
@@ -51,10 +51,10 @@ LoadEventTables <- function(connectionDetails,
stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".")
}
- supportedSyntheaVersions <- c("2.7.0", "3.0.0")
+ supportedSyntheaVersions <- c("2.7.0", "3.0.0","3.1.0","3.2.0")
if (!(syntheaVersion %in% supportedSyntheaVersions))
- stop("Invalid Synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.")
+ stop("Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported.")
if (createIndices) {
print("Creating Indices on CDM Tables....")
@@ -274,7 +274,7 @@ LoadEventTables <- function(connectionDetails,
# cost
if (syntheaVersion == "2.7.0")
fileQuery <- "insert_cost_v270.sql"
- else if (syntheaVersion == "3.0.0")
+ else if (syntheaVersion %in% c("3.0.0","3.1.0","3.2.0"))
fileQuery <- "insert_cost_v300.sql"
sql <- SqlRender::loadRenderTranslateSql(
diff --git a/inst/sql/sql_server/cdm_version/v531/insert_payer_plan_period.sql b/inst/sql/sql_server/cdm_version/v531/insert_payer_plan_period.sql
index b9b356b..bf95ea9 100644
--- a/inst/sql/sql_server/cdm_version/v531/insert_payer_plan_period.sql
+++ b/inst/sql/sql_server/cdm_version/v531/insert_payer_plan_period.sql
@@ -31,7 +31,12 @@ select ROW_NUMBER()OVER(ORDER BY pat.id, pt.start_year) payer_plan_period_id,
CAST(pt.start_year AS DATE) payer_plan_period_start_date,
CAST(pt.end_year AS DATE) payer_plan_period_end_date,
}
-
+
+ {@synthea_version == "3.1.0" | @synthea_version == "3.2.0" } ? {
+ CAST(pt.start_date AS DATE) payer_plan_period_start_date,
+ CAST(pt.end_date AS DATE) payer_plan_period_end_date,
+ }
+
0 payer_concept_id,
pt.payer payer_source_value,
0 payer_source_concept_id,
diff --git a/inst/sql/sql_server/cdm_version/v540/insert_payer_plan_period.sql b/inst/sql/sql_server/cdm_version/v540/insert_payer_plan_period.sql
index b9b356b..06812e0 100644
--- a/inst/sql/sql_server/cdm_version/v540/insert_payer_plan_period.sql
+++ b/inst/sql/sql_server/cdm_version/v540/insert_payer_plan_period.sql
@@ -31,6 +31,11 @@ select ROW_NUMBER()OVER(ORDER BY pat.id, pt.start_year) payer_plan_period_id,
CAST(pt.start_year AS DATE) payer_plan_period_start_date,
CAST(pt.end_year AS DATE) payer_plan_period_end_date,
}
+
+ {@synthea_version == "3.1.0" | @synthea_version == "3.2.0" } ? {
+ CAST(pt.start_date AS DATE) payer_plan_period_start_date,
+ CAST(pt.end_date AS DATE) payer_plan_period_end_date,
+ }
0 payer_concept_id,
pt.payer payer_source_value,
diff --git a/vignettes/Payer_plan_period.Rmd b/vignettes/Payer_plan_period.Rmd
index 5e9e5f3..6853bc6 100644
--- a/vignettes/Payer_plan_period.Rmd
+++ b/vignettes/Payer_plan_period.Rmd
@@ -17,8 +17,8 @@ vignette: >
| --- | --- | --- | --- |
| payer_plan_period | |Autogenerated based on the patient.id and payer transition start year | |
| person_id | patient | Map by mapping person.person_source_value to payer_transitions.patient. Find person.person_id by mapping payer_transitions.patient to person.person_source_value. | |
-| payer_plan_period_start_date | start_year | For synthea v2.7.0 CAST(CONCAT('01-JAN-',CAST(pt.start_year AS VARCHAR)) AS DATE) For synthea v3.0.0 CAST(pt.start_year AS DATE) | |
-| payer_plan_period_start_date | end_year | For synthea v2.7.0 CAST(CONCAT('01-JAN-',CAST(pt.end_year AS VARCHAR)) AS DATE) For synthea v3.0.0 CAST(pt.end_year AS DATE) | |
+| payer_plan_period_start_date | start_year | For synthea v2.7.0 CAST(CONCAT('01-JAN-',CAST(pt.start_year AS VARCHAR)) AS DATE) For synthea v3.0.0 CAST(pt.start_year AS DATE) For synthea v3.1.0 and v3.2.0 CAST(pt.start_date AS DATE) | |
+| payer_plan_period_end_date | end_year | For synthea v2.7.0 CAST(CONCAT('01-JAN-',CAST(pt.end_year AS VARCHAR)) AS DATE) For synthea v3.0.0 CAST(pt.end_year AS DATE) For synthea v3.1.0 and v3.2.0 CAST(pt.end_date AS DATE) | |
| payer_concept_id | | Set to 0 for all records | |
| payer_source_value | payer | | |
| payer_source_concept_id | | Set to 0 for all records | |
From 3428ffc38616fe21bc265fc6326f870845f78a2a Mon Sep 17 00:00:00 2001
From: Evanette Burrows
Date: Fri, 29 Dec 2023 14:42:15 -0500
Subject: [PATCH 05/10] Update additional files
---
R/DropSyntheaTables.r | 4 ++--
R/createExtraIndices.R | 4 ++--
inst/sql/sql_server/extra_indices.sql | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/R/DropSyntheaTables.r b/R/DropSyntheaTables.r
index 4c81280..4814e65 100644
--- a/R/DropSyntheaTables.r
+++ b/R/DropSyntheaTables.r
@@ -40,7 +40,7 @@ DropSyntheaTables <- function(connectionDetails, syntheaSchema)
conn <- DatabaseConnector::connect(connectionDetails)
allTables <- DatabaseConnector::getTableNames(conn, syntheaSchema)
tablesToDrop <- allTables[which(allTables %in% syntheaTables)]
-
+
if (length(tablesToDrop) > 0) {
writeLines("Dropping Synthea tables...")
sql <-
@@ -57,6 +57,6 @@ DropSyntheaTables <- function(connectionDetails, syntheaSchema)
} else {
print(sprintf("No synthea tables to drop in schema %s",syntheaSchema))
}
-
+
on.exit(DatabaseConnector::disconnect(conn))
}
diff --git a/R/createExtraIndices.R b/R/createExtraIndices.R
index b4899a9..9d435d8 100644
--- a/R/createExtraIndices.R
+++ b/R/createExtraIndices.R
@@ -8,13 +8,13 @@
#' Server, this should specify both the database and the schema, so for example 'cdm_instance.dbo'.
#' @param syntheaSchema The name of the Synthea database schema. Requires read and write permissions to this schema. On SQL
#' Server, this should specify both the database and the schema, so for example 'synthea.dbo'.
-#' @param syntheaVersion Your Synthea version. Currently "2.7.0" and "3.0.0" are supported.
+#' @param syntheaVersion Your Synthea version. Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
#' @param outputFolder Location of the SQL scripts if sqlOnly = TRUE. Default is NULL.
#' @param sqlOnly A boolean that determines whether to create the indices or generate a SQL scripts. Default is FALSE.
#'
#' @details This function creates indices which have been found to speed up certain long-running INSERT queries in LoadEventTables,
#' for some users. Indices are created on the intermediate vocabulary mapping tables; the person & provider CDM tables;
-#' and the claims_transactions Synthea table (in Synthea 3.0.0).
+#' and the claims_transactions Synthea table (in Synthea 3.0.0, 3.1.0 and 3.2.0).
#'
#' @importFrom utils head
#'
diff --git a/inst/sql/sql_server/extra_indices.sql b/inst/sql/sql_server/extra_indices.sql
index 7a703ec..3bbdb97 100644
--- a/inst/sql/sql_server/extra_indices.sql
+++ b/inst/sql/sql_server/extra_indices.sql
@@ -31,7 +31,7 @@ CREATE INDEX person_psv ON @cdmDatabaseSchema.person (
person_source_value
);
-{@syntheaVersion == '3.0.0'}?{
+{@syntheaVersion == '3.0.0' | @syntheaVersion == '3.1.0' | @syntheaVersion == '3.2.0' }?{
CREATE INDEX claims_transactions_cpap ON @syntheaSchema.claims_transactions (
claimid,
patientid,
From 2747d1658599fe87ef6bd72296a042b75807594b Mon Sep 17 00:00:00 2001
From: Evanette Burrows
Date: Fri, 29 Dec 2023 14:46:05 -0500
Subject: [PATCH 06/10] Update overall package documentation
---
docs/articles/Payer_plan_period.html | 6 ++++--
docs/pkgdown.yml | 2 +-
docs/reference/LoadEventTables.html | 2 +-
docs/reference/createExtraIndices.html | 4 ++--
extras/codeToRun.R | 4 ++++
man/LoadEventTables.Rd | 2 +-
man/createExtraIndices.Rd | 4 ++--
7 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/docs/articles/Payer_plan_period.html b/docs/articles/Payer_plan_period.html
index 3b49cd2..6206775 100644
--- a/docs/articles/Payer_plan_period.html
+++ b/docs/articles/Payer_plan_period.html
@@ -157,14 +157,16 @@