Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update package to support synthea 3.1.0 and 3.2.0 #178

Merged
merged 10 commits into from
Jan 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
Package: ETLSyntheaBuilder
Type: Package
Title: A Builder for Converting the Synthea Data to the OMOP CDM
Version: 1.0
Version: 2.0
Date: 2021-12-04
Author: Anthony Molinaro [aut, cre],
Clair Blacketer [aut],
Frank DeFalco [aut]
Maintainer: Anthony Molinaro <[email protected]>
Frank DeFalco [aut],
Evanette Burrows [aut]
Maintainer: Evanette Burrows <[email protected]>
Description: ETL and Builder to convert Synthea Data to the OMOP CDM.
Starting with csv files for an OMOP Vocabulary and csv files for Synthea,
this package creates database tables from these csv files and maps them
Expand Down
58 changes: 29 additions & 29 deletions R/CreateCDMTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -26,41 +26,41 @@ CreateCDMTables <-
cdmSchema,
cdmVersion,
outputFolder = NULL,
createIndices = FALSE,
createIndices = FALSE,
sqlOnly = FALSE)
{
if (!sqlOnly) {

print("Creating CDM Tables....")

CommonDataModel::executeDdl(
connectionDetails = connectionDetails,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
executeDdl = TRUE,
executePrimaryKey = TRUE,
executeForeignKey = FALSE
) # False for now due to bug: https://github.com/OHDSI/CommonDataModel/issues/452
print("Creating CDM Tables....")

print("CDM Tables Created.")

if (createIndices) {

print("Creating Indices on CDM Tables....")
CommonDataModel::executeDdl(
connectionDetails = connectionDetails,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
executeDdl = TRUE,
executePrimaryKey = TRUE,
executeForeignKey = FALSE
) # False for now due to bug: https://github.com/OHDSI/CommonDataModel/issues/452

print("CDM Tables Created.")

if (createIndices) {
print("Creating Indices on CDM Tables....")

indexSQLFile <- CommonDataModel::writeIndex(
targetDialect = connectionDetails$dbms,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
outputfolder = tempdir())
indexSQLFile <- CommonDataModel::writeIndex(
targetDialect = connectionDetails$dbms,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
outputfolder = tempdir()
)

indexDDL <-
SqlRender::readSql(paste0(tempdir(), "/", indexSQLFile))
conn <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::executeSql(conn, indexDDL)
DatabaseConnector::disconnect(conn)
print("Index Creation Complete.")
}

indexDDL <- SqlRender::readSql(paste0(tempdir(),"/",indexSQLFile))
conn <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::executeSql(conn,indexDDL)
DatabaseConnector::disconnect(conn)
print("Index Creation Complete.")
}

} else {
if (is.null(outputFolder)) {
stop("Must specify an outputFolder location when using sqlOnly = TRUE")
Expand Down
39 changes: 17 additions & 22 deletions R/CreateMapAndRollupTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -19,42 +19,39 @@
#' so for example 'cdm_instance.dbo'.
#' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4".
#' @param syntheaVersion The version of Synthea used to generate the csv files.
#' Currently "2.7.0" and "3.0.0" are supported.
#' Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database.
#' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea.
#' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI
#' @param cdmSourceDescription The description of the source data. Default is generic Synthea description.
#' @param createIndices A boolean that determines whether or not to create indices on CDM tables before the ETL.
#' @param sqlOnly A boolean that determines whether or not to perform the load or generate SQL scripts. Default is FALSE.
#'
#'@export


CreateMapAndRollupTables <- function(connectionDetails,
cdmSchema,
syntheaSchema,
cdmVersion,
syntheaVersion = "2.7.0",
cdmSourceName = "Synthea synthetic health database",
cdmSourceAbbreviation = "Synthea",
cdmHolder = "OHDSI",
cdmSourceDescription = "SyntheaTM is a Synthetic Patient Population Simulator. The goal is to output synthetic, realistic (but not real), patient data and associated health records in a variety of formats.",
sqlOnly = FALSE)
cdmSchema,
syntheaSchema,
cdmVersion,
syntheaVersion = "2.7.0",
cdmSourceName = "Synthea synthetic health database",
cdmSourceAbbreviation = "Synthea",
cdmHolder = "OHDSI",
cdmSourceDescription = "SyntheaTM is a Synthetic Patient Population Simulator. The goal is to output synthetic, realistic (but not real), patient data and associated health records in a variety of formats.",
sqlOnly = FALSE)
{
# Determine which sql scripts to run based on the given version.
# The path is relative to inst/sql/sql_server.
if (cdmVersion == "5.3") {
sqlFilePath <- "cdm_version/v531"
} else if (cdmVersion == "5.4") {
sqlFilePath <- "cdm_version/v540"
} else {
supportedCDMVersions <- c("5.3", "5.4")

if (!(cdmVersion %in% supportedCDMVersions)) {
stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".")
}

supportedSyntheaVersions <- c("2.7.0", "3.0.0")
supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0")

if (!(syntheaVersion %in% supportedSyntheaVersions))
stop("Invalid Synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.")
stop(
"Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported."
)

# Create Vocabulary mapping tables
CreateVocabMapTables(connectionDetails, cdmSchema, cdmVersion, sqlOnly)
Expand All @@ -67,5 +64,3 @@ CreateMapAndRollupTables <- function(connectionDetails,
sqlOnly)

}


19 changes: 13 additions & 6 deletions R/CreateSyntheaTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,32 @@
#' Server, this should specify both the database and the schema,
#' so for example 'cdm_instance.dbo'.
#' @param syntheaVersion The version of Synthea used to generate the csv files.
#' Currently "2.7.0" and "3.0.0" are is supported.
#' Currently "2.7.0", "3.0.0", "3.1.0", and "3.2.0" are supported.
#'
#'@export



CreateSyntheaTables <-
function (connectionDetails,
syntheaSchema,
syntheaVersion = "2.7.0")
function(connectionDetails,
syntheaSchema,
syntheaVersion = "2.7.0")
{
if (syntheaVersion == "2.7.0")
sqlFilePath <- "synthea_version/v270"
else if (syntheaVersion == "3.0.0")
sqlFilePath <- "synthea_version/v300"
else if (syntheaVersion == "3.1.0")
sqlFilePath <- "synthea_version/v310"
else if (syntheaVersion == "3.2.0")
sqlFilePath <- "synthea_version/v320"
else
stop("Invalid synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.")
stop(
"Invalid synthea version specified. Currently \"2.7.0\", \"3.0.0\", \"3.1.0\" and \"3.2.0\" are supported."
)

sqlFilename <- paste0(sqlFilePath, "/", "create_synthea_tables.sql")
sqlFilename <-
paste0(sqlFilePath, "/", "create_synthea_tables.sql")

translatedSql <- SqlRender::loadRenderTranslateSql(
sqlFilename = sqlFilename,
Expand Down
41 changes: 21 additions & 20 deletions R/DropSyntheaTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ DropSyntheaTables <- function(connectionDetails, syntheaSchema)
syntheaTables <- c(
"ALLERGIES",
"CAREPLANS",
"CLAIMS",
"CLAIMS_TRANSACTIONS",
"CLAIMS",
"CLAIMS_TRANSACTIONS",
"CONDITIONS",
"DEVICES",
"ENCOUNTERS",
Expand All @@ -30,33 +30,34 @@ DropSyntheaTables <- function(connectionDetails, syntheaSchema)
"OBSERVATIONS",
"ORGANIZATIONS",
"PATIENTS",
"PAYERS",
"PAYER_TRANSITIONS",
"PAYERS",
"PAYER_TRANSITIONS",
"PROCEDURES",
"PROVIDERS",
"SUPPLIES"
"SUPPLIES"
)

conn <- DatabaseConnector::connect(connectionDetails)
allTables <- DatabaseConnector::getTableNames(conn, syntheaSchema)
tablesToDrop <- allTables[which(allTables %in% syntheaTables)]

if (length(tablesToDrop) > 0) {
writeLines("Dropping Synthea tables...")
sql <-
paste(
"drop table @synthea_schema.",
tablesToDrop,
";",
collapse = "\n",
sep = ""
)
sql <- SqlRender::render(sql, synthea_schema = syntheaSchema)
sql <- SqlRender::translate(sql, targetDialect = connectionDetails$dbms)
DatabaseConnector::executeSql(conn, sql)
writeLines("Dropping Synthea tables...")
sql <-
paste(
"drop table @synthea_schema.",
tablesToDrop,
";",
collapse = "\n",
sep = ""
)
sql <- SqlRender::render(sql, synthea_schema = syntheaSchema)
sql <-
SqlRender::translate(sql, targetDialect = connectionDetails$dbms)
DatabaseConnector::executeSql(conn, sql)
} else {
print(sprintf("No synthea tables to drop in schema %s",syntheaSchema))
print(sprintf("No synthea tables to drop in schema %s", syntheaSchema))
}

on.exit(DatabaseConnector::disconnect(conn))
}
59 changes: 31 additions & 28 deletions R/LoadEventTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' so for example 'cdm_instance.dbo'.
#' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4".
#' @param syntheaVersion The version of Synthea used to generate the csv files.
#' Currently "2.7.0" and "3.0.0" are supported.
#' Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database.
#' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea.
#' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI
Expand All @@ -39,7 +39,7 @@ LoadEventTables <- function(connectionDetails,
cdmHolder = "OHDSI",
cdmSourceDescription = "SyntheaTM is a Synthetic Patient Population Simulator. The goal is to output synthetic, realistic (but not real), patient data and associated health records in a variety of formats.",
createIndices = FALSE,
sqlOnly = FALSE)
sqlOnly = FALSE)
{
# Determine which sql scripts to run based on the given version.
# The path is relative to inst/sql/sql_server.
Expand All @@ -51,25 +51,28 @@ LoadEventTables <- function(connectionDetails,
stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".")
}

supportedSyntheaVersions <- c("2.7.0", "3.0.0")
supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0")

if (!(syntheaVersion %in% supportedSyntheaVersions))
stop("Invalid Synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.")
stop(
"Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported."
)

if (createIndices) {
print("Creating Indices on CDM Tables....")
print("Creating Indices on CDM Tables....")

indexSQLFile <- CommonDataModel::writeIndex(
targetDialect = connectionDetails$dbms,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
outputfolder = tempdir())
indexSQLFile <- CommonDataModel::writeIndex(
targetDialect = connectionDetails$dbms,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
outputfolder = tempdir()
)

indexDDL <- SqlRender::readSql(paste0(tempdir(),"/",indexSQLFile))
conn <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::executeSql(conn,indexDDL)
DatabaseConnector::disconnect(conn)
print("Index Creation Complete.")
indexDDL <- SqlRender::readSql(paste0(tempdir(), "/", indexSQLFile))
conn <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::executeSql(conn, indexDDL)
DatabaseConnector::disconnect(conn)
print("Index Creation Complete.")
}

if (!sqlOnly) {
Expand Down Expand Up @@ -233,7 +236,7 @@ LoadEventTables <- function(connectionDetails,
cdm_source_name = cdmSourceName,
cdm_source_abbreviation = cdmSourceAbbreviation,
cdm_holder = cdmHolder,
source_description = cdmSourceDescription
source_description = paste("Synthea version: ", syntheaVersion, " ", cdmSourceDescription)
)
runStep(sql, fileQuery)

Expand Down Expand Up @@ -267,24 +270,24 @@ LoadEventTables <- function(connectionDetails,
dbms = connectionDetails$dbms,
cdm_schema = cdmSchema,
synthea_schema = syntheaSchema,
synthea_version = syntheaVersion
synthea_version = syntheaVersion
)
runStep(sql, fileQuery)

# cost
if (syntheaVersion == "2.7.0")
fileQuery <- "insert_cost_v270.sql"
else if (syntheaVersion == "3.0.0")
fileQuery <- "insert_cost_v300.sql"
fileQuery <- "insert_cost_v270.sql"
else if (syntheaVersion %in% c("3.0.0", "3.1.0", "3.2.0"))
fileQuery <- "insert_cost_v300.sql"

sql <- SqlRender::loadRenderTranslateSql(
sqlFilename = file.path(sqlFilePath, fileQuery),
packageName = "ETLSyntheaBuilder",
dbms = connectionDetails$dbms,
cdm_schema = cdmSchema,
synthea_schema = syntheaSchema
)
runStep(sql, fileQuery)
sql <- SqlRender::loadRenderTranslateSql(
sqlFilename = file.path(sqlFilePath, fileQuery),
packageName = "ETLSyntheaBuilder",
dbms = connectionDetails$dbms,
cdm_schema = cdmSchema,
synthea_schema = syntheaSchema
)
runStep(sql, fileQuery)

if (!sqlOnly) {
DatabaseConnector::disconnect(conn)
Expand Down
Loading
Loading