refactoring of create and write DDL
This commit is contained in:
parent
0f1d9333dd
commit
f8cd93d159
|
@ -18,7 +18,7 @@
|
||||||
#' Create a DDL script from two csv files that detail the OMOP CDM Specifications. These files also form the basis of the CDM documentation and the Data Quality
|
#' Create a DDL script from two csv files that detail the OMOP CDM Specifications. These files also form the basis of the CDM documentation and the Data Quality
|
||||||
#' Dashboard.
|
#' Dashboard.
|
||||||
#'
|
#'
|
||||||
#' @param cdmVersionNum The version of the CDM you are creating
|
#' @param cdmVersionNum The version of the CDM you are creating, e.g. 5.3.1
|
||||||
#'
|
#'
|
||||||
#' @param cdmTableCsvLoc The location of the csv file with the high-level CDM table information. This is defaulted to "inst/csv/OMOP_CDMv5.3.1_Table_Level.csv".
|
#' @param cdmTableCsvLoc The location of the csv file with the high-level CDM table information. This is defaulted to "inst/csv/OMOP_CDMv5.3.1_Table_Level.csv".
|
||||||
#' If a new version of this file was committed to the CDM repository, the package will automatically grab it and place it in "inst/csv/".
|
#' If a new version of this file was committed to the CDM repository, the package will automatically grab it and place it in "inst/csv/".
|
||||||
|
@ -27,57 +27,57 @@
|
||||||
#' @param outputFile The name of the output ddl sql file. This is defaulted to a location in the inst/sql/sql server folder and named with today's date and the CDM version.
|
#' @param outputFile The name of the output ddl sql file. This is defaulted to a location in the inst/sql/sql server folder and named with today's date and the CDM version.
|
||||||
#' @export
|
#' @export
|
||||||
|
|
||||||
createDdlFromFile <- function(cdmVersionNum = cdmVersion,
|
createDdlFromFile <- function(cdmVersion = cdmVersion){
|
||||||
cdmTableCsvLoc = "inst/csv/OMOP_CDMv5.3.1_Table_Level.csv",
|
cdmTableCsvLoc = paste0("inst/csv/OMOP_CDMv", cdmVersion, "_Table_Level.csv")
|
||||||
cdmFieldCsvLoc = "inst/csv/OMOP_CDMv5.3.1_Field_Level.csv",
|
cdmFieldCsvLoc = paste0("inst/csv/OMOP_CDMv", cdmVersion, "_Field_Level.csv")
|
||||||
outputFile = paste0("inst/sql/sql_server/OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql")){
|
|
||||||
|
|
||||||
tableSpecs <- read.csv(cdmTableCsvLoc, stringsAsFactors = FALSE)
|
tableSpecs <- read.csv(cdmTableCsvLoc, stringsAsFactors = FALSE)
|
||||||
cdmSpecs <- read.csv(cdmFieldCsvLoc, stringsAsFactors = FALSE)
|
cdmSpecs <- read.csv(cdmFieldCsvLoc, stringsAsFactors = FALSE)
|
||||||
|
|
||||||
tableList <- tableSpecs$cdmTableName
|
tableList <- tableSpecs$cdmTableName
|
||||||
|
|
||||||
s <- c()
|
sql_result <- c()
|
||||||
s <- c(paste0("--@targetdialect CDM DDL Specification for OMOP Common Data Model ",cdmVersionNum))
|
sql_result <- c(paste0("--@targetdialect CDM DDL Specification for OMOP Common Data Model ", cdmVersion))
|
||||||
for (t in tableList){
|
for (tableName in tableList){
|
||||||
table <- subset(cdmSpecs, cdmTableName == t)
|
fields <- subset(cdmSpecs, cdmTableName == tableName)
|
||||||
fields <- table$cdmFieldName
|
fieldNames <- fields$cdmFieldName
|
||||||
|
|
||||||
if ('person_id' %in% fields){
|
if ('person_id' %in% fieldNames){
|
||||||
q <- "\n\n--HINT DISTRIBUTE ON KEY (person_id)\n"
|
query <- "\n\n--HINT DISTRIBUTE ON KEY (person_id)\n"
|
||||||
} else {
|
} else {
|
||||||
q <- "\n\n--HINT DISTRIBUTE ON RANDOM\n"
|
query <- "\n\n--HINT DISTRIBUTE ON RANDOM\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
s <- c(s, q, paste0("CREATE TABLE @cdmDatabaseSchema.", t, " (\n"))
|
sql_result <- c(sql_result, query, paste0("CREATE TABLE @cdmDatabaseSchema.", tableName, " (\n"))
|
||||||
|
|
||||||
end <- length(fields)
|
n_fields <- length(fieldNames)
|
||||||
a <- c()
|
for(fieldName in fieldNames) {
|
||||||
|
|
||||||
for(f in fields) {
|
if (subset(fields, cdmFieldName == fieldName, isRequired) == "Yes") {
|
||||||
|
nullable_sql <- (" NOT NULL")
|
||||||
if (subset(table, cdmFieldName == f, isRequired) == "Yes") {
|
|
||||||
r <- (" NOT NULL")
|
|
||||||
} else {
|
} else {
|
||||||
r <- (" NULL")
|
nullable_sql <- (" NULL")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (f == fields[[end]]) {
|
if (fieldName == fieldNames[[n_fields]]) {
|
||||||
e <- (" );")
|
closing_sql <- (" );")
|
||||||
} else {
|
} else {
|
||||||
e <- (",")
|
closing_sql <- (",")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (f=="offset") {
|
if (fieldName=="offset") {
|
||||||
field <- paste0('"',f,'"')
|
field <- paste0('"',fieldName,'"')
|
||||||
} else {
|
} else {
|
||||||
field <- f
|
field <- fieldName
|
||||||
}
|
}
|
||||||
|
fieldSql <- paste0("\n\t\t\t",
|
||||||
a <- c(a, paste0("\n\t\t\t",field," ",subset(table, cdmFieldName == f, cdmDatatype),r,e))
|
field," ",
|
||||||
|
subset(fields, cdmFieldName == fieldName, cdmDatatype),
|
||||||
|
nullable_sql,
|
||||||
|
closing_sql)
|
||||||
|
sql_result <- c(sql_result, fieldSql)
|
||||||
}
|
}
|
||||||
s <- c(s, a, "")
|
sql_result <- c(sql_result, "")
|
||||||
}
|
}
|
||||||
SqlRender::writeSql(s, targetFile = outputFile)
|
return(paste0(sql_result, collapse = ""))
|
||||||
return(s)
|
|
||||||
}
|
}
|
||||||
|
|
45
R/writeDDL.R
45
R/writeDDL.R
|
@ -17,42 +17,21 @@
|
||||||
#' Write DDL script
|
#' Write DDL script
|
||||||
#'
|
#'
|
||||||
#' @param targetdialect The dialect of the target database. Choices are "oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server"
|
#' @param targetdialect The dialect of the target database. Choices are "oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server"
|
||||||
#' @param cdmVersion The version of the CDM for which you are creating the DDL.
|
#' @param cdmVersion The version of the CDM for which you are creating the DDL. e.g. 5.3.1
|
||||||
#' @param cdmDatabaseSchema The schema of the CDM instance where the DDL will be run. For example, this would be "ohdsi.dbo" when testing on sql server. After testing
|
#' @param cdmDatabaseSchema The schema of the CDM instance where the DDL will be run. For example, this would be "ohdsi.dbo" when testing on sql server.
|
||||||
#' this can be changed to "@cdmDatabaseSchema"
|
#' Defaults to "@cdmDatabaseSchema"
|
||||||
#' @param sqlFilename The name of the sql file with the current ddl specifications to be translated and rendered
|
|
||||||
#' @param cleanUpScript Set to T if the clean up script should be created. This is for testing purposes and will create a sql script that drops all CDM tables.
|
|
||||||
#' By default set to F. Set to F for Oracle as well as the sql render translation does not work well.
|
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
writeDDL <- function(targetdialect, cdmVersion, cdmDatabaseSchema, sqlFilename, cleanUpScript = F) {
|
writeDDL <- function(targetdialect, cdmVersion, cdmDatabaseSchema = "@cdmDatabaseSchema") {
|
||||||
if(!dir.exists("output")){
|
outputpath <- file.path("ddl", cdmVersion, targetdialect)
|
||||||
dir.create("output")
|
dir.create(outputpath, showWarnings = FALSE, recursive = TRUE)
|
||||||
}
|
|
||||||
|
|
||||||
if(!dir.exists(paste0("output/",targetdialect))){
|
sql <- createDdlFromFile(cdmVersion)
|
||||||
dir.create(paste0("output/",targetdialect))
|
sql <- SqlRender::render(sql = sql, cdmDatabaseSchema = cdmDatabaseSchema)
|
||||||
}
|
sql <- SqlRender::translate(sql, targetDialect = targetdialect)
|
||||||
|
|
||||||
sql <- SqlRender::loadRenderTranslateSql(sqlFilename = sqlFilename,
|
|
||||||
packageName = "CdmDdlBase",
|
|
||||||
dbms = targetdialect,
|
|
||||||
targetdialect = targetdialect,
|
|
||||||
cdmDatabaseSchema = cdmDatabaseSchema)
|
|
||||||
|
|
||||||
|
filename <- paste0("OMOPCDM_", targetdialect, "_", cdmVersion, "_ddl.sql")
|
||||||
SqlRender::writeSql(sql = sql,
|
SqlRender::writeSql(sql = sql,
|
||||||
targetFile = paste0("output/",targetdialect,"/OMOP CDM ",targetdialect," ", cdmVersion," ddl.sql"))
|
targetFile = file.path(outputpath, filename)
|
||||||
|
)
|
||||||
|
|
||||||
if(cleanUpScript){
|
|
||||||
|
|
||||||
sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "testCleanUp.sql",
|
|
||||||
packageName = "CdmDdlBase",
|
|
||||||
dbms = targetdialect,
|
|
||||||
cdmDatabaseSchema = cdmDatabaseSchema)
|
|
||||||
|
|
||||||
SqlRender::writeSql(sql = sql,
|
|
||||||
targetFile = paste0("output/",targetdialect,"/", targetdialect," testCleanUp ", cdmVersion,".sql"))
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,12 +4,11 @@
|
||||||
# the new CDM versions. Set the below variable to indicate the version of the cdm you are creating. This will be used for the name of the pdf so, for
|
# the new CDM versions. Set the below variable to indicate the version of the cdm you are creating. This will be used for the name of the pdf so, for
|
||||||
# example, write v5.3 as v5_3.
|
# example, write v5.3 as v5_3.
|
||||||
|
|
||||||
cdmVersion <- "v6_0"
|
cdmVersion <- "5.3.1"
|
||||||
|
|
||||||
# Step 3: After creating the csv files for the new version, create the sql server DDL from the file
|
# Step 3: After creating the csv files for the new version, create the sql server DDL from the file
|
||||||
|
|
||||||
s <- CdmDdlBase::createDdlFromFile(cdmTableCsvLoc = "inst/csv/OMOP_CDMv6.0_Table_Level.csv",
|
s <- CdmDdlBase::createDdlFromFile(cdmVersion)
|
||||||
cdmFieldCsvLoc = "inst/csv/OMOP_CDMv6.0_Field_Level.csv")
|
|
||||||
|
|
||||||
# Step 3.1: Create the primary key constraints for the new version
|
# Step 3.1: Create the primary key constraints for the new version
|
||||||
|
|
||||||
|
@ -103,53 +102,12 @@ writeIndex("sql server",
|
||||||
# Step 8: After testing the files for Oracle, Postgres, and Sql Server run the following to create the files for all dialects. Oracle
|
# Step 8: After testing the files for Oracle, Postgres, and Sql Server run the following to create the files for all dialects. Oracle
|
||||||
# Postgres and Sql Server are rewritten to overwrite the cdmDatabaseSchema with a token.
|
# Postgres and Sql Server are rewritten to overwrite the cdmDatabaseSchema with a token.
|
||||||
|
|
||||||
writeDDL(targetdialect = "oracle",
|
|
||||||
cdmVersion = cdmVersion,
|
|
||||||
sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
|
|
||||||
cdmDatabaseSchema = "@cdmDatabaseSchema",
|
|
||||||
cleanUpScript = F) #oracle syntax for removing tables is weird, set this to F and make any changes to the raw file
|
|
||||||
|
|
||||||
writeDDL(targetdialect = "postgresql",
|
for (targetdialect in c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server")) {
|
||||||
cdmVersion = cdmVersion,
|
writeDDL(targetdialect = targetdialect,
|
||||||
sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
|
cdmVersion = cdmVersion)
|
||||||
cdmDatabaseSchema = "@cdmDatabaseSchema",
|
}
|
||||||
cleanUpScript = F) #This needs to be updated manually right now
|
|
||||||
|
|
||||||
writeDDL(targetdialect = "sql server",
|
|
||||||
cdmVersion = cdmVersion,
|
|
||||||
sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
|
|
||||||
cdmDatabaseSchema = "@cdmDatabaseSchema",
|
|
||||||
cleanUpScript = F) #This needs to be updated manually right now
|
|
||||||
|
|
||||||
writeDDL(targetdialect = "bigquery",
|
|
||||||
cdmVersion = cdmVersion,
|
|
||||||
sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
|
|
||||||
cdmDatabaseSchema = "@cdmDatabaseSchema",
|
|
||||||
cleanUpScript = F)
|
|
||||||
|
|
||||||
writeDDL(targetdialect = "impala",
|
|
||||||
cdmVersion = cdmVersion,
|
|
||||||
sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
|
|
||||||
cdmDatabaseSchema = "@cdmDatabaseSchema",
|
|
||||||
cleanUpScript = F)
|
|
||||||
|
|
||||||
writeDDL(targetdialect = "netezza",
|
|
||||||
cdmVersion = cdmVersion,
|
|
||||||
sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
|
|
||||||
cdmDatabaseSchema = "@cdmDatabaseSchema",
|
|
||||||
cleanUpScript = F)
|
|
||||||
|
|
||||||
writeDDL(targetdialect = "pdw",
|
|
||||||
cdmVersion = cdmVersion,
|
|
||||||
sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
|
|
||||||
cdmDatabaseSchema = "@cdmDatabaseSchema",
|
|
||||||
cleanUpScript = F)
|
|
||||||
|
|
||||||
writeDDL(targetdialect = "redshift",
|
|
||||||
cdmVersion = cdmVersion,
|
|
||||||
sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
|
|
||||||
cdmDatabaseSchema = "@cdmDatabaseSchema",
|
|
||||||
cleanUpScript = F)
|
|
||||||
|
|
||||||
## Write all primary keys
|
## Write all primary keys
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue