refactoring of create and write DDL

2021-08-18 19:02:22 +02:00 · 2021-08-18 19:02:22 +02:00 · f8cd93d159
parent 0f1d9333dd
commit f8cd93d159
3 changed files with 50 additions and 113 deletions
--- a/R/createDdlFromFile.R
+++ b/R/createDdlFromFile.R
@ -18,7 +18,7 @@
 #' Create a DDL script from two csv files that detail the OMOP CDM Specifications. These files also form the basis of the CDM documentation and the Data Quality
 #' Dashboard.
 #'
-#' @param cdmVersionNum The version of the CDM you are creating
+#' @param cdmVersion The version of the CDM you are creating, e.g. 5.3.1
 #'
 #' @param cdmTableCsvLoc  The location of the csv file with the high-level CDM table information. This is defaulted to "inst/csv/OMOP_CDMv5.3.1_Table_Level.csv".
 #'                        If a new version of this file was committed to the CDM repository the package automatically will grab it and place it in "inst/csv/".
@ -27,57 +27,57 @@
 #' @param outputFile  The name of the output ddl sql file. This is defaulted to a location in the inst/sql/sql server folder and named with today's date and the CDM version.
 #' @export

-createDdlFromFile <- function(cdmVersionNum = cdmVersion,
-                              cdmTableCsvLoc = "inst/csv/OMOP_CDMv5.3.1_Table_Level.csv",
-                              cdmFieldCsvLoc = "inst/csv/OMOP_CDMv5.3.1_Field_Level.csv",
-                              outputFile = paste0("inst/sql/sql_server/OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql")){
+createDdlFromFile <- function(cdmVersion = cdmVersion){
+  cdmTableCsvLoc = paste0("inst/csv/OMOP_CDMv", cdmVersion, "_Table_Level.csv")
+  cdmFieldCsvLoc = paste0("inst/csv/OMOP_CDMv", cdmVersion, "_Field_Level.csv")

  tableSpecs <- read.csv(cdmTableCsvLoc, stringsAsFactors = FALSE)
  cdmSpecs <- read.csv(cdmFieldCsvLoc, stringsAsFactors = FALSE)

  tableList <- tableSpecs$cdmTableName

-  s <- c()
-  s <- c(paste0("--@targetdialect CDM DDL Specification for OMOP Common Data Model ",cdmVersionNum))
-  for (t in tableList){
-    table <- subset(cdmSpecs, cdmTableName == t)
-    fields <- table$cdmFieldName
+  sql_result <- c()
+  sql_result <- c(paste0("--@targetdialect CDM DDL Specification for OMOP Common Data Model ", cdmVersion))
+  for (tableName in tableList){
+    fields <- subset(cdmSpecs, cdmTableName == tableName)
+    fieldNames <- fields$cdmFieldName

-    if ('person_id' %in% fields){
-      q <- "\n\n--HINT DISTRIBUTE ON KEY (person_id)\n"
+    if ('person_id' %in% fieldNames){
+      query <- "\n\n--HINT DISTRIBUTE ON KEY (person_id)\n"
    } else {
-      q <- "\n\n--HINT DISTRIBUTE ON RANDOM\n"
+      query <- "\n\n--HINT DISTRIBUTE ON RANDOM\n"
    }

-    s <- c(s, q, paste0("CREATE TABLE @cdmDatabaseSchema.", t, " (\n"))
+    sql_result <- c(sql_result, query, paste0("CREATE TABLE @cdmDatabaseSchema.", tableName, " (\n"))

-    end <- length(fields)
-    a <- c()
+    n_fields <- length(fieldNames)
+    for(fieldName in fieldNames) {

-    for(f in fields) {
-
-      if (subset(table, cdmFieldName == f, isRequired) == "Yes") {
-        r <- (" NOT NULL")
+      if (subset(fields, cdmFieldName == fieldName, isRequired) == "Yes") {
+        nullable_sql <- (" NOT NULL")
      } else {
-        r <- (" NULL")
+        nullable_sql <- (" NULL")
      }

-      if (f == fields[[end]]) {
-        e <- (" );")
+      if (fieldName == fieldNames[[n_fields]]) {
+        closing_sql <- (" );")
      } else {
-        e <- (",")
+        closing_sql <- (",")
      }

-      if (f=="offset") {
-        field <- paste0('"',f,'"')
+      if (fieldName=="offset") {
+        field <- paste0('"',fieldName,'"')
      } else {
-        field <- f
+        field <- fieldName
      }
-
-      a <- c(a, paste0("\n\t\t\t",field," ",subset(table, cdmFieldName == f, cdmDatatype),r,e))
+      fieldSql <- paste0("\n\t\t\t",
+                         field," ",
+                         subset(fields, cdmFieldName == fieldName, cdmDatatype),
+                         nullable_sql,
+                         closing_sql)
+      sql_result <- c(sql_result, fieldSql)
    }
-    s <- c(s, a, "")
+    sql_result <- c(sql_result, "")
  }
-  SqlRender::writeSql(s, targetFile = outputFile)
-  return(s)
+  return(paste0(sql_result, collapse = ""))
 }
--- a/R/writeDDL.R
+++ b/R/writeDDL.R
@ -17,42 +17,21 @@
 #' Write DDL script
 #'
 #' @param targetdialect  The dialect of the target database. Choices are "oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server"
-#' @param cdmVersion The version of the CDM for which you are creating the DDL.
-#' @param cdmDatabaseSchema The schema of the CDM instance where the DDL will be run. For example, this would be "ohdsi.dbo" when testing on sql server. After testing
-#'                          this can be changed to "@cdmDatabaseSchema"
-#' @param sqlFilename The name of the sql file with the current ddl specifications to be translated and rendered
-#' @param cleanUpScript Set to T if the clean up script should be created. This is for testing purposes and will create a sql script that drops all CDM tables.
-#'                      By default set to F. Set to F for Oracle as well as the sql render translation does not work well.
+#' @param cdmVersion The version of the CDM for which you are creating the DDL, e.g. 5.3.1
+#' @param cdmDatabaseSchema The schema of the CDM instance where the DDL will be run. For example, this would be "ohdsi.dbo" when testing on sql server.
+#'                          Defaults to "@cdmDatabaseSchema"
 #'
 #' @export
-writeDDL <- function(targetdialect, cdmVersion, cdmDatabaseSchema, sqlFilename, cleanUpScript = F) {
-  if(!dir.exists("output")){
-    dir.create("output")
-  }
+writeDDL <- function(targetdialect, cdmVersion, cdmDatabaseSchema = "@cdmDatabaseSchema") {
+  outputpath <- file.path("ddl", cdmVersion, targetdialect)
+  dir.create(outputpath, showWarnings = FALSE, recursive = TRUE)

-  if(!dir.exists(paste0("output/",targetdialect))){
-    dir.create(paste0("output/",targetdialect))
-  }
-
-  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = sqlFilename,
-                                           packageName = "CdmDdlBase",
-                                           dbms = targetdialect,
-                                           targetdialect = targetdialect,
-                                           cdmDatabaseSchema = cdmDatabaseSchema)
+  sql <- createDdlFromFile(cdmVersion)
+  sql <- SqlRender::render(sql = sql, cdmDatabaseSchema = cdmDatabaseSchema)
+  sql <- SqlRender::translate(sql, targetDialect = targetdialect)

+  filename <- paste0("OMOPCDM_", targetdialect, "_", cdmVersion, "_ddl.sql")
  SqlRender::writeSql(sql = sql,
-                      targetFile = paste0("output/",targetdialect,"/OMOP CDM ",targetdialect," ", cdmVersion," ddl.sql"))
-
-
-  if(cleanUpScript){
-
-      sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "testCleanUp.sql",
-                                               packageName = "CdmDdlBase",
-                                               dbms = targetdialect,
-                                               cdmDatabaseSchema = cdmDatabaseSchema)
-
-      SqlRender::writeSql(sql = sql,
-                          targetFile = paste0("output/",targetdialect,"/", targetdialect," testCleanUp ", cdmVersion,".sql"))
-  }
-
+                      targetFile = file.path(outputpath, filename)
+  )
 }
--- a/extras/codeToRun.R
+++ b/extras/codeToRun.R
@ -4,12 +4,11 @@
 # the new CDM versions. Set the below variable to indicate the version of the cdm you are creating. This will be used for the name of the pdf so, for
 # example, write v5.3 as v5_3.

-  cdmVersion <- "v6_0"
+  cdmVersion <- "5.3.1"

 # Step 3: After creating the csv files for the new version, create the sql server DDL from the file

-    s <- CdmDdlBase::createDdlFromFile(cdmTableCsvLoc = "inst/csv/OMOP_CDMv6.0_Table_Level.csv",
-                           cdmFieldCsvLoc = "inst/csv/OMOP_CDMv6.0_Field_Level.csv")
+    s <- CdmDdlBase::createDdlFromFile(cdmVersion)

  # Step 3.1: Create the primary key constraints for the new version

@ -103,53 +102,12 @@ writeIndex("sql server",
 # Step 8: After testing the files for Oracle, Postgres, and Sql Server, run the following to create the files for all dialects. Oracle,
 # Postgres, and Sql Server are rewritten to overwrite the cdmDatabaseSchema with a token.

-writeDDL(targetdialect = "oracle",
-         cdmVersion = cdmVersion,
-         sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
-         cdmDatabaseSchema = "@cdmDatabaseSchema",
-         cleanUpScript = F) #oracle syntax for removing tables is weird, set this to F and make any changes to the raw file

-writeDDL(targetdialect = "postgresql",
-         cdmVersion = cdmVersion,
-         sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
-         cdmDatabaseSchema = "@cdmDatabaseSchema",
-         cleanUpScript = F) #This needs to be updated manually right now
+for (targetdialect in c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server")) {
+  writeDDL(targetdialect = targetdialect,
+           cdmVersion = cdmVersion)
+}

-writeDDL(targetdialect = "sql server",
-         cdmVersion = cdmVersion,
-         sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
-         cdmDatabaseSchema = "@cdmDatabaseSchema",
-         cleanUpScript = F) #This needs to be updated manually right now
-
-writeDDL(targetdialect = "bigquery",
-         cdmVersion = cdmVersion,
-         sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
-         cdmDatabaseSchema = "@cdmDatabaseSchema",
-         cleanUpScript = F)
-
-writeDDL(targetdialect = "impala",
-         cdmVersion = cdmVersion,
-         sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
-         cdmDatabaseSchema = "@cdmDatabaseSchema",
-         cleanUpScript = F)
-
-writeDDL(targetdialect = "netezza",
-         cdmVersion = cdmVersion,
-         sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
-         cdmDatabaseSchema = "@cdmDatabaseSchema",
-         cleanUpScript = F)
-
-writeDDL(targetdialect = "pdw",
-         cdmVersion = cdmVersion,
-         sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
-         cdmDatabaseSchema = "@cdmDatabaseSchema",
-         cleanUpScript = F)
-
-writeDDL(targetdialect = "redshift",
-         cdmVersion = cdmVersion,
-         sqlFilename = paste0("OMOP CDM ddl ", cdmVersion, " ", Sys.Date(), ".sql"),
-         cdmDatabaseSchema = "@cdmDatabaseSchema",
-         cleanUpScript = F)

 ## Write all primary keys