2021-06-09 00:19:01 +00:00
|
|
|
# Copyright 2019 Observational Health Data Sciences and Informatics
|
|
|
|
#
|
|
|
|
# This file is part of CdmDdlBase
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
2021-08-19 15:49:36 +00:00
|
|
|
#' Create the OHDSI-SQL Common Data Model DDL code
|
2021-06-09 00:19:01 +00:00
|
|
|
#'
|
2021-08-19 15:49:36 +00:00
|
|
|
#' The createDdl, createForeignKeys, and createPrimaryKeys functions each return a character string
|
|
|
|
#' containing their respective DDL SQL code in OHDSQL dialect for a specific CDM version.
|
|
|
|
#' The SQL they generate needs to be rendered and translated before it can be executed.
|
|
|
|
#'
|
|
|
|
#' The DDL SQL code is created from two csv files that detail the OMOP CDM Specifications.
|
|
|
|
#' These files also form the basis of the CDM documentation and the Data Quality
|
|
|
|
#' Dashboard.
|
2021-06-09 00:19:01 +00:00
|
|
|
#'
|
2021-08-19 15:49:36 +00:00
|
|
|
#' @param cdmVersion The version of the CDM you are creating, e.g. 5.3, 5.4
|
|
|
|
#' @return A character string containing the OHDSQL DDL
|
2021-08-20 11:59:29 +00:00
|
|
|
#' @importFrom utils read.csv
|
2021-06-09 00:19:01 +00:00
|
|
|
#' @export
|
2021-08-19 15:49:36 +00:00
|
|
|
#' @examples
|
|
|
|
#' ddl <- createDdl("5.4")
|
|
|
|
#' pk <- createPrimaryKeys("5.4")
|
|
|
|
#' fk <- createForeignKeys("5.4")
|
|
|
|
createDdl <- function(cdmVersion){

  # Builds the OHDSQL CREATE TABLE script for one CDM version from the packaged
  # table-level and field-level specification csv files.
  #
  # cdmVersion: a single character string that must be one of
  #             listSupportedVersions().
  # Returns:    one character string holding a header comment followed by one
  #             CREATE TABLE statement per table in the table-level csv.
  # Errors:     if cdmVersion fails the checks below, or if a csv file cannot be
  #             located in the installed package (mustWork = TRUE).

  # argument checks
  stopifnot(is.character(cdmVersion), length(cdmVersion) == 1, cdmVersion %in% listSupportedVersions())

  cdmTableCsvLoc <- system.file(file.path("csv", paste0("OMOP_CDMv", cdmVersion, "_Table_Level.csv")), package = "CommonDataModel", mustWork = TRUE)
  cdmFieldCsvLoc <- system.file(file.path("csv", paste0("OMOP_CDMv", cdmVersion, "_Field_Level.csv")), package = "CommonDataModel", mustWork = TRUE)

  tableSpecs <- read.csv(cdmTableCsvLoc, stringsAsFactors = FALSE)
  cdmSpecs <- read.csv(cdmFieldCsvLoc, stringsAsFactors = FALSE)

  # Renders the distribution hint and the full CREATE TABLE statement of a
  # single table as one string.
  renderTable <- function(tableName) {
    # which() drops rows whose table name is NA instead of selecting them
    fields <- cdmSpecs[which(cdmSpecs$cdmTableName == tableName), , drop = FALSE]
    fieldNames <- fields$cdmFieldName
    n_fields <- length(fieldNames)

    # Without any column the statement could never be closed with " );" and
    # would swallow whatever follows it, so the table is skipped loudly.
    if (n_fields == 0) {
      warning("No fields found for table '", tableName, "' in ", basename(cdmFieldCsvLoc),
              "; no CREATE TABLE statement was generated for it.", call. = FALSE)
      return("")
    }

    if ("person_id" %in% fieldNames) {
      query <- "\n\n--HINT DISTRIBUTE ON KEY (person_id)\n"
    } else {
      query <- "\n\n--HINT DISTRIBUTE ON RANDOM\n"
    }

    # %in% never yields NA, so a blank isRequired cell means "nullable"
    nullable_sql <- ifelse(fields$isRequired %in% "Yes", " NOT NULL", " NULL")

    # the statement is closed after the last column by position, not by name,
    # so a repeated field name cannot close it early
    closing_sql <- rep(",", n_fields)
    closing_sql[n_fields] <- " );"

    # "offset" is the one column name that is emitted double-quoted
    field <- ifelse(fieldNames %in% "offset", paste0('"', fieldNames, '"'), fieldNames)

    fieldSql <- paste0("\n\t\t\t", field, " ", fields$cdmDatatype, nullable_sql, closing_sql)

    paste0(c(query, "CREATE TABLE @cdmDatabaseSchema.", tableName, " (", fieldSql), collapse = "")
  }

  tableSql <- vapply(tableSpecs$cdmTableName, renderTable, character(1), USE.NAMES = FALSE)

  header <- paste0("--@targetDialect CDM DDL Specification for OMOP Common Data Model ", cdmVersion)

  return(paste0(c(header, tableSql), collapse = ""))
}
|
2021-08-19 15:49:36 +00:00
|
|
|
|
|
|
|
|
|
|
|
#' @describeIn createDdl createPrimaryKeys Returns a string containing the OHDSQL for creation of primary keys in the OMOP CDM.
|
|
|
|
#' @export
|
|
|
|
createPrimaryKeys <- function(cdmVersion){

  # Builds the OHDSQL script that adds the primary key constraints for one CDM
  # version from the packaged field-level specification csv file.
  #
  # cdmVersion: a single character string that must be one of
  #             listSupportedVersions().
  # Returns:    one character string holding a header comment followed by one
  #             ALTER TABLE ... ADD CONSTRAINT xpk_<table> statement per row of
  #             the csv whose isPrimaryKey value is "Yes".
  # Errors:     if cdmVersion fails the checks below, or if the csv file cannot
  #             be located in the installed package (mustWork = TRUE).

  # argument checks
  stopifnot(is.character(cdmVersion), length(cdmVersion) == 1, cdmVersion %in% listSupportedVersions())

  cdmFieldCsvLoc <- system.file(file.path("csv", paste0("OMOP_CDMv", cdmVersion, "_Field_Level.csv")), package = "CommonDataModel", mustWork = TRUE)
  cdmSpecs <- read.csv(cdmFieldCsvLoc, stringsAsFactors = FALSE)

  # which() drops rows whose isPrimaryKey is NA instead of selecting them
  primaryKeys <- cdmSpecs[which(cdmSpecs$isPrimaryKey == "Yes"), , drop = FALSE]

  header <- paste0("--@targetDialect CDM Primary Key Constraints for OMOP Common Data Model ", cdmVersion, "\n")

  # One statement per primary-key row. Working row-wise (rather than looking
  # rows up again by field name) keeps a field name that is the primary key of
  # more than one table from emitting each of its statements several times.
  if (nrow(primaryKeys) > 0) {
    statements <- paste0("\nALTER TABLE @cdmDatabaseSchema.", primaryKeys$cdmTableName, " ADD CONSTRAINT xpk_", primaryKeys$cdmTableName, " PRIMARY KEY NONCLUSTERED (", primaryKeys$cdmFieldName , ");\n")
  } else {
    # paste0() would recycle the literals into one bogus statement here
    statements <- character(0)
  }

  return(paste0(c(header, statements), collapse = ""))
}
|
|
|
|
|
|
|
|
#' @describeIn createDdl createForeignKeys Returns a string containing the OHDSQL for creation of foreign keys in the OMOP CDM.
|
|
|
|
#' @export
|
|
|
|
createForeignKeys <- function(cdmVersion){

  # Builds the OHDSQL script that adds the foreign key constraints for one CDM
  # version from the packaged field-level specification csv file.
  #
  # cdmVersion: a single character string that must be one of
  #             listSupportedVersions().
  # Returns:    one character string holding a header comment followed by one
  #             ALTER TABLE ... ADD CONSTRAINT fpk_<table>_<field> statement per
  #             row of the csv whose isForeignKey value is "Yes".
  # Errors:     if cdmVersion fails the checks below, or if the csv file cannot
  #             be located in the installed package (mustWork = TRUE).

  # argument checks
  stopifnot(is.character(cdmVersion), length(cdmVersion) == 1, cdmVersion %in% listSupportedVersions())

  cdmFieldCsvLoc <- system.file(file.path("csv", paste0("OMOP_CDMv", cdmVersion, "_Field_Level.csv")), package = "CommonDataModel", mustWork = TRUE)
  cdmSpecs <- read.csv(cdmFieldCsvLoc, stringsAsFactors = FALSE)

  # which() drops rows whose isForeignKey is NA instead of selecting them
  foreignKeys <- cdmSpecs[which(cdmSpecs$isForeignKey == "Yes"), , drop = FALSE]

  header <- paste0("--@targetDialect CDM Foreign Key Constraints for OMOP Common Data Model ", cdmVersion, "\n")

  # One statement per foreign-key row. Working row-wise avoids the synthetic
  # "<table>_<field>" lookup key, which is ambiguous when underscores line up
  # (e.g. "a_b"/"c" vs "a"/"b_c") and repeats statements when a key repeats.
  if (nrow(foreignKeys) > 0) {
    statements <- paste0("\nALTER TABLE @cdmDatabaseSchema.", foreignKeys$cdmTableName, " ADD CONSTRAINT fpk_", foreignKeys$cdmTableName, "_", foreignKeys$cdmFieldName, " FOREIGN KEY (", foreignKeys$cdmFieldName , ") REFERENCES @cdmDatabaseSchema.", foreignKeys$fkTableName, " (", foreignKeys$fkFieldName, ");\n")
  } else {
    # paste0() would recycle the literals into one bogus statement here
    statements <- character(0)
  }

  return(paste0(c(header, statements), collapse = ""))
}
|
add changes to v5.4 (#433)
* Add github actions workflow to build package and run tests.
* update Description file
* rename .Rproj file.
* Consolidate 'create' functions into one file.
* Add tests for create functions.
* update description
* removed spaces in file and folder names. Regenerated ddl output. Tried to fix Field_Level.csv file.
* consolidate write functions into one file. Add execute function.
* update docs
* add tests for write and execute functions
* update documentation
* Add windows and linux runners in github actions.
* update github actions
* download drivers before running tests
* fix small error in setup test file.
* debug github actions
* debug github actions
* debug github actions
* debug github actions
* fix tiny bug
* comment out execute ddl test
* fix bug in test
* Add execute test back in
* revert accidental change in description
* add print statement for debugging schema error on github actions.
* Fix schema environment variable name
* Add comment to github actions workflow file.
* remove placeholder text in function documentation.
* Rename createdDdl.R to createDdl.R
* Hack-a-thon updates
Closes #81, #387, #239, #412, #391, #330, #408, #365, #306, #264
* Changed bigint to integer for consistency
* Updated DDLs
* Add tests for redshift. Clean up test setup file.
* Foreign key fixes
* Add imports and update docs.
* Fix bug in setup test script.
* update setup file
* Add tests for oracle and sql server. Move setup.R file.
* fix bug in setup
* debug tests on github
* debug github actions
* debug actions.
* debug actions
* debug actions.
* Add missing secrets to yaml!!
* debug actions
* test connection on all platforms
* add ddl execution
* add windows and linux runners
* Resolving conflicts
* Removing unnecessary file
* Trying again to remove .DS_Store, adding to gitignore
* Allow user to specify output location in buildRelease
* replace outputpath with outputfolder for consistent argument names in the package.
* Add test for buildRelease.
* replace outputpath with outputfolder for consistency. update documentation.
* move ddl folder to inst so it is accessible from tests
* update documentation
* Add OMOP header generator function
Co-authored-by: Adam Black <adam.black@odysseusinc.com>
Co-authored-by: Clair Blacketer <mblacke@its.jnj.com>
Co-authored-by: clairblacketer <clairblacketer@users.noreply.github.com>
2021-08-20 17:00:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# A helper function that will return a character string with the omop ascii art given a major and minor cdm version
|
|
|
|
# example: cat(createAsciiHeader(5, 3))
|
|
|
|
createAsciiHeader <- function(major, minor) {
# Assembles a 7-row ASCII-art banner and returns it as one character string:
# the OMOP lettering, the major number, a dot and the minor number, separated
# by single blank columns, with a "\n" closing each of the 7 rows.
#
# major, minor: single numeric values, each required to be one of 0:99.
# Returns:      a length-one character string (rows pasted with collapse = "").
#
# NOTE(review): the glyph literals below are split into single characters with
# strsplit(), so every space and line break inside them is significant. Do not
# reformat, re-indent or trim those literals.
|
|
|
|
|
|
|
|
# both arguments must be length-one numerics ...
stopifnot(is.numeric(major), is.numeric(minor), length(major) == 1, length(minor) == 1)
|
|
|
|
# ... and each must equal one of the integers 0 to 99
stopifnot(major %in% 0:99, minor %in% 0:99)
|
|
|
|
|
|
|
|
# An inner function that returns an ascii art matrix for any number between 0 and 99
# Numbers below 10 are a single glyph; larger numbers are the tens glyph, one
# blank column and the units glyph bound together with cbind().
|
|
|
|
numberMatrix <- function(num){
|
|
|
|
stopifnot(is.numeric(num), num %in% 0:99)
|
|
|
|
|
|
|
|
# An inner function that returns a 7x7 matrix of number ascii art for the number 0 through 9
|
|
|
|
# for the number 1 a 7x5 matrix is returned because 1 is narrower than other numbers.
|
|
|
|
singleDigit <- function(num) {
|
|
|
|
# glyph sheet for the digits 0-9 placed side by side (whitespace is data here)
nums <- c(' ### # ##### ##### # ####### ##### ####### ##### ##### # # ## # ## ## # # # ## # # ## ## # # # # ## # # # # # ## ## # # ##### ##### # # ###### ###### # ##### ####### # # # ######## ## # # # # # # # # # # # # # ## # # # ## # ### ##### ####### ##### # ##### ##### # ##### ##### ')
|
|
|
|
# split the sheet into single characters and fill a 7-row matrix row by row
# NOTE(review): `byrow = T` relies on T still being bound to TRUE; TRUE is the safer spelling.
numsMatrix <- matrix(data = strsplit(nums, character(0))[[1]], nrow = 7, byrow = T)
|
|
|
|
# digit `num` occupies the 7 columns num*7+1 .. num*7+7 of the sheet
cols <- seq(num*7+1, num*7+7, by = 1)
|
|
|
|
out <- numsMatrix[1:7, cols]
|
|
|
|
# the number 1 is narrower than the other numbers
# so only columns 2 to 6 of its 7-column cell are kept
|
|
|
|
if(num == 1) out<- out[1:7, 2:6]
|
|
|
|
out
|
|
|
|
}
|
|
|
|
|
|
|
|
if(num < 10){
|
|
|
|
return(singleDigit(num))
|
|
|
|
} else {
|
|
|
|
# one blank column placed between the tens and the units glyph
space <- matrix(rep(" ", 7), nrow = 7)
|
|
|
|
return(cbind(singleDigit(floor(num/10)), space, singleDigit(num %% 10)))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# OMOP lettering as one multi-line string literal.
# NOTE(review): the leading '.' and the '.' that ends most rows look like layout
# markers that keep each row the same width -- confirm before editing the literal.
omop <- c('.
|
|
|
|
####### # # ####### ###### ##### ###### # # .
|
|
|
|
# # ## ## # # # # # # # # ## ## # #.
|
|
|
|
# # # # # # # # # # # # # # # # # # #.
|
|
|
|
# # # # # # # ###### # # # # # # # #.
|
|
|
|
# # # # # # # # # # # # # #.
|
|
|
|
# # # # # # # # # # # # # # # .
|
|
|
|
####### # # ####### # ##### ###### # # ## ')
|
|
|
|
|
|
|
|
# convert to a 7-row character matrix (filled row by row) and drop its first two columns
|
|
|
|
omop <- matrix(strsplit(omop, character(0))[[1]], nrow = 7, byrow = TRUE)
|
|
|
|
omop <- omop[,c(-1, -2)]
|
|
|
|
|
|
|
|
# 7x3 glyph for the dot: four blank rows followed by three rows of "#"
dot <- matrix(c(rep(" ", 3*4), rep("#", 3*3)), nrow = 7, byrow = TRUE)
|
|
|
|
# a single blank column used as separator between the parts of the banner
space <- matrix(rep(" ", 7), nrow = 7)
|
|
|
|
# one "\n" per row, bound on as the last column of the banner
# NOTE(review): `nrow = 7` is written inside the rep() call rather than the
# matrix() call -- presumably a misplaced parenthesis; confirm the result is
# still a 7x1 column.
newline <- matrix(rep("\n", 7, nrow = 7))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
header <- character(0)
|
|
|
|
# glue all parts side by side; every part contributes 7 rows
headerMatrix <- cbind(omop, space, numberMatrix(major), space, dot, space, numberMatrix(minor), newline)
|
|
|
|
# append the characters of rows 1 to 7 in order ...
for(i in 1:7) {
|
|
|
|
header <- c(header, as.character(headerMatrix[i,]))
|
|
|
|
}
|
|
|
|
# ... and collapse them into one string
header <- paste(header, collapse = "")
|
|
|
|
return(header)
|
|
|
|
}
|