diff --git a/R/listSupportedVersions.R b/R/listSupportedVersions.R index 746368a..93c8cdf 100644 --- a/R/listSupportedVersions.R +++ b/R/listSupportedVersions.R @@ -12,6 +12,6 @@ listSupportedVersions <- function() { #' @export listSupportedDialects <- function() { - supportedDialects <- c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server") + supportedDialects <- c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server", "spark") return(supportedDialects) } diff --git a/R/writeDDL.R b/R/writeDDL.R index 9df7235..a5bc013 100644 --- a/R/writeDDL.R +++ b/R/writeDDL.R @@ -30,7 +30,7 @@ writeDdl <- function(targetDialect, cdmVersion, outputfolder, cdmDatabaseSchema = "@cdmDatabaseSchema") { # argument checks - stopifnot(targetDialect %in% c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server")) + stopifnot(targetDialect %in% c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server", "spark")) stopifnot(cdmVersion %in% listSupportedVersions()) stopifnot(is.character(cdmDatabaseSchema)) @@ -54,7 +54,7 @@ writeDdl <- function(targetDialect, cdmVersion, outputfolder, cdmDatabaseSchema writePrimaryKeys <- function(targetDialect, cdmVersion, outputfolder, cdmDatabaseSchema = "@cdmDatabaseSchema") { # argument checks - stopifnot(targetDialect %in% c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server")) + stopifnot(targetDialect %in% c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server", "spark")) stopifnot(cdmVersion %in% listSupportedVersions()) stopifnot(is.character(cdmDatabaseSchema)) @@ -78,7 +78,7 @@ writePrimaryKeys <- function(targetDialect, cdmVersion, outputfolder, cdmDatabas writeForeignKeys <- function(targetDialect, cdmVersion, outputfolder, cdmDatabaseSchema = "@cdmDatabaseSchema") { # argument checks - stopifnot(targetDialect %in% c("oracle", 
"postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server")) + stopifnot(targetDialect %in% c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server", "spark")) stopifnot(cdmVersion %in% listSupportedVersions()) stopifnot(is.character(cdmDatabaseSchema)) @@ -102,7 +102,7 @@ writeForeignKeys <- function(targetDialect, cdmVersion, outputfolder, cdmDatabas writeIndex <- function(targetDialect, cdmVersion, outputfolder, cdmDatabaseSchema = "@cdmDatabaseSchema") { # argument checks - stopifnot(targetDialect %in% c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server")) + stopifnot(targetDialect %in% c("oracle", "postgresql", "pdw", "redshift", "impala", "netezza", "bigquery", "sql server", "spark")) stopifnot(cdmVersion %in% listSupportedVersions()) stopifnot(is.character(cdmDatabaseSchema)) diff --git a/README.md b/README.md index 7d732d1..3de6942 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,15 @@ output: # How to Use this Repository +If you are looking for the SQL DDLs and don't wish to generate them through R, they can be accessed [here](https://github.com/OHDSI/CommonDataModel/tree/v5.4.0/inst/ddl/5.4). + If you are looking for information on how to submit a bugfix, skip to the [next section](https://github.com/OHDSI/CommonDataModel#bug-fixesmodel-updates) ## Generating the DDLs -*By John and Sam Gresh* -If you prefer to generate the DDLs on your own without downloading them from the github tags, these instructions will guide you on how to do so. +This module will demonstrate two different ways the CDM R package can be used to create the CDM tables in your environment. First, it uses the `buildRelease` function to create the DDL files on your machine, intended for end users that wish to generate these scripts from R without the need to clone or download the source code from github. 
The SQL scripts that are created through this process are available as zip files as part of the [latest release](https://github.com/OHDSI/CommonDataModel/releases/tag/v5.4.0). They are also available on the master branch [here](https://github.com/OHDSI/CommonDataModel/tree/v5.4.0/inst/ddl/5.4). -### Introduction - -This module will demonstrate how to individually create the DDL scripts for DDL, foreign keys, primary keys, and indexed for a single database instance at a time. This module is intended for end users that wish to generate these scripts from R without the need to clone or download the source code from github. The scripts that are created through this process are available as zip files here (TODO: NEED LINK). +Second, the script shows the `executeDdl` function that will connect up to your SQL client directly (assuming your dbms is one of the supported dialects) and instantiate the tables through R. #### Dependencies and prerequisites @@ -27,27 +26,46 @@ This process required R-Studio to be installed as well as [DatabaseConnector](ht #### Create DDL, Foreign Keys, Primary Keys, and Indexes from R -Launch R-Studio and create a new project: File -> New Project -> New Directory -> New Project +### First, install the package from GitHub +``` +install.packages("devtools") +devtools::install_github("OHDSI/CommonDataModel") +``` +### List the currently supported SQL dialects +```CommonDataModel::listSupportedDialects()``` -After completing this step you should see something like the following: +### List the currently supported CDM versions +```CommonDataModel::listSupportedVersions()``` -![](docs/images/rexample1.png) - -For the next step, you can either open a new R script (File -> New File -> R Script), paste the text in the console, or open an R notebook (File -> New File -> New R Notebook). 
Whatever you choose, paste the following, replacing "output" with the name of the output file where you want the DDLs to appear and "YOUR_CDM_SCHEMA" with the name of your CDM schema. In this example we are generating the postgresql DDLs by specifying the dialect in the function calls. To determine which dialects are supported, run the `CommonDataModel::listSupportedDialects()` function. - -```{r} -if (!require("devtools")) install.packages("devtools") -devtools::install_github("OHDSI/CommonDataModel", "v5.4") - -CommonDataModel::buildRelease("postgresql", "5.4", "output", "YOUR_CDM_SCHEMA") +## 1. Use the `buildRelease` function +This function will generate the text files in the dialect you choose, putting the output files in the folder you specify. +``` +CommonDataModel::buildRelease(cdmVersions = "5.4", + targetDialects = "postgresql", + outputfolder = "/pathToOutput") ``` -![](docs/images/rexample2.png) +## 2. Use the `executeDdl` function -![](docs/images/rexample3.png) +If you have an empty schema ready to go, the package will connect and instantiate the tables for you. To start, you need to download DatabaseConnector in order to connect to your database. + +``` +devtools::install_github("DatabaseConnector") + +cd <- DatabaseConnector::createConnectionDetails(dbms = "postgresql", + server = "localhost/ohdsi", + user = "postgres", + password = "postgres", + pathToDriver = "/pathToDriver" + ) + +CommonDataModel::executeDdl(connectionDetails = cd, + cdmVersion = "5.4", + cdmDatabaseSchema = "ohdsi_demo" + ) +``` -You will then see something like the above, with your output directory created and the DDLs available in the folder you specified. ## Bug Fixes/Model Updates diff --git a/docs/background.html b/docs/background.html index d90cd4f..dd2e97f 100644 --- a/docs/background.html +++ b/docs/background.html @@ -11,15 +11,24 @@ -
Below is the specification document for the OMOP Common Data Model, v5.4. This is the latest version of the OMOP CDM. Each table is represented with a high-level description and ETL conventions that should be followed. This is continued with a discussion of each field in each table, any conventions related to the field, and constraints that should be followed (like primary key, foreign key, etc). Should you have questions please feel free to visit the forums or the github issue page.
+This is the specification document for the OMOP Common Data Model, v5.4. This is the latest version of the OMOP CDM. Each table is represented with a high-level description and ETL conventions that should be followed. This is continued with a discussion of each field in each table, any conventions related to the field, and constraints that should be followed (like primary key, foreign key, etc). Should you have questions please feel free to visit the forums or the github issue page.
+The table below details which OHDSI tools support CDM v5.4. There are two levels of support: legacy support means that the tool supports all tables and fields that were present in CDM v5.3 and feature support indicates that the tool supports any new tables and fields in CDM v5.4 that were not present in CDM v5.3. A green heart 💚 indicates that the support level for the listed tool is in place, has been tested, and released. A yellow heart 💛 indicates that the support level for the listed tool has been initiated but has not yet been tested and released.
Tool | +Description | +Legacy Support | +Feature Support | +
---|---|---|---|
CDM R package | +This package can be downloaded from https://github.com/OHDSI/CommonDataModel/. It functions to dynamically create the OMOP CDM documentation and DDL scripts to instantiate the CDM tables. | +๐ | +๐ | +
Data Quality Dashboard | +This package can be downloaded from https://github.com/OHDSI/DataQualityDashboard. It runs a set of > 3500 data quality checks against an OMOP CDM instance and is required to be run on all databases prior to participating in an OHDSI network research study. | +๐ | +๐ | +
Achilles | +This package can be downloaded from https://github.com/OHDSI/Achilles, performing a set of broad database characterizations against an OMOP CDM instance. | +๐ | +๐ | +
ARES | +This package can be downloaded from https://github.com/OHDSI/Ares and is designed to display the results from both the ACHILLES and DataQualityDashboard packages to support data quality and characterization research. | +๐ | +๐ | +
ATLAS | +ATLAS is an open source software tool for researchers to conduct scientific analyses on standardized observational data. Demo | +๐ | +๐ | +
Rabbit-In-A-Hat | +This package can be downloaded from https://github.com/OHDSI/WhiteRabbit and is an application for interactive design of an ETL to the OMOP Common Data Model with the help of the scan report generated by White Rabbit. | +๐ | +๐ | +
Feature Extraction | +This package can be downloaded from https://github.com/OHDSI/FeatureExtraction. It is designed to generate features (covariates) for a cohort generated using the OMOP CDM. | +๐ | +๐* | +
Cohort Diagnostics | +This package can be downloaded from https://github.com/OHDSI/CohortDiagnostics and is used to critically evaluate cohort phenotypes. | +๐ | +๐ | +
* The Feature Extraction package supports all relevant new features in CDM v5.4. For example, it was decided that, from a methodological perspective, the EPISODE and EPISODE_EVENT tables should not be included to define cohort covariates because the events that make up episodes are already pulled in as potential covariates.
Looking to send us a pull request for a bug fix? Please see the readme on the main github page.
+The current CDM version is CDM v5.4, depicted below. This CDM version was developed over the course of a year by considering requests that were sent via our issues page. The list of proposed changes was then shared with the community in multiple ways: through discussions at the weekly OHDSI Community calls, discussions with the OHDSI Steering Committee, and discussions with all potentially affected workgroups. The final changes were then delivered to the Community through a new R package designed to dynamically generate the DDLs and documentation for all supported SQL dialects.
The table below details which OHDSI tools support CDM v5.4. There are two levels of support: legacy support means that the tool supports all tables and fields that were present in CDM v5.3 and feature support indicates that the tool supports any new tables and fields in CDM v5.4 that were not present in CDM v5.3. A green heart 💚 indicates that the support level for the listed tool is in place, has been tested, and released. A yellow heart 💛 indicates that the support level for the listed tool has been initiated but has not yet been tested and released.
Tool | +Description | +Legacy Support | +Feature Support | +
---|---|---|---|
CDM R package | +This package can be downloaded from https://github.com/OHDSI/CommonDataModel/. It functions to dynamically create the OMOP CDM documentation and DDL scripts to instantiate the CDM tables. | +๐ | +๐ | +
Data Quality Dashboard | +This package can be downloaded from https://github.com/OHDSI/DataQualityDashboard. It runs a set of > 3500 data quality checks against an OMOP CDM instance and is required to be run on all databases prior to participating in an OHDSI network research study. | +๐ | +๐ | +
Achilles | +This package can be downloaded from https://github.com/OHDSI/Achilles, performing a set of broad database characterizations against an OMOP CDM instance. | +๐ | +๐ | +
ARES | +This package can be downloaded from https://github.com/OHDSI/Ares and is designed to display the results from both the ACHILLES and DataQualityDashboard packages to support data quality and characterization research. | +๐ | +๐ | +
ATLAS | +ATLAS is an open source software tool for researchers to conduct scientific analyses on standardized observational data. Demo | +๐ | +๐ | +
Rabbit-In-A-Hat | +This package can be downloaded from https://github.com/OHDSI/WhiteRabbit and is an application for interactive design of an ETL to the OMOP Common Data Model with the help of the scan report generated by White Rabbit. | +๐ | +๐ | +
Feature Extraction | +This package can be downloaded from https://github.com/OHDSI/FeatureExtraction. It is designed to generate features (covariates) for a cohort generated using the OMOP CDM. | +๐ | +๐* | +
Cohort Diagnostics | +This package can be downloaded from https://github.com/OHDSI/CohortDiagnostics and is used to critically evaluate cohort phenotypes. | +๐ | +๐ | +
* The Feature Extraction package supports all relevant new features in CDM v5.4. For example, it was decided that, from a methodological perspective, the EPISODE and EPISODE_EVENT tables should not be included to define cohort covariates because the events that make up episodes are already pulled in as potential covariates.
t |