Merge remote-tracking branch 'refs/remotes/OHDSI/master'

This commit is contained in:
vojtechhuser 2017-06-21 15:43:53 -04:00
commit ae34ce4b7f
41 changed files with 2127 additions and 15869 deletions

View File

@ -0,0 +1,4 @@
# Code Excerpts Readme
--------
This folder is intended to house useful code excerpts related to the Common Data Model, e.g. code for building the `*_ERA` tables. It is not reviewed as rigorously as the DDLs, so use these excerpts at your own discretion.

512
Documentation/.Rhistory Normal file
View File

@ -0,0 +1,512 @@
names(inds) <- conceptSetIds[[1]]
conceptSetIds
inds <- lapply(conceptSetIds[[2]], getConcepts)
# Folder that holds 'concept_extract.sql' (read by getConcepts() through the
# global `dir`). Backslashes have to be doubled inside an R string literal:
# a single "\E" is an unrecognised escape and a trailing "\"" swallows the
# closing quote, so the first two attempts could not be parsed as typed.
dir <- "\\\\wprdusmj12rcz\\Epi\\Mood_Haliperidol\\project390\\"
dir <- "S:Epi\\Mood_Haliperidol\\project390\\"
# Forward slashes need no escaping; this last assignment is the value in effect.
# NOTE(review): "S:Epi" has no separator after the drive letter, which makes
# the path relative to the current directory on S: -- confirm "S:/Epi" was
# not intended.
dir <- "S:Epi/Mood_Haliperidol/project390/"
inds <- lapply(conceptSetIds[[2]], getConcepts)
inds
names(inds) <- conceptSetIds[[1]]
# Download concept set expressions from the WebAPI and run the concept
# extraction query for each of them.
#
# For every identifier the expression is fetched over HTTP, flattened into a
# data frame (one row per concept, with 0/1 columns for exclude / descendants /
# mapped), uploaded with DatabaseConnector::insertTable(), and then
# 'concept_extract.sql' is rendered, translated to the PDW dialect and run.
#
# Args:
#   conceptSetIdentifiers: vector of WebAPI concept set ids.
#
# Returns:
#   A list with one element per identifier, in input order, each holding the
#   value returned by DatabaseConnector::querySql().
#
# Reads the global `dir` to locate 'concept_extract.sql'.
getConcepts <- function(conceptSetIdentifiers) {
  extractConceptSet <- function(conceptSetIdentifier) {
    # get the concept set expression from the webapi
    conceptSetExpressionUrl <- paste0("http://hix.jnj.com:8080/WebAPI/conceptset/",
                                      conceptSetIdentifier, "/expression")
    ## http://hix.jnj.com:8080/WebAPI/VOCAB/vocabulary/lookup/mapped
    expressionJson <- httr::content(httr::GET(conceptSetExpressionUrl), "text")
    result <- RJSONIO::fromJSON(expressionJson)

    # one vector of seven fields per item, transposed so each item is a row
    concepts <- t(as.data.frame(lapply(result[[1]], function(x) {
      c(x$concept$CONCEPT_ID,
        x$concept$CONCEPT_CODE,
        x$concept$CONCEPT_NAME,
        x$concept$VOCABULARY_ID,
        x$isExcluded, x$includeDescendants,
        x$includeMapped)
    })))
    concepts <- as.data.frame(concepts)
    rownames(concepts) <- NULL
    colnames(concepts) <- c('concept_id', 'concept_code', 'concept_name', 'vocabulary_id',
                            'exclude', 'descendants', 'mapped')
    # the flags are compared as the text 'TRUE' and stored as 1/0
    for (flag in c('exclude', 'descendants', 'mapped')) {
      concepts[, flag] <- ifelse(concepts[, flag] == 'TRUE', 1, 0)
    }

    # save to table; the connection is per concept set and is always closed
    # again when this helper exits, including on error
    connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "pdw",
                                                                    server = "JRDUSAPSCTL01",
                                                                    port = 17001,
                                                                    user = NULL,
                                                                    password = NULL)
    conn <- DatabaseConnector::connect(connectionDetails)
    on.exit(DatabaseConnector::disconnect(conn), add = TRUE)
    # NOTE(review): the table name starts with a digit; later revisions of this
    # function use 'scratch.dbo.concepts_390' -- confirm which one the SQL expects.
    DatabaseConnector::insertTable(conn, 'scratch.dbo.390_concepts', concepts) #, tempTable = T)

    # run concept_extract sql using the uploaded table
    sql <- SqlRender::readSql(file.path(dir, 'concept_extract.sql'))
    sql <- SqlRender::renderSql(sql, cdm = 'Vocabulary')$sql
    sql <- SqlRender::translateSql(sql, sourceDialect = 'sql server', targetDialect = 'pdw')$sql
    DatabaseConnector::querySql(conn, sql)
  }

  lapply(conceptSetIdentifiers, extractConceptSet)
}
# Concept sets to pull: `name` holds the indication labels and `code` the
# WebAPI concept set ids for each label (same order).
conceptSetIds <- list(
  name = c(
    # 'Agitation, aggression or psychotic Symptoms in dementia together',
    "Agitation, aggression or psychotic Symptoms in dementia sep",
    "Tic disorder",
    "Schizophrenia",
    "Aggression in children with autism or developmental disorders"
  ),
  code = list(
    # 2349,
    c(2478, 2479),
    2348,
    2347,
    c(2361, 2362)
  )
)
# put concept sets into inds
inds <- lapply(conceptSetIds$code, getConcepts)
# Download concept set expressions from the WebAPI and run the concept
# extraction query for each of them.
#
# For every identifier the expression is fetched over HTTP, flattened into a
# data frame (one row per concept, with 0/1 columns for exclude / descendants /
# mapped), uploaded with DatabaseConnector::insertTable(), and then
# 'concept_extract.sql' is rendered, translated to the PDW dialect and run.
#
# Args:
#   conceptSetIdentifiers: vector of WebAPI concept set ids.
#
# Returns:
#   A list with one element per identifier, in input order, each holding the
#   value returned by DatabaseConnector::querySql().
#
# Reads the global `dir` to locate 'concept_extract.sql'.
getConcepts <- function(conceptSetIdentifiers) {
  extractConceptSet <- function(conceptSetIdentifier) {
    # get the concept set expression from the webapi
    conceptSetExpressionUrl <- paste0("http://hix.jnj.com:8080/WebAPI/conceptset/",
                                      conceptSetIdentifier, "/expression")
    ## http://hix.jnj.com:8080/WebAPI/VOCAB/vocabulary/lookup/mapped
    expressionJson <- httr::content(httr::GET(conceptSetExpressionUrl), "text")
    result <- RJSONIO::fromJSON(expressionJson)

    # one vector of seven fields per item, transposed so each item is a row
    concepts <- t(as.data.frame(lapply(result[[1]], function(x) {
      c(x$concept$CONCEPT_ID,
        x$concept$CONCEPT_CODE,
        x$concept$CONCEPT_NAME,
        x$concept$VOCABULARY_ID,
        x$isExcluded, x$includeDescendants,
        x$includeMapped)
    })))
    concepts <- as.data.frame(concepts)
    rownames(concepts) <- NULL
    colnames(concepts) <- c('concept_id', 'concept_code', 'concept_name', 'vocabulary_id',
                            'exclude', 'descendants', 'mapped')
    # the flags are compared as the text 'TRUE' and stored as 1/0
    for (flag in c('exclude', 'descendants', 'mapped')) {
      concepts[, flag] <- ifelse(concepts[, flag] == 'TRUE', 1, 0)
    }

    # save to table; the connection is per concept set and is always closed
    # again when this helper exits, including on error
    connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "pdw",
                                                                    server = "JRDUSAPSCTL01",
                                                                    port = 17001,
                                                                    user = NULL,
                                                                    password = NULL)
    conn <- DatabaseConnector::connect(connectionDetails)
    on.exit(DatabaseConnector::disconnect(conn), add = TRUE)
    DatabaseConnector::insertTable(conn, 'scratch.dbo.concepts_390', concepts) #, tempTable = T)

    # run concept_extract sql using the uploaded table
    sql <- SqlRender::readSql(file.path(dir, 'concept_extract.sql'))
    sql <- SqlRender::renderSql(sql, cdm = 'Vocabulary')$sql
    sql <- SqlRender::translateSql(sql, sourceDialect = 'sql server', targetDialect = 'pdw')$sql
    DatabaseConnector::querySql(conn, sql)
  }

  lapply(conceptSetIdentifiers, extractConceptSet)
}
# Concept sets to pull: `name` holds the indication labels and `code` the
# WebAPI concept set ids for each label (same order).
conceptSetIds <- list(
  name = c(
    # 'Agitation, aggression or psychotic Symptoms in dementia together',
    "Agitation, aggression or psychotic Symptoms in dementia sep",
    "Tic disorder",
    "Schizophrenia",
    "Aggression in children with autism or developmental disorders"
  ),
  code = list(
    # 2349,
    c(2478, 2479),
    2348,
    2347,
    c(2361, 2362)
  )
)
# put concept sets into inds, labelled by indication
inds <- lapply(conceptSetIds$code, getConcepts)
names(inds) <- conceptSetIds$name
# Download concept set expressions from the WebAPI and run the concept
# extraction query for each of them.
#
# For every identifier the expression is fetched over HTTP, flattened into a
# data frame (one row per concept, with 0/1 columns for exclude / descendants /
# mapped), uploaded with DatabaseConnector::insertTable(), and then
# 'concept_extract.sql' is rendered, translated to the PDW dialect and run.
#
# Args:
#   conceptSetIdentifiers: vector of WebAPI concept set ids.
#
# Returns:
#   A list with one element per identifier, in input order, each holding the
#   value returned by DatabaseConnector::querySql().
#
# Reads the global `dir` to locate 'concept_extract.sql'.
getConcepts <- function(conceptSetIdentifiers) {
  extractConceptSet <- function(conceptSetIdentifier) {
    # get the concept set expression from the webapi
    conceptSetExpressionUrl <- paste0("http://hix.jnj.com:8080/WebAPI/conceptset/",
                                      conceptSetIdentifier, "/expression")
    ## http://hix.jnj.com:8080/WebAPI/VOCAB/vocabulary/lookup/mapped
    expressionJson <- httr::content(httr::GET(conceptSetExpressionUrl), "text")
    result <- RJSONIO::fromJSON(expressionJson)

    # one vector of seven fields per item, transposed so each item is a row
    concepts <- t(as.data.frame(lapply(result[[1]], function(x) {
      c(x$concept$CONCEPT_ID,
        x$concept$CONCEPT_CODE,
        x$concept$CONCEPT_NAME,
        x$concept$VOCABULARY_ID,
        x$isExcluded, x$includeDescendants,
        x$includeMapped)
    })))
    concepts <- as.data.frame(concepts)
    rownames(concepts) <- NULL
    colnames(concepts) <- c('concept_id', 'concept_code', 'concept_name', 'vocabulary_id',
                            'exclude', 'descendants', 'mapped')
    # the flags are compared as the text 'TRUE' and stored as 1/0
    for (flag in c('exclude', 'descendants', 'mapped')) {
      concepts[, flag] <- ifelse(concepts[, flag] == 'TRUE', 1, 0)
    }

    # save to table; the connection is per concept set and is always closed
    # again when this helper exits, including on error
    connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "pdw",
                                                                    server = "JRDUSAPSCTL01",
                                                                    port = 17001,
                                                                    user = NULL,
                                                                    password = NULL)
    conn <- DatabaseConnector::connect(connectionDetails)
    on.exit(DatabaseConnector::disconnect(conn), add = TRUE)
    DatabaseConnector::insertTable(conn, 'scratch.dbo.concepts_390', concepts) #, tempTable = T)

    # run concept_extract sql using the uploaded table
    sql <- SqlRender::readSql(file.path(dir, 'concept_extract.sql'))
    sql <- SqlRender::renderSql(sql, cdm = 'Vocabulary')$sql
    sql <- SqlRender::translateSql(sql, sourceDialect = 'sql server', targetDialect = 'pdw')$sql
    DatabaseConnector::querySql(conn, sql)
  }

  lapply(conceptSetIdentifiers, extractConceptSet)
}
# Concept sets to pull: `name` holds the indication labels and `code` the
# WebAPI concept set ids for each label (same order).
conceptSetIds <- list(
  name = c(
    # 'Agitation, aggression or psychotic Symptoms in dementia together',
    "Agitation, aggression or psychotic Symptoms in dementia sep",
    "Tic disorder",
    "Schizophrenia",
    "Aggression in children with autism or developmental disorders"
  ),
  code = list(
    # 2349,
    c(2478, 2479),
    2348,
    2347,
    c(2361, 2362)
  )
)
# put concept sets into inds, labelled by indication
inds <- lapply(conceptSetIds$code, getConcepts)
names(inds) <- conceptSetIds$name
# Download concept set expressions from the WebAPI and run the concept
# extraction query for each of them.
#
# For every identifier the expression is fetched over HTTP, flattened into a
# data frame (one row per concept, with 0/1 columns for exclude / descendants /
# mapped), uploaded to a temp table with DatabaseConnector::insertTable(), and
# then 'concept_extract.sql' is rendered, translated to the PDW dialect and run.
#
# Args:
#   conceptSetIdentifiers: vector of WebAPI concept set ids.
#
# Returns:
#   A list with one element per identifier, in input order, each holding the
#   value returned by DatabaseConnector::querySql().
#
# Reads the global `dir` to locate 'concept_extract.sql'.
getConcepts <- function(conceptSetIdentifiers) {
  extractConceptSet <- function(conceptSetIdentifier) {
    # get the concept set expression from the webapi
    conceptSetExpressionUrl <- paste0("http://hix.jnj.com:8080/WebAPI/conceptset/",
                                      conceptSetIdentifier, "/expression")
    ## http://hix.jnj.com:8080/WebAPI/VOCAB/vocabulary/lookup/mapped
    expressionJson <- httr::content(httr::GET(conceptSetExpressionUrl), "text")
    result <- RJSONIO::fromJSON(expressionJson)

    # one vector of seven fields per item, transposed so each item is a row
    concepts <- t(as.data.frame(lapply(result[[1]], function(x) {
      c(x$concept$CONCEPT_ID,
        x$concept$CONCEPT_CODE,
        x$concept$CONCEPT_NAME,
        x$concept$VOCABULARY_ID,
        x$isExcluded, x$includeDescendants,
        x$includeMapped)
    })))
    concepts <- as.data.frame(concepts)
    rownames(concepts) <- NULL
    colnames(concepts) <- c('concept_id', 'concept_code', 'concept_name', 'vocabulary_id',
                            'exclude', 'descendants', 'mapped')
    # the flags are compared as the text 'TRUE' and stored as 1/0
    for (flag in c('exclude', 'descendants', 'mapped')) {
      concepts[, flag] <- ifelse(concepts[, flag] == 'TRUE', 1, 0)
    }

    # save to temp table; the connection is per concept set and is always
    # closed again when this helper exits, including on error
    connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "pdw",
                                                                    server = "JRDUSAPSCTL01",
                                                                    port = 17001,
                                                                    user = NULL,
                                                                    password = NULL)
    conn <- DatabaseConnector::connect(connectionDetails)
    on.exit(DatabaseConnector::disconnect(conn), add = TRUE)
    DatabaseConnector::insertTable(conn, 'concepts', concepts, tempTable = TRUE)

    # run concept_extract sql using temp table
    sql <- SqlRender::readSql(file.path(dir, 'concept_extract.sql'))
    sql <- SqlRender::renderSql(sql, cdm = 'Vocabulary')$sql
    sql <- SqlRender::translateSql(sql, sourceDialect = 'sql server', targetDialect = 'pdw')$sql
    DatabaseConnector::querySql(conn, sql)
  }

  lapply(conceptSetIdentifiers, extractConceptSet)
}
# Concept sets to pull: `name` holds the indication labels and `code` the
# WebAPI concept set ids for each label (same order).
conceptSetIds <- list(
  name = c(
    # 'Agitation, aggression or psychotic Symptoms in dementia together',
    "Agitation, aggression or psychotic Symptoms in dementia sep",
    "Tic disorder",
    "Schizophrenia",
    "Aggression in children with autism or developmental disorders"
  ),
  code = list(
    # 2349,
    c(2478, 2479),
    2348,
    2347,
    c(2361, 2362)
  )
)
# put concept sets into inds
inds <- lapply(conceptSetIds$code, getConcepts)
# Download concept set expressions from the WebAPI and run the concept
# extraction query for each of them.
#
# For every identifier the expression is fetched over HTTP, flattened into a
# data frame (one row per concept, with 0/1 columns for exclude / descendants /
# mapped), uploaded with DatabaseConnector::insertTable(), and then
# 'concept_extract.sql' is rendered, translated to the PDW dialect and run.
#
# Args:
#   conceptSetIdentifiers: vector of WebAPI concept set ids.
#
# Returns:
#   A list with one element per identifier, in input order, each holding the
#   value returned by DatabaseConnector::querySql().
#
# Reads the global `dir` to locate 'concept_extract.sql'.
getConcepts <- function(conceptSetIdentifiers) {
  extractConceptSet <- function(conceptSetIdentifier) {
    # get the concept set expression from the webapi
    conceptSetExpressionUrl <- paste0("http://hix.jnj.com:8080/WebAPI/conceptset/",
                                      conceptSetIdentifier, "/expression")
    ## http://hix.jnj.com:8080/WebAPI/VOCAB/vocabulary/lookup/mapped
    expressionJson <- httr::content(httr::GET(conceptSetExpressionUrl), "text")
    result <- RJSONIO::fromJSON(expressionJson)

    # one vector of seven fields per item, transposed so each item is a row
    concepts <- t(as.data.frame(lapply(result[[1]], function(x) {
      c(x$concept$CONCEPT_ID,
        x$concept$CONCEPT_CODE,
        x$concept$CONCEPT_NAME,
        x$concept$VOCABULARY_ID,
        x$isExcluded, x$includeDescendants,
        x$includeMapped)
    })))
    concepts <- as.data.frame(concepts)
    rownames(concepts) <- NULL
    colnames(concepts) <- c('concept_id', 'concept_code', 'concept_name', 'vocabulary_id',
                            'exclude', 'descendants', 'mapped')
    # the flags are compared as the text 'TRUE' and stored as 1/0
    for (flag in c('exclude', 'descendants', 'mapped')) {
      concepts[, flag] <- ifelse(concepts[, flag] == 'TRUE', 1, 0)
    }

    # save to table; the connection is per concept set and is always closed
    # again when this helper exits, including on error
    connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "pdw",
                                                                    server = "JRDUSAPSCTL01",
                                                                    port = 17001,
                                                                    user = NULL,
                                                                    password = NULL)
    conn <- DatabaseConnector::connect(connectionDetails)
    on.exit(DatabaseConnector::disconnect(conn), add = TRUE)
    DatabaseConnector::insertTable(conn, 'scratch.dbo.concepts_390', concepts) #, tempTable = T)

    # run concept_extract sql using the uploaded table
    sql <- SqlRender::readSql(file.path(dir, 'concept_extract.sql'))
    sql <- SqlRender::renderSql(sql, cdm = 'Vocabulary')$sql
    sql <- SqlRender::translateSql(sql, sourceDialect = 'sql server', targetDialect = 'pdw')$sql
    DatabaseConnector::querySql(conn, sql)
  }

  lapply(conceptSetIdentifiers, extractConceptSet)
}
# Concept sets to pull: `name` holds the indication labels and `code` the
# WebAPI concept set ids for each label (same order).
conceptSetIds <- list(
  name = c(
    # 'Agitation, aggression or psychotic Symptoms in dementia together',
    "Agitation, aggression or psychotic Symptoms in dementia sep",
    "Tic disorder",
    "Schizophrenia",
    "Aggression in children with autism or developmental disorders"
  ),
  code = list(
    # 2349,
    c(2478, 2479),
    2348,
    2347,
    c(2361, 2362)
  )
)
# put concept sets into inds
inds <- lapply(conceptSetIds$code, getConcepts)
library(SqlRender)
library(DatabaseConnector)
library(stringr)
library(openxlsx)
# Cohort definition ids to export; the loop further down writes one workbook per id.
cohortIds <- c(2520, 2521)
# Describe one CDM database as a plain named list.
#
# Args:
#   id:     short key for the database (used below as worksheet name and in
#           the profile URL).
#   name:   database name.
#   schema: schema inside that database.
#
# Returns:
#   list(id = , name = , schema = ) holding the three arguments.
buildDatabase <- function(id, name, schema) {
  list(id = id, name = name, schema = schema)
}
# CDM databases to export; every entry becomes one worksheet per cohort.
dbList <- list(
  buildDatabase("CPRD_V423", "CDM_CPRD_V423", "dbo"),
  buildDatabase("JMDC_V429", "CDM_JMDC_V429", "dbo"),
  buildDatabase("OPTUMEXTSES_V427", "CDM_OPTUM_EXTENDED_SES_V427", "dbo"),
  buildDatabase("PREMIER_V428", "CDM_PREMIER_V428", "dbo"),
  buildDatabase("TRUVENCCAE_V418", "CDM_TRUVEN_CCAE_V418", "dbo"),
  buildDatabase("TRUVENMDCD_V432", "CDM_TRUVEN_MDCD_V432", "dbo"),
  buildDatabase("TRUVENMDCR_V415", "CDM_TRUVEN_MDCR_V415", "dbo")
)

# Credentials are taken from the environment instead of being written into a
# file that is committed to version control. Set HIX_READER_PASSWORD (and
# optionally HIX_READER_USER) before running.
# NOTE(review): the password previously committed here should be rotated.
connectionDetails <- createConnectionDetails(dbms = "pdw", server = "JRDUSAPSCTL01",
                                             port = 17001,
                                             user = Sys.getenv("HIX_READER_USER", unset = "hix_reader"),
                                             password = Sys.getenv("HIX_READER_PASSWORD"))

# Folder that receives one <cohortId>.xlsx workbook per cohort.
outputExcel <- "Output/ExcelFiles/PP_Atlas"
# Write one Excel workbook per cohort with one worksheet per CDM database.
# Each sheet holds the rows returned by sql/PP_Atlas.sql rendered for that
# database/cohort pair.

# Loop-invariant inputs: the SQL template and the header style are the same
# for every sheet, so they are prepared once.
pp_template <- readSql("sql/PP_Atlas.sql")
hs <- createStyle(textDecoration = "Bold")

# saveWorkbook() cannot create missing folders
dir.create(outputExcel, recursive = TRUE, showWarnings = FALSE)

for (cohortId in cohortIds) {
  filePath <- paste0(file.path(outputExcel, cohortId), ".xlsx")
  # drop a stale workbook up front so a failed run does not leave old data
  if (file.exists(filePath)) {
    file.remove(filePath)
  }

  wb <- createWorkbook()
  for (db in dbList) {
    addWorksheet(wb, db$id)
    pp_sql <- renderSql(pp_template,
                        cdmDatabaseSchema = paste(db$name, db$schema, sep = "."),
                        cohort_definition_id = cohortId,
                        # sep = "/" also puts a slash before the closing quote
                        urlPrefix = paste("'http://hix.jnj.com/atlas/#/profiles",
                                          db$id, cohortId, "'", sep = "/"))$sql

    # one connection per database, closed again even when the query fails
    conn <- connect(connectionDetails)
    pp_data <- tryCatch(
      querySql(conn, pp_sql),
      finally = DatabaseConnector::disconnect(conn)
    )

    writeData(wb, sheet = db$id, pp_data,
              startCol = 1, startRow = 1, colNames = TRUE, rowNames = FALSE, headerStyle = hs)
    setColWidths(wb, sheet = db$id, cols = 1:5, widths = "auto")
  }
  saveWorkbook(wb, filePath, overwrite = TRUE)
}
# Install the OHDSI packages used below from GitHub.
# install_github() is called unqualified, so devtools has to be installed and
# attached first; package names passed to install.packages() must be quoted
# strings, and a GitHub "owner/repo" slug is not accepted by install.packages().
install.packages("devtools")
library(devtools)
library(OhdsiRTools)
# chron was installed right before the Aphrodite install was retried --
# presumably a dependency of it; confirm.
install.packages("chron")
install_github("ohdsi/Aphrodite")
install_github("OHDSI/CdmAtlasCutover")
# Cut the "death subset" CDM sources over: generate the SQL that removes the
# old source, creates the results tables and registers the new sources.
# All three calls run with sqlOnly = TRUE.

# Attach the package before buildCdmSource() and the cut-over functions are
# called.
library(CdmAtlasCutover)

# OHDSI repository database
repoConnectionDetails <- createConnectionDetails(dbms = "sql server",
                                                 server = "rndusrdhit01",
                                                 port = 1433, schema = "ohdsi_repository.dbo")

# Source to be removed
oldCdmSources <- list(
  buildCdmSource(sourceKey = "DEATHSUBSET_CCAE")
)

# All new sources share one JDBC connection string. The password is read from
# the environment (HIX_WRITER_PASSWORD) rather than being committed with the
# code.
# NOTE(review): the password previously committed here should be rotated.
deathSubsetConnectionString <- paste0(
  "jdbc:sqlserver://JRDUSAPSCTL01:17001;databaseName=CDM_DEATH_SUBSET;user=hix_writer;password=",
  Sys.getenv("HIX_WRITER_PASSWORD")
)

# Sources to be registered
newCdmSources <- list(
  buildCdmSource(sourceKey = "DEATHSUBSET_CCAE", sourceName = "Death Subset - Truven CCAE",
                 dbms = "pdw",
                 connectionString = deathSubsetConnectionString,
                 cdmDatabaseSchema = "CDM_DEATH_SUBSET.truven_ccae",
                 resultsDatabaseSchema = "CDM_DEATH_SUBSET.truven_ccae",
                 vocabDatabaseSchema = "CDM_DEATH_SUBSET.truven_ccae"),
  buildCdmSource(sourceKey = "DEATHSUBSET_MDCR", sourceName = "Death Subset - Truven MDCR",
                 dbms = "pdw",
                 connectionString = deathSubsetConnectionString,
                 cdmDatabaseSchema = "CDM_DEATH_SUBSET.truven_mdcr",
                 resultsDatabaseSchema = "CDM_DEATH_SUBSET.truven_mdcr",
                 vocabDatabaseSchema = "VOCABULARY_20161218.dbo"),
  buildCdmSource(sourceKey = "DEATHSUBSET_OPTUM", sourceName = "Death Subset - Optum Extended DOD",
                 dbms = "pdw",
                 connectionString = deathSubsetConnectionString,
                 cdmDatabaseSchema = "CDM_DEATH_SUBSET.optum_extended_dod",
                 resultsDatabaseSchema = "CDM_DEATH_SUBSET.optum_extended_dod",
                 vocabDatabaseSchema = "VOCABULARY_20161218.dbo")
)

removeCdmSources(repoConnectionDetails = repoConnectionDetails, cdmSources = oldCdmSources, sqlOnly = TRUE)
createOhdsiResultsTables(cdmSources = newCdmSources, sqlOnly = TRUE)
insertCdmSources(repoConnectionDetails = repoConnectionDetails, cdmSources = newCdmSources, sourceIdx = TRUE,
                 daimonIdx = TRUE, sqlOnly = TRUE)
# Re-run of the cut-over calls against the repository database; all calls
# pass sqlOnly = TRUE. `oldCdmSources` / `newCdmSources` are the lists
# defined earlier in this session.
repoConnectionDetails <- createConnectionDetails(dbms = "sql server",
server = "rndusrdhit01",
port = 1433, schema = "ohdsi_repository.dbo")
removeCdmSources(repoConnectionDetails = repoConnectionDetails, cdmSources = oldCdmSources, sqlOnly = TRUE)
removeCdmSources(repoConnectionDetails = repoConnectionDetails, cdmSources = oldCdmSources, sqlOnly = TRUE)
insertCdmSources(repoConnectionDetails = repoConnectionDetails, cdmSources = newCdmSources, sourceIdx = TRUE,
daimonIdx = TRUE, sqlOnly = TRUE)
# Force a reinstall of the package from GitHub and attach it again before the
# next attempt.
install_github("OHDSI/CdmAtlasCutover", force=TRUE)
library(CdmAtlasCutover)
# Initialize CDM database list, with all the CDMs to be cut over
repoConnectionDetails <- createConnectionDetails(dbms = "sql server",
server = "rndusrdhit01",
port = 1433, schema = "ohdsi_repository.dbo")
oldCdmSources <- list(
buildCdmSource(sourceKey = "DEATHSUBSET_CCAE")
)
# Same remove / create / insert sequence as before, again with sqlOnly = TRUE.
removeCdmSources(repoConnectionDetails = repoConnectionDetails, cdmSources = oldCdmSources, sqlOnly = TRUE)
createOhdsiResultsTables(cdmSources = newCdmSources, sqlOnly = TRUE)
insertCdmSources(repoConnectionDetails = repoConnectionDetails, cdmSources = newCdmSources, sourceIdx = TRUE,
daimonIdx = TRUE, sqlOnly = TRUE)
# Render the glossary to PDF.
# rmarkdown is attached first because render() and pdf_document() are called
# unqualified below.
library(rmarkdown)
# The relative file names used by the render calls resolve against this folder.
setwd("C:/Git/CommonDataModel/Documentation")
render("GlossaryofTerms.md", "pdf_document")
# Same document again with an explicit figure width.
render("GlossaryofTerms.md", pdf_document(fig_width = 6.5))
# Repeated interactive re-renders of report.Rmd with the default output format.
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
# Three attempts that pass footer.txt as the after_body include of a
# pdf_document output format.
rmarkdown::render('report.Rmd', pdf_document(includes = includes(after_body = "footer.txt")))
rmarkdown::render('report.Rmd', pdf_document(includes = includes(after_body = "footer.txt")))
rmarkdown::render('report.Rmd', pdf_document(includes = includes(after_body = "footer.txt")))
# Back to the plain call without an explicit output format.
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
---
title: "OMOP Common Data Model v5.1 Specifications"
author: "Christian Reich, Patrick Ryan, Rimma Belenkaya, Karthik Natarajan and Clair Blacketer"
date: "`r Sys.Date()`"
output:
pdf_document:
number_sections: yes
toc: yes
linkcolor: blue
---
```{r child = 'C:/Git/CommonDataModel.wiki/License.md'}
```
```{r child = 'C:/Git/CommonDataModel.wiki/Background/TheRoleoftheCommonDataModel.md'}
```
# Further interactive re-renders of report.Rmd; every call is identical and
# uses the default output format.
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')
rmarkdown::render('report.Rmd')

View File

@ -0,0 +1,3 @@
## Renders the .Rmd file named in `input`, which pulls in the wiki markdown
## files, and writes the combined document as a pdf to `output_file`.
rmarkdown::render(
  input = "C:/Git/CommonDataModel/Documentation/OMOP_CDM_PDF.Rmd",
  output_format = "pdf_document",
  output_file = "C:/Git/CommonDataModel/OMOP_CDM_v5_1_0.pdf"
)

View File

@ -0,0 +1,203 @@
---
title: "OMOP Common Data Model v5.1 Specifications"
author: "Christian Reich, Patrick Ryan, Rimma Belenkaya, Karthik Natarajan and Clair Blacketer"
date: "`r Sys.Date()`"
output:
pdf_document:
number_sections: yes
toc: yes
linkcolor: blue
---
# License
```{r child = 'C:/Git/CommonDataModel.wiki/License.md'}
```
# Background
```{r child = 'C:/Git/CommonDataModel.wiki/Background/Background.md'}
```
## The Role of the Common Data Model
```{r child = 'C:/Git/CommonDataModel.wiki/Background/The-Role-of-the-Common-Data-Model.md'}
```
## Design Principles
```{r child = 'C:/Git/CommonDataModel.wiki/Background/Design-Principles.md'}
```
## Data Model Conventions
```{r child = 'C:/Git/CommonDataModel.wiki/Background/Data-Model-Conventions.md'}
```
# Glossary of Terms
```{r child = 'C:/Git/CommonDataModel.wiki/Glossary-of-Terms.md'}
```
# Standardized Vocabularies
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/Standardized-Vocabularies.md'}
```
## CONCEPT
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/CONCEPT.md'}
```
## VOCABULARY
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/VOCABULARY.md'}
```
## DOMAIN
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/DOMAIN.md'}
```
## CONCEPT_CLASS
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/CONCEPT_CLASS.md'}
```
## CONCEPT_RELATIONSHIP
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/CONCEPT_RELATIONSHIP.md'}
```
## RELATIONSHIP
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/RELATIONSHIP.md'}
```
## CONCEPT_SYNONYM
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/CONCEPT_SYNONYM.md'}
```
## CONCEPT_ANCESTOR
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/CONCEPT_ANCESTOR.md'}
```
## SOURCE_TO_CONCEPT_MAP
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/SOURCE_TO_CONCEPT_MAP.md'}
```
## DRUG_STRENGTH
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/DRUG_STRENGTH.md'}
```
## COHORT_DEFINITION
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/COHORT_DEFINITION.md'}
```
## ATTRIBUTE_DEFINITION
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedVocabularies/ATTRIBUTE_DEFINITION.md'}
```
# Standardized Metadata
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedMetadata/Standardized-Metadata.md'}
```
## CDM_SOURCE
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedMetadata/CDM_SOURCE.md'}
```
# Standardized Clinical Data Tables
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/Standardized-clinical-Data-Tables.md'}
```
## PERSON
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/PERSON.md'}
```
## OBSERVATION_PERIOD
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/OBSERVATION_PERIOD.md'}
```
## SPECIMEN
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/SPECIMEN.md'}
```
## DEATH
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/DEATH.md'}
```
## VISIT_OCCURRENCE
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/VISIT_OCCURRENCE.md'}
```
## PROCEDURE_OCCURRENCE
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/PROCEDURE_OCCURRENCE.md'}
```
## DRUG_EXPOSURE
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/DRUG_EXPOSURE.md'}
```
## DEVICE_EXPOSURE
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/DEVICE_EXPOSURE.md'}
```
## CONDITION_OCCURRENCE
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/CONDITION_OCCURRENCE.md'}
```
## MEASUREMENT
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/MEASUREMENT.md'}
```
## NOTE
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/NOTE.md'}
```
## OBSERVATION
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/OBSERVATION.md'}
```
## FACT_RELATIONSHIP
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedclinicalDataTables/FACT_RELATIONSHIP.md'}
```
# Standardized Health System Data Tables
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedHealthSystemDataTables/Standardized-Health-System-Data-Tables.md'}
```
## LOCATION
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedHealthSystemDataTables/LOCATION.md'}
```
## CARE_SITE
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedHealthSystemDataTables/CARE_SITE.md'}
```
## PROVIDER
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedHealthSystemDataTables/PROVIDER.md'}
```
# Standardized Health Economics Data Tables
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedHealthEconomicsDataTables/Standardized-Health-Economics-Data-Tables.md'}
```
## PAYER_PLAN_PERIOD
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedHealthEconomicsDataTables/PAYER_PLAN_PERIOD.md'}
```
## COST
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedHealthEconomicsDataTables/COST.md'}
```
# Standardized Derived Elements
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedDerivedElements/Standardized-Derived-Elements.md'}
```
## COHORT
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedDerivedElements/COHORT.md'}
```
## COHORT_ATTRIBUTE
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedDerivedElements/COHORT_ATTRIBUTE.md'}
```
## DRUG_ERA
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedDerivedElements/DRUG_ERA.md'}
```
## DOSE_ERA
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedDerivedElements/DOSE_ERA.md'}
```
## CONDITION_ERA
```{r child = 'C:/Git/CommonDataModel.wiki/StandardizedDerivedElements/CONDITION_ERA.md'}
```

461
Impala/OMOP_Parquet.sql Normal file
View File

@ -0,0 +1,461 @@
-- To generate the conversion expression for DATE columns, run this regex search/replace in an editor.
-- Search pattern:
-- ([^ ]+) VARCHAR\(8\), \-\- DATE
-- Replacement:
-- TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST($1 AS STRING), 1, 4), SUBSTR(CAST($1 AS STRING), 5, 2), SUBSTR(CAST($1 AS STRING), 7, 2)), 'UTC') AS $1,
-- concept: Parquet copy of omop_cdm.concept.
-- Each date is cut into characters 1-4, 5-6 and 7-8, joined with '-' and
-- passed to TO_UTC_TIMESTAMP (presumably a YYYYMMDD source value).
CREATE TABLE omop_cdm_parquet.concept STORED AS PARQUET AS
SELECT
    concept_id,
    concept_name,
    domain_id,
    vocabulary_id,
    concept_class_id,
    standard_concept,
    concept_code,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
    invalid_reason
FROM omop_cdm.concept;
-- vocabulary, domain, concept_class: copied column-for-column with no
-- conversion expressions.
CREATE TABLE omop_cdm_parquet.vocabulary STORED AS PARQUET AS
SELECT * FROM omop_cdm.vocabulary;

CREATE TABLE omop_cdm_parquet.domain STORED AS PARQUET AS
SELECT * FROM omop_cdm.domain;

CREATE TABLE omop_cdm_parquet.concept_class STORED AS PARQUET AS
SELECT * FROM omop_cdm.concept_class;
-- concept_relationship: Parquet copy with both validity dates rebuilt as
-- '-'-separated strings and converted via TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.concept_relationship STORED AS PARQUET AS
SELECT
    concept_id_1,
    concept_id_2,
    relationship_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
    invalid_reason
FROM omop_cdm.concept_relationship;
-- relationship, concept_synonym, concept_ancestor: copied column-for-column
-- with no conversion expressions.
CREATE TABLE omop_cdm_parquet.relationship STORED AS PARQUET AS
SELECT * FROM omop_cdm.relationship;

CREATE TABLE omop_cdm_parquet.concept_synonym STORED AS PARQUET AS
SELECT * FROM omop_cdm.concept_synonym;

CREATE TABLE omop_cdm_parquet.concept_ancestor STORED AS PARQUET AS
SELECT * FROM omop_cdm.concept_ancestor;
-- source_to_concept_map: Parquet copy with both validity dates converted
-- through the substring / CONCAT_WS / TO_UTC_TIMESTAMP expression.
CREATE TABLE omop_cdm_parquet.source_to_concept_map STORED AS PARQUET AS
SELECT
    source_code,
    source_concept_id,
    source_vocabulary_id,
    source_code_description,
    target_concept_id,
    target_vocabulary_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
    invalid_reason
FROM omop_cdm.source_to_concept_map;
-- drug_strength: Parquet copy; the value columns tagged NUMERIC are passed
-- through unchanged and only the two validity dates are converted.
CREATE TABLE omop_cdm_parquet.drug_strength STORED AS PARQUET AS
SELECT
    drug_concept_id,
    ingredient_concept_id,
    amount_value, -- NUMERIC
    amount_unit_concept_id,
    numerator_value, -- NUMERIC
    numerator_unit_concept_id,
    denominator_value, -- NUMERIC
    denominator_unit_concept_id,
    box_size,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
    invalid_reason
FROM omop_cdm.drug_strength;
-- cohort_definition: Parquet copy; cohort_initiation_date is the only
-- converted column, the TEXT-tagged columns are passed through unchanged.
CREATE TABLE omop_cdm_parquet.cohort_definition STORED AS PARQUET AS
SELECT
    cohort_definition_id,
    cohort_definition_name,
    cohort_definition_description, -- TEXT
    definition_type_concept_id,
    cohort_definition_syntax, -- TEXT
    subject_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_initiation_date AS STRING), 1, 4), SUBSTR(CAST(cohort_initiation_date AS STRING), 5, 2), SUBSTR(CAST(cohort_initiation_date AS STRING), 7, 2)), 'UTC') AS cohort_initiation_date
FROM omop_cdm.cohort_definition;
-- attribute_definition: copied column-for-column with no conversion.
CREATE TABLE omop_cdm_parquet.attribute_definition STORED AS PARQUET AS
SELECT * FROM omop_cdm.attribute_definition;
-- cdm_source: Parquet copy with source_release_date and cdm_release_date
-- converted via TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.cdm_source STORED AS PARQUET AS
SELECT
    cdm_source_name,
    cdm_source_abbreviation,
    cdm_holder,
    source_description, -- TEXT
    source_documentation_reference,
    cdm_etl_reference,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(source_release_date AS STRING), 1, 4), SUBSTR(CAST(source_release_date AS STRING), 5, 2), SUBSTR(CAST(source_release_date AS STRING), 7, 2)), 'UTC') AS source_release_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cdm_release_date AS STRING), 1, 4), SUBSTR(CAST(cdm_release_date AS STRING), 5, 2), SUBSTR(CAST(cdm_release_date AS STRING), 7, 2)), 'UTC') AS cdm_release_date,
    cdm_version,
    vocabulary_version
FROM omop_cdm.cdm_source;
-- person: copied column-for-column with no conversion.
CREATE TABLE omop_cdm_parquet.person STORED AS PARQUET AS
SELECT * FROM omop_cdm.person;
-- observation_period: Parquet copy with the start and end dates converted
-- via TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.observation_period STORED AS PARQUET AS
SELECT
    observation_period_id,
    person_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_period_start_date AS STRING), 1, 4), SUBSTR(CAST(observation_period_start_date AS STRING), 5, 2), SUBSTR(CAST(observation_period_start_date AS STRING), 7, 2)), 'UTC') AS observation_period_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_period_end_date AS STRING), 1, 4), SUBSTR(CAST(observation_period_end_date AS STRING), 5, 2), SUBSTR(CAST(observation_period_end_date AS STRING), 7, 2)), 'UTC') AS observation_period_end_date,
    period_type_concept_id
FROM omop_cdm.observation_period;
-- specimen: Parquet copy; specimen_date is converted, specimen_time is
-- passed through unchanged.
CREATE TABLE omop_cdm_parquet.specimen STORED AS PARQUET AS
SELECT
    specimen_id,
    person_id,
    specimen_concept_id,
    specimen_type_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(specimen_date AS STRING), 1, 4), SUBSTR(CAST(specimen_date AS STRING), 5, 2), SUBSTR(CAST(specimen_date AS STRING), 7, 2)), 'UTC') AS specimen_date,
    specimen_time,
    quantity, -- NUMERIC
    unit_concept_id,
    anatomic_site_concept_id,
    disease_status_concept_id,
    specimen_source_id,
    specimen_source_value,
    unit_source_value,
    anatomic_site_source_value,
    disease_status_source_value
FROM omop_cdm.specimen;
-- death: Parquet copy with death_date converted via TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.death STORED AS PARQUET AS
SELECT
    person_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(death_date AS STRING), 1, 4), SUBSTR(CAST(death_date AS STRING), 5, 2), SUBSTR(CAST(death_date AS STRING), 7, 2)), 'UTC') AS death_date,
    death_type_concept_id,
    cause_concept_id,
    cause_source_value,
    cause_source_concept_id
FROM omop_cdm.death;
-- visit_occurrence: Parquet copy; the start/end dates are converted and the
-- start/end time columns are passed through unchanged.
CREATE TABLE omop_cdm_parquet.visit_occurrence STORED AS PARQUET AS
SELECT
    visit_occurrence_id,
    person_id,
    visit_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(visit_start_date AS STRING), 1, 4), SUBSTR(CAST(visit_start_date AS STRING), 5, 2), SUBSTR(CAST(visit_start_date AS STRING), 7, 2)), 'UTC') AS visit_start_date,
    visit_start_time,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(visit_end_date AS STRING), 1, 4), SUBSTR(CAST(visit_end_date AS STRING), 5, 2), SUBSTR(CAST(visit_end_date AS STRING), 7, 2)), 'UTC') AS visit_end_date,
    visit_end_time,
    visit_type_concept_id,
    provider_id,
    care_site_id,
    visit_source_value,
    visit_source_concept_id
FROM omop_cdm.visit_occurrence;
-- procedure_occurrence: Parquet copy with procedure_date converted via
-- TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.procedure_occurrence STORED AS PARQUET AS
SELECT
    procedure_occurrence_id,
    person_id,
    procedure_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(procedure_date AS STRING), 1, 4), SUBSTR(CAST(procedure_date AS STRING), 5, 2), SUBSTR(CAST(procedure_date AS STRING), 7, 2)), 'UTC') AS procedure_date,
    procedure_type_concept_id,
    modifier_concept_id,
    quantity,
    provider_id,
    visit_occurrence_id,
    procedure_source_value,
    procedure_source_concept_id,
    qualifier_source_value
FROM omop_cdm.procedure_occurrence;
-- drug_exposure: Parquet copy; the exposure start/end dates are converted,
-- every other column (including NUMERIC/TEXT-tagged ones) is passed through.
CREATE TABLE omop_cdm_parquet.drug_exposure STORED AS PARQUET AS
SELECT
    drug_exposure_id,
    person_id,
    drug_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_exposure_start_date AS STRING), 1, 4), SUBSTR(CAST(drug_exposure_start_date AS STRING), 5, 2), SUBSTR(CAST(drug_exposure_start_date AS STRING), 7, 2)), 'UTC') AS drug_exposure_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_exposure_end_date AS STRING), 1, 4), SUBSTR(CAST(drug_exposure_end_date AS STRING), 5, 2), SUBSTR(CAST(drug_exposure_end_date AS STRING), 7, 2)), 'UTC') AS drug_exposure_end_date,
    drug_type_concept_id,
    stop_reason,
    refills,
    quantity, -- NUMERIC
    days_supply,
    sig, -- TEXT
    route_concept_id,
    effective_drug_dose, -- NUMERIC
    dose_unit_concept_id,
    lot_number,
    provider_id,
    visit_occurrence_id,
    drug_source_value,
    drug_source_concept_id,
    route_source_value,
    dose_unit_source_value
FROM omop_cdm.drug_exposure;
-- device_exposure: Parquet copy with the exposure start/end dates converted
-- via TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.device_exposure STORED AS PARQUET AS
SELECT
    device_exposure_id,
    person_id,
    device_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(device_exposure_start_date AS STRING), 1, 4), SUBSTR(CAST(device_exposure_start_date AS STRING), 5, 2), SUBSTR(CAST(device_exposure_start_date AS STRING), 7, 2)), 'UTC') AS device_exposure_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(device_exposure_end_date AS STRING), 1, 4), SUBSTR(CAST(device_exposure_end_date AS STRING), 5, 2), SUBSTR(CAST(device_exposure_end_date AS STRING), 7, 2)), 'UTC') AS device_exposure_end_date,
    device_type_concept_id,
    unique_device_id,
    quantity,
    provider_id,
    visit_occurrence_id,
    device_source_value,
    device_source_concept_id
FROM omop_cdm.device_exposure;
-- condition_occurrence: Parquet copy with the condition start/end dates
-- converted via TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.condition_occurrence STORED AS PARQUET AS
SELECT
    condition_occurrence_id,
    person_id,
    condition_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_start_date AS STRING), 1, 4), SUBSTR(CAST(condition_start_date AS STRING), 5, 2), SUBSTR(CAST(condition_start_date AS STRING), 7, 2)), 'UTC') AS condition_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_end_date AS STRING), 1, 4), SUBSTR(CAST(condition_end_date AS STRING), 5, 2), SUBSTR(CAST(condition_end_date AS STRING), 7, 2)), 'UTC') AS condition_end_date,
    condition_type_concept_id,
    stop_reason,
    provider_id,
    visit_occurrence_id,
    condition_source_value,
    condition_source_concept_id
FROM omop_cdm.condition_occurrence;
-- measurement: Parquet copy; measurement_date is converted and
-- measurement_time is passed through unchanged.
CREATE TABLE omop_cdm_parquet.measurement STORED AS PARQUET AS
SELECT
    measurement_id,
    person_id,
    measurement_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(measurement_date AS STRING), 1, 4), SUBSTR(CAST(measurement_date AS STRING), 5, 2), SUBSTR(CAST(measurement_date AS STRING), 7, 2)), 'UTC') AS measurement_date,
    measurement_time,
    measurement_type_concept_id,
    operator_concept_id,
    value_as_number, -- NUMERIC
    value_as_concept_id,
    unit_concept_id,
    range_low, -- NUMERIC
    range_high, -- NUMERIC
    provider_id,
    visit_occurrence_id,
    measurement_source_value,
    measurement_source_concept_id,
    unit_source_value,
    value_source_value
FROM omop_cdm.measurement;
-- note: Parquet copy; note_date is converted and note_time is passed
-- through unchanged.
CREATE TABLE omop_cdm_parquet.note STORED AS PARQUET AS
SELECT
    note_id,
    person_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(note_date AS STRING), 1, 4), SUBSTR(CAST(note_date AS STRING), 5, 2), SUBSTR(CAST(note_date AS STRING), 7, 2)), 'UTC') AS note_date,
    note_time,
    note_type_concept_id,
    note_text, -- TEXT
    provider_id,
    visit_occurrence_id,
    note_source_value
FROM omop_cdm.note;
-- observation: Parquet copy; observation_date is converted and
-- observation_time is passed through unchanged.
CREATE TABLE omop_cdm_parquet.observation STORED AS PARQUET AS
SELECT
    observation_id,
    person_id,
    observation_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_date AS STRING), 1, 4), SUBSTR(CAST(observation_date AS STRING), 5, 2), SUBSTR(CAST(observation_date AS STRING), 7, 2)), 'UTC') AS observation_date,
    observation_time,
    observation_type_concept_id,
    value_as_number, -- NUMERIC
    value_as_string,
    value_as_concept_id,
    qualifier_concept_id,
    unit_concept_id,
    provider_id,
    visit_occurrence_id,
    observation_source_value,
    observation_source_concept_id,
    unit_source_value,
    qualifier_source_value
FROM omop_cdm.observation;
-- fact_relationship, location, care_site, provider: copied column-for-column
-- with no conversion. `location` is back-quoted in both schemas.
CREATE TABLE omop_cdm_parquet.fact_relationship STORED AS PARQUET AS
SELECT * FROM omop_cdm.fact_relationship;

CREATE TABLE omop_cdm_parquet.`location` STORED AS PARQUET AS
SELECT * FROM omop_cdm.`location`;

CREATE TABLE omop_cdm_parquet.care_site STORED AS PARQUET AS
SELECT * FROM omop_cdm.care_site;

CREATE TABLE omop_cdm_parquet.provider STORED AS PARQUET AS
SELECT * FROM omop_cdm.provider;
-- payer_plan_period: Parquet copy with the period start/end dates converted
-- via TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.payer_plan_period STORED AS PARQUET AS
SELECT
    payer_plan_period_id,
    person_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(payer_plan_period_start_date AS STRING), 1, 4), SUBSTR(CAST(payer_plan_period_start_date AS STRING), 5, 2), SUBSTR(CAST(payer_plan_period_start_date AS STRING), 7, 2)), 'UTC') AS payer_plan_period_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(payer_plan_period_end_date AS STRING), 1, 4), SUBSTR(CAST(payer_plan_period_end_date AS STRING), 5, 2), SUBSTR(CAST(payer_plan_period_end_date AS STRING), 7, 2)), 'UTC') AS payer_plan_period_end_date,
    payer_source_value,
    plan_source_value,
    family_source_value
FROM omop_cdm.payer_plan_period;
/* The individual cost tables are being phased out and will disappear soon
CREATE TABLE omop_cdm_parquet.visit_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.visit_cost;
CREATE TABLE omop_cdm_parquet.procedure_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.procedure_cost;
CREATE TABLE omop_cdm_parquet.drug_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.drug_cost;
CREATE TABLE omop_cdm_parquet.device_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.device_cost;
*/
-- cost: copied column-for-column with no conversion.
CREATE TABLE omop_cdm_parquet.cost STORED AS PARQUET AS
SELECT * FROM omop_cdm.cost;
-- cohort: Parquet copy with the cohort start/end dates converted via
-- TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.cohort STORED AS PARQUET AS
SELECT
    cohort_definition_id,
    subject_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_start_date AS STRING), 1, 4), SUBSTR(CAST(cohort_start_date AS STRING), 5, 2), SUBSTR(CAST(cohort_start_date AS STRING), 7, 2)), 'UTC') AS cohort_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_end_date AS STRING), 1, 4), SUBSTR(CAST(cohort_end_date AS STRING), 5, 2), SUBSTR(CAST(cohort_end_date AS STRING), 7, 2)), 'UTC') AS cohort_end_date
FROM omop_cdm.cohort;
-- cohort_attribute: Parquet copy with the cohort start/end dates converted
-- via TO_UTC_TIMESTAMP; the date columns come second and third in the output.
CREATE TABLE omop_cdm_parquet.cohort_attribute STORED AS PARQUET AS
SELECT
    cohort_definition_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_start_date AS STRING), 1, 4), SUBSTR(CAST(cohort_start_date AS STRING), 5, 2), SUBSTR(CAST(cohort_start_date AS STRING), 7, 2)), 'UTC') AS cohort_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_end_date AS STRING), 1, 4), SUBSTR(CAST(cohort_end_date AS STRING), 5, 2), SUBSTR(CAST(cohort_end_date AS STRING), 7, 2)), 'UTC') AS cohort_end_date,
    subject_id,
    attribute_definition_id,
    value_as_number, -- NUMERIC
    value_as_concept_id
FROM omop_cdm.cohort_attribute;
-- drug_era: Parquet copy with the era start/end dates converted via
-- TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.drug_era STORED AS PARQUET AS
SELECT
    drug_era_id,
    person_id,
    drug_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_era_start_date AS STRING), 1, 4), SUBSTR(CAST(drug_era_start_date AS STRING), 5, 2), SUBSTR(CAST(drug_era_start_date AS STRING), 7, 2)), 'UTC') AS drug_era_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_era_end_date AS STRING), 1, 4), SUBSTR(CAST(drug_era_end_date AS STRING), 5, 2), SUBSTR(CAST(drug_era_end_date AS STRING), 7, 2)), 'UTC') AS drug_era_end_date,
    drug_exposure_count,
    gap_days
FROM omop_cdm.drug_era;
-- dose_era: Parquet copy with the era start/end dates converted via
-- TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.dose_era STORED AS PARQUET AS
SELECT
    dose_era_id,
    person_id,
    drug_concept_id,
    unit_concept_id,
    dose_value, -- NUMERIC
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(dose_era_start_date AS STRING), 1, 4), SUBSTR(CAST(dose_era_start_date AS STRING), 5, 2), SUBSTR(CAST(dose_era_start_date AS STRING), 7, 2)), 'UTC') AS dose_era_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(dose_era_end_date AS STRING), 1, 4), SUBSTR(CAST(dose_era_end_date AS STRING), 5, 2), SUBSTR(CAST(dose_era_end_date AS STRING), 7, 2)), 'UTC') AS dose_era_end_date
FROM omop_cdm.dose_era;
-- condition_era: Parquet copy with the era start/end dates converted via
-- TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.condition_era STORED AS PARQUET AS
SELECT
    condition_era_id,
    person_id,
    condition_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_era_start_date AS STRING), 1, 4), SUBSTR(CAST(condition_era_start_date AS STRING), 5, 2), SUBSTR(CAST(condition_era_start_date AS STRING), 7, 2)), 'UTC') AS condition_era_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_era_end_date AS STRING), 1, 4), SUBSTR(CAST(condition_era_end_date AS STRING), 5, 2), SUBSTR(CAST(condition_era_end_date AS STRING), 7, 2)), 'UTC') AS condition_era_end_date,
    condition_occurrence_count
FROM omop_cdm.condition_era;

View File

@ -0,0 +1,490 @@
-- To generate the conversion expression for DATE columns, run this regex search/replace in an editor.
-- Search pattern:
-- ([^ ]+) VARCHAR\(8\), \-\- DATE
-- Replacement:
-- TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST($1 AS STRING), 1, 4), SUBSTR(CAST($1 AS STRING), 5, 2), SUBSTR(CAST($1 AS STRING), 7, 2)), 'UTC') AS $1,
-- concept: Parquet copy of omop_cdm.concept.
-- Each date is cut into characters 1-4, 5-6 and 7-8, joined with '-' and
-- passed to TO_UTC_TIMESTAMP (presumably a YYYYMMDD source value).
CREATE TABLE omop_cdm_parquet.concept STORED AS PARQUET AS
SELECT
    concept_id,
    concept_name,
    domain_id,
    vocabulary_id,
    concept_class_id,
    standard_concept,
    concept_code,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
    invalid_reason
FROM omop_cdm.concept;
-- vocabulary, domain, concept_class: copied column-for-column with no
-- conversion expressions.
CREATE TABLE omop_cdm_parquet.vocabulary STORED AS PARQUET AS
SELECT * FROM omop_cdm.vocabulary;

CREATE TABLE omop_cdm_parquet.domain STORED AS PARQUET AS
SELECT * FROM omop_cdm.domain;

CREATE TABLE omop_cdm_parquet.concept_class STORED AS PARQUET AS
SELECT * FROM omop_cdm.concept_class;
-- concept_relationship: Parquet copy with both validity dates rebuilt as
-- '-'-separated strings and converted via TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.concept_relationship STORED AS PARQUET AS
SELECT
    concept_id_1,
    concept_id_2,
    relationship_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
    invalid_reason
FROM omop_cdm.concept_relationship;
-- relationship, concept_synonym, concept_ancestor: copied column-for-column
-- with no conversion expressions.
CREATE TABLE omop_cdm_parquet.relationship STORED AS PARQUET AS
SELECT * FROM omop_cdm.relationship;

CREATE TABLE omop_cdm_parquet.concept_synonym STORED AS PARQUET AS
SELECT * FROM omop_cdm.concept_synonym;

CREATE TABLE omop_cdm_parquet.concept_ancestor STORED AS PARQUET AS
SELECT * FROM omop_cdm.concept_ancestor;
-- source_to_concept_map: Parquet copy with both validity dates converted
-- through the substring / CONCAT_WS / TO_UTC_TIMESTAMP expression.
CREATE TABLE omop_cdm_parquet.source_to_concept_map STORED AS PARQUET AS
SELECT
    source_code,
    source_concept_id,
    source_vocabulary_id,
    source_code_description,
    target_concept_id,
    target_vocabulary_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
    invalid_reason
FROM omop_cdm.source_to_concept_map;
-- drug_strength: Parquet copy; the value columns tagged NUMERIC are passed
-- through unchanged and only the two validity dates are converted.
CREATE TABLE omop_cdm_parquet.drug_strength STORED AS PARQUET AS
SELECT
    drug_concept_id,
    ingredient_concept_id,
    amount_value, -- NUMERIC
    amount_unit_concept_id,
    numerator_value, -- NUMERIC
    numerator_unit_concept_id,
    denominator_value, -- NUMERIC
    denominator_unit_concept_id,
    box_size,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
    invalid_reason
FROM omop_cdm.drug_strength;
-- cohort_definition: Parquet copy; cohort_initiation_date is the only
-- converted column, the TEXT-tagged columns are passed through unchanged.
CREATE TABLE omop_cdm_parquet.cohort_definition STORED AS PARQUET AS
SELECT
    cohort_definition_id,
    cohort_definition_name,
    cohort_definition_description, -- TEXT
    definition_type_concept_id,
    cohort_definition_syntax, -- TEXT
    subject_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_initiation_date AS STRING), 1, 4), SUBSTR(CAST(cohort_initiation_date AS STRING), 5, 2), SUBSTR(CAST(cohort_initiation_date AS STRING), 7, 2)), 'UTC') AS cohort_initiation_date
FROM omop_cdm.cohort_definition;
-- attribute_definition: copied column-for-column with no conversion.
CREATE TABLE omop_cdm_parquet.attribute_definition STORED AS PARQUET AS
SELECT * FROM omop_cdm.attribute_definition;
-- cdm_source: Parquet copy with source_release_date and cdm_release_date
-- converted via TO_UTC_TIMESTAMP.
CREATE TABLE omop_cdm_parquet.cdm_source STORED AS PARQUET AS
SELECT
    cdm_source_name,
    cdm_source_abbreviation,
    cdm_holder,
    source_description, -- TEXT
    source_documentation_reference,
    cdm_etl_reference,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(source_release_date AS STRING), 1, 4), SUBSTR(CAST(source_release_date AS STRING), 5, 2), SUBSTR(CAST(source_release_date AS STRING), 7, 2)), 'UTC') AS source_release_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cdm_release_date AS STRING), 1, 4), SUBSTR(CAST(cdm_release_date AS STRING), 5, 2), SUBSTR(CAST(cdm_release_date AS STRING), 7, 2)), 'UTC') AS cdm_release_date,
    cdm_version,
    vocabulary_version
FROM omop_cdm.cdm_source;
-- person: Parquet copy plus a derived birth_datetime column.
-- birth_datetime joins year_of_birth with month_of_birth and day_of_birth,
-- each prefixed with '0' and then trimmed by SUBSTR(..., -2) (presumably to
-- the last two characters, i.e. zero-padding; confirm against the Impala
-- string function reference).
CREATE TABLE omop_cdm_parquet.person STORED AS PARQUET AS
SELECT
    person_id,
    gender_concept_id,
    year_of_birth,
    month_of_birth,
    day_of_birth,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', CAST(year_of_birth AS STRING), SUBSTR(CONCAT('0', CAST(month_of_birth AS STRING)), -2), SUBSTR(CONCAT('0', CAST(day_of_birth AS STRING)), -2)), 'UTC') AS birth_datetime,
    race_concept_id,
    ethnicity_concept_id,
    location_id,
    provider_id,
    care_site_id,
    person_source_value,
    gender_source_value,
    gender_source_concept_id,
    race_source_value,
    race_source_concept_id,
    ethnicity_source_value,
    ethnicity_source_concept_id
FROM omop_cdm.person;
-- observation_period: Parquet copy with converted start/end dates.
-- NOTE(review): each *_datetime column uses exactly the same expression as
-- its *_date column, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.observation_period STORED AS PARQUET AS
SELECT
    observation_period_id,
    person_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_period_start_date AS STRING), 1, 4), SUBSTR(CAST(observation_period_start_date AS STRING), 5, 2), SUBSTR(CAST(observation_period_start_date AS STRING), 7, 2)), 'UTC') AS observation_period_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_period_start_date AS STRING), 1, 4), SUBSTR(CAST(observation_period_start_date AS STRING), 5, 2), SUBSTR(CAST(observation_period_start_date AS STRING), 7, 2)), 'UTC') AS observation_period_start_datetime,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_period_end_date AS STRING), 1, 4), SUBSTR(CAST(observation_period_end_date AS STRING), 5, 2), SUBSTR(CAST(observation_period_end_date AS STRING), 7, 2)), 'UTC') AS observation_period_end_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_period_end_date AS STRING), 1, 4), SUBSTR(CAST(observation_period_end_date AS STRING), 5, 2), SUBSTR(CAST(observation_period_end_date AS STRING), 7, 2)), 'UTC') AS observation_period_end_datetime,
    period_type_concept_id
FROM omop_cdm.observation_period;
-- specimen: Parquet copy with specimen_date converted.
-- NOTE(review): specimen_datetime uses exactly the same expression as
-- specimen_date, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.specimen STORED AS PARQUET AS
SELECT
    specimen_id,
    person_id,
    specimen_concept_id,
    specimen_type_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(specimen_date AS STRING), 1, 4), SUBSTR(CAST(specimen_date AS STRING), 5, 2), SUBSTR(CAST(specimen_date AS STRING), 7, 2)), 'UTC') AS specimen_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(specimen_date AS STRING), 1, 4), SUBSTR(CAST(specimen_date AS STRING), 5, 2), SUBSTR(CAST(specimen_date AS STRING), 7, 2)), 'UTC') AS specimen_datetime,
    quantity, -- NUMERIC
    unit_concept_id,
    anatomic_site_concept_id,
    disease_status_concept_id,
    specimen_source_id,
    specimen_source_value,
    unit_source_value,
    anatomic_site_source_value,
    disease_status_source_value
FROM omop_cdm.specimen;
-- death: Parquet copy with death_date converted.
-- NOTE(review): death_datetime uses exactly the same expression as
-- death_date, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.death STORED AS PARQUET AS
SELECT
    person_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(death_date AS STRING), 1, 4), SUBSTR(CAST(death_date AS STRING), 5, 2), SUBSTR(CAST(death_date AS STRING), 7, 2)), 'UTC') AS death_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(death_date AS STRING), 1, 4), SUBSTR(CAST(death_date AS STRING), 5, 2), SUBSTR(CAST(death_date AS STRING), 7, 2)), 'UTC') AS death_datetime,
    death_type_concept_id,
    cause_concept_id,
    cause_source_value,
    cause_source_concept_id
FROM omop_cdm.death;
-- visit_occurrence: Parquet copy with converted visit start/end dates.
-- NOTE(review): each *_datetime column uses exactly the same expression as
-- its *_date column, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.visit_occurrence STORED AS PARQUET AS
SELECT
    visit_occurrence_id,
    person_id,
    visit_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(visit_start_date AS STRING), 1, 4), SUBSTR(CAST(visit_start_date AS STRING), 5, 2), SUBSTR(CAST(visit_start_date AS STRING), 7, 2)), 'UTC') AS visit_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(visit_start_date AS STRING), 1, 4), SUBSTR(CAST(visit_start_date AS STRING), 5, 2), SUBSTR(CAST(visit_start_date AS STRING), 7, 2)), 'UTC') AS visit_start_datetime,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(visit_end_date AS STRING), 1, 4), SUBSTR(CAST(visit_end_date AS STRING), 5, 2), SUBSTR(CAST(visit_end_date AS STRING), 7, 2)), 'UTC') AS visit_end_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(visit_end_date AS STRING), 1, 4), SUBSTR(CAST(visit_end_date AS STRING), 5, 2), SUBSTR(CAST(visit_end_date AS STRING), 7, 2)), 'UTC') AS visit_end_datetime,
    visit_type_concept_id,
    provider_id,
    care_site_id,
    visit_source_value,
    visit_source_concept_id
FROM omop_cdm.visit_occurrence;
-- procedure_occurrence: Parquet copy with procedure_date converted.
-- NOTE(review): procedure_datetime uses exactly the same expression as
-- procedure_date, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.procedure_occurrence STORED AS PARQUET AS
SELECT
    procedure_occurrence_id,
    person_id,
    procedure_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(procedure_date AS STRING), 1, 4), SUBSTR(CAST(procedure_date AS STRING), 5, 2), SUBSTR(CAST(procedure_date AS STRING), 7, 2)), 'UTC') AS procedure_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(procedure_date AS STRING), 1, 4), SUBSTR(CAST(procedure_date AS STRING), 5, 2), SUBSTR(CAST(procedure_date AS STRING), 7, 2)), 'UTC') AS procedure_datetime,
    procedure_type_concept_id,
    modifier_concept_id,
    quantity,
    provider_id,
    visit_occurrence_id,
    procedure_source_value,
    procedure_source_concept_id,
    qualifier_source_value
FROM omop_cdm.procedure_occurrence;
-- drug_exposure: Parquet copy with converted exposure start/end dates.
-- NOTE(review): each *_datetime column uses exactly the same expression as
-- its *_date column, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.drug_exposure STORED AS PARQUET AS
SELECT
    drug_exposure_id,
    person_id,
    drug_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_exposure_start_date AS STRING), 1, 4), SUBSTR(CAST(drug_exposure_start_date AS STRING), 5, 2), SUBSTR(CAST(drug_exposure_start_date AS STRING), 7, 2)), 'UTC') AS drug_exposure_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_exposure_start_date AS STRING), 1, 4), SUBSTR(CAST(drug_exposure_start_date AS STRING), 5, 2), SUBSTR(CAST(drug_exposure_start_date AS STRING), 7, 2)), 'UTC') AS drug_exposure_start_datetime,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_exposure_end_date AS STRING), 1, 4), SUBSTR(CAST(drug_exposure_end_date AS STRING), 5, 2), SUBSTR(CAST(drug_exposure_end_date AS STRING), 7, 2)), 'UTC') AS drug_exposure_end_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_exposure_end_date AS STRING), 1, 4), SUBSTR(CAST(drug_exposure_end_date AS STRING), 5, 2), SUBSTR(CAST(drug_exposure_end_date AS STRING), 7, 2)), 'UTC') AS drug_exposure_end_datetime,
    drug_type_concept_id,
    stop_reason,
    refills,
    quantity, -- NUMERIC
    days_supply,
    sig, -- TEXT
    route_concept_id,
    effective_drug_dose, -- NUMERIC
    dose_unit_concept_id,
    lot_number,
    provider_id,
    visit_occurrence_id,
    drug_source_value,
    drug_source_concept_id,
    route_source_value,
    dose_unit_source_value
FROM omop_cdm.drug_exposure;
-- device_exposure: Parquet copy with converted exposure start/end dates.
-- NOTE(review): each *_datetime column uses exactly the same expression as
-- its *_date column, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.device_exposure STORED AS PARQUET AS
SELECT
    device_exposure_id,
    person_id,
    device_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(device_exposure_start_date AS STRING), 1, 4), SUBSTR(CAST(device_exposure_start_date AS STRING), 5, 2), SUBSTR(CAST(device_exposure_start_date AS STRING), 7, 2)), 'UTC') AS device_exposure_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(device_exposure_start_date AS STRING), 1, 4), SUBSTR(CAST(device_exposure_start_date AS STRING), 5, 2), SUBSTR(CAST(device_exposure_start_date AS STRING), 7, 2)), 'UTC') AS device_exposure_start_datetime,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(device_exposure_end_date AS STRING), 1, 4), SUBSTR(CAST(device_exposure_end_date AS STRING), 5, 2), SUBSTR(CAST(device_exposure_end_date AS STRING), 7, 2)), 'UTC') AS device_exposure_end_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(device_exposure_end_date AS STRING), 1, 4), SUBSTR(CAST(device_exposure_end_date AS STRING), 5, 2), SUBSTR(CAST(device_exposure_end_date AS STRING), 7, 2)), 'UTC') AS device_exposure_end_datetime,
    device_type_concept_id,
    unique_device_id,
    quantity,
    provider_id,
    visit_occurrence_id,
    device_source_value,
    device_source_concept_id
FROM omop_cdm.device_exposure;
-- condition_occurrence: Parquet copy with converted condition start/end dates.
-- NOTE(review): each *_datetime column uses exactly the same expression as
-- its *_date column, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.condition_occurrence STORED AS PARQUET AS
SELECT
    condition_occurrence_id,
    person_id,
    condition_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_start_date AS STRING), 1, 4), SUBSTR(CAST(condition_start_date AS STRING), 5, 2), SUBSTR(CAST(condition_start_date AS STRING), 7, 2)), 'UTC') AS condition_start_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_start_date AS STRING), 1, 4), SUBSTR(CAST(condition_start_date AS STRING), 5, 2), SUBSTR(CAST(condition_start_date AS STRING), 7, 2)), 'UTC') AS condition_start_datetime,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_end_date AS STRING), 1, 4), SUBSTR(CAST(condition_end_date AS STRING), 5, 2), SUBSTR(CAST(condition_end_date AS STRING), 7, 2)), 'UTC') AS condition_end_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_end_date AS STRING), 1, 4), SUBSTR(CAST(condition_end_date AS STRING), 5, 2), SUBSTR(CAST(condition_end_date AS STRING), 7, 2)), 'UTC') AS condition_end_datetime,
    condition_type_concept_id,
    stop_reason,
    provider_id,
    visit_occurrence_id,
    condition_source_value,
    condition_source_concept_id
FROM omop_cdm.condition_occurrence;
-- measurement: Parquet copy with measurement_date converted.
-- NOTE(review): measurement_datetime uses exactly the same expression as
-- measurement_date, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.measurement STORED AS PARQUET AS
SELECT
    measurement_id,
    person_id,
    measurement_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(measurement_date AS STRING), 1, 4), SUBSTR(CAST(measurement_date AS STRING), 5, 2), SUBSTR(CAST(measurement_date AS STRING), 7, 2)), 'UTC') AS measurement_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(measurement_date AS STRING), 1, 4), SUBSTR(CAST(measurement_date AS STRING), 5, 2), SUBSTR(CAST(measurement_date AS STRING), 7, 2)), 'UTC') AS measurement_datetime,
    measurement_type_concept_id,
    operator_concept_id,
    value_as_number, -- NUMERIC
    value_as_concept_id,
    unit_concept_id,
    range_low, -- NUMERIC
    range_high, -- NUMERIC
    provider_id,
    visit_occurrence_id,
    measurement_source_value,
    measurement_source_concept_id,
    unit_source_value,
    value_source_value
FROM omop_cdm.measurement;
-- note: Parquet copy with note_date converted.
-- NOTE(review): note_datetime uses exactly the same expression as
-- note_date, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.note STORED AS PARQUET AS
SELECT
    note_id,
    person_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(note_date AS STRING), 1, 4), SUBSTR(CAST(note_date AS STRING), 5, 2), SUBSTR(CAST(note_date AS STRING), 7, 2)), 'UTC') AS note_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(note_date AS STRING), 1, 4), SUBSTR(CAST(note_date AS STRING), 5, 2), SUBSTR(CAST(note_date AS STRING), 7, 2)), 'UTC') AS note_datetime,
    note_type_concept_id,
    note_text, -- TEXT
    provider_id,
    visit_occurrence_id,
    note_source_value
FROM omop_cdm.note;
-- observation: Parquet copy with observation_date converted.
-- NOTE(review): observation_datetime uses exactly the same expression as
-- observation_date, so it holds the same value; confirm this is intended.
CREATE TABLE omop_cdm_parquet.observation STORED AS PARQUET AS
SELECT
    observation_id,
    person_id,
    observation_concept_id,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_date AS STRING), 1, 4), SUBSTR(CAST(observation_date AS STRING), 5, 2), SUBSTR(CAST(observation_date AS STRING), 7, 2)), 'UTC') AS observation_date,
    TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_date AS STRING), 1, 4), SUBSTR(CAST(observation_date AS STRING), 5, 2), SUBSTR(CAST(observation_date AS STRING), 7, 2)), 'UTC') AS observation_datetime,
    observation_type_concept_id,
    value_as_number, -- NUMERIC
    value_as_string,
    value_as_concept_id,
    qualifier_concept_id,
    unit_concept_id,
    provider_id,
    visit_occurrence_id,
    observation_source_value,
    observation_source_concept_id,
    unit_source_value,
    qualifier_source_value
FROM omop_cdm.observation;
-- Parquet copy of omop_cdm.fact_relationship; every column is copied as-is.
CREATE TABLE omop_cdm_parquet.fact_relationship
  STORED AS PARQUET
AS
SELECT *
FROM omop_cdm.fact_relationship;
-- Parquet copy of omop_cdm.location; every column is copied as-is.
-- The table name is kept in backticks exactly as in the source statement.
CREATE TABLE omop_cdm_parquet.`location`
  STORED AS PARQUET
AS
SELECT *
FROM omop_cdm.`location`;
-- Parquet copy of omop_cdm.care_site; every column is copied as-is.
CREATE TABLE omop_cdm_parquet.care_site
  STORED AS PARQUET
AS
SELECT *
FROM omop_cdm.care_site;
-- Parquet copy of omop_cdm.provider; every column is copied as-is.
CREATE TABLE omop_cdm_parquet.provider
  STORED AS PARQUET
AS
SELECT *
FROM omop_cdm.provider;
-- Parquet copy of omop_cdm.payer_plan_period.
-- Each date value is cast to a string, split into characters 1-4, 5-6 and
-- 7-8, re-joined with '-' and passed to TO_UTC_TIMESTAMP.
-- Assumes the source values are laid out as yyyymmdd -- TODO confirm.
CREATE TABLE omop_cdm_parquet.payer_plan_period
  STORED AS PARQUET
AS
SELECT
  payer_plan_period_id,
  person_id,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(payer_plan_period_start_date AS STRING), 1, 4),
      SUBSTR(CAST(payer_plan_period_start_date AS STRING), 5, 2),
      SUBSTR(CAST(payer_plan_period_start_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS payer_plan_period_start_date,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(payer_plan_period_end_date AS STRING), 1, 4),
      SUBSTR(CAST(payer_plan_period_end_date AS STRING), 5, 2),
      SUBSTR(CAST(payer_plan_period_end_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS payer_plan_period_end_date,
  payer_source_value,
  plan_source_value,
  family_source_value
FROM omop_cdm.payer_plan_period;
/* The individual cost tables are being phased out and will disappear soon
CREATE TABLE omop_cdm_parquet.visit_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.visit_cost;
CREATE TABLE omop_cdm_parquet.procedure_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.procedure_cost;
CREATE TABLE omop_cdm_parquet.drug_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.drug_cost;
CREATE TABLE omop_cdm_parquet.device_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.device_cost;
*/
-- Parquet copy of omop_cdm.cost; every column is copied as-is.
CREATE TABLE omop_cdm_parquet.cost
  STORED AS PARQUET
AS
SELECT *
FROM omop_cdm.cost;
-- Parquet copy of omop_cdm.cohort.
-- Each date value is cast to a string, split into characters 1-4, 5-6 and
-- 7-8, re-joined with '-' and passed to TO_UTC_TIMESTAMP.
-- Assumes the source values are laid out as yyyymmdd -- TODO confirm.
CREATE TABLE omop_cdm_parquet.cohort
  STORED AS PARQUET
AS
SELECT
  cohort_definition_id,
  subject_id,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(cohort_start_date AS STRING), 1, 4),
      SUBSTR(CAST(cohort_start_date AS STRING), 5, 2),
      SUBSTR(CAST(cohort_start_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS cohort_start_date,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(cohort_end_date AS STRING), 1, 4),
      SUBSTR(CAST(cohort_end_date AS STRING), 5, 2),
      SUBSTR(CAST(cohort_end_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS cohort_end_date
FROM omop_cdm.cohort;
-- Parquet copy of omop_cdm.cohort_attribute.
-- Each date value is cast to a string, split into characters 1-4, 5-6 and
-- 7-8, re-joined with '-' and passed to TO_UTC_TIMESTAMP.
-- Assumes the source values are laid out as yyyymmdd -- TODO confirm.
CREATE TABLE omop_cdm_parquet.cohort_attribute
  STORED AS PARQUET
AS
SELECT
  cohort_definition_id,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(cohort_start_date AS STRING), 1, 4),
      SUBSTR(CAST(cohort_start_date AS STRING), 5, 2),
      SUBSTR(CAST(cohort_start_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS cohort_start_date,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(cohort_end_date AS STRING), 1, 4),
      SUBSTR(CAST(cohort_end_date AS STRING), 5, 2),
      SUBSTR(CAST(cohort_end_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS cohort_end_date,
  subject_id,
  attribute_definition_id,
  value_as_number, -- NUMERIC
  value_as_concept_id
FROM omop_cdm.cohort_attribute;
-- Parquet copy of omop_cdm.drug_era.
-- Each date value is cast to a string, split into characters 1-4, 5-6 and
-- 7-8, re-joined with '-' and passed to TO_UTC_TIMESTAMP.
-- Assumes the source values are laid out as yyyymmdd -- TODO confirm.
CREATE TABLE omop_cdm_parquet.drug_era
  STORED AS PARQUET
AS
SELECT
  drug_era_id,
  person_id,
  drug_concept_id,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(drug_era_start_date AS STRING), 1, 4),
      SUBSTR(CAST(drug_era_start_date AS STRING), 5, 2),
      SUBSTR(CAST(drug_era_start_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS drug_era_start_date,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(drug_era_end_date AS STRING), 1, 4),
      SUBSTR(CAST(drug_era_end_date AS STRING), 5, 2),
      SUBSTR(CAST(drug_era_end_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS drug_era_end_date,
  drug_exposure_count,
  gap_days
FROM omop_cdm.drug_era;
-- Parquet copy of omop_cdm.dose_era.
-- Each date value is cast to a string, split into characters 1-4, 5-6 and
-- 7-8, re-joined with '-' and passed to TO_UTC_TIMESTAMP.
-- Assumes the source values are laid out as yyyymmdd -- TODO confirm.
CREATE TABLE omop_cdm_parquet.dose_era
  STORED AS PARQUET
AS
SELECT
  dose_era_id,
  person_id,
  drug_concept_id,
  unit_concept_id,
  dose_value, -- NUMERIC
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(dose_era_start_date AS STRING), 1, 4),
      SUBSTR(CAST(dose_era_start_date AS STRING), 5, 2),
      SUBSTR(CAST(dose_era_start_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS dose_era_start_date,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(dose_era_end_date AS STRING), 1, 4),
      SUBSTR(CAST(dose_era_end_date AS STRING), 5, 2),
      SUBSTR(CAST(dose_era_end_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS dose_era_end_date
FROM omop_cdm.dose_era;
-- Parquet copy of omop_cdm.condition_era.
-- Each date value is cast to a string, split into characters 1-4, 5-6 and
-- 7-8, re-joined with '-' and passed to TO_UTC_TIMESTAMP.
-- Assumes the source values are laid out as yyyymmdd -- TODO confirm.
CREATE TABLE omop_cdm_parquet.condition_era
  STORED AS PARQUET
AS
SELECT
  condition_era_id,
  person_id,
  condition_concept_id,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(condition_era_start_date AS STRING), 1, 4),
      SUBSTR(CAST(condition_era_start_date AS STRING), 5, 2),
      SUBSTR(CAST(condition_era_start_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS condition_era_start_date,
  TO_UTC_TIMESTAMP(
    CONCAT_WS(
      '-',
      SUBSTR(CAST(condition_era_end_date AS STRING), 1, 4),
      SUBSTR(CAST(condition_era_end_date AS STRING), 5, 2),
      SUBSTR(CAST(condition_era_end_date AS STRING), 7, 2)
    ),
    'UTC'
  ) AS condition_era_end_date,
  condition_occurrence_count
FROM omop_cdm.condition_era;

View File

@ -43,9 +43,16 @@ hadoop fs -chmod +w synpuf
impala-shell -d omop_cdm -f DataImport/OMOP_CDM_synpuf_load_Impala.sql --var=OMOP_SYNPUF_PATH=/user/$USER/synpuf
```
4. Run simple queries to sanity check.
4. Convert to Parquet format.
```bash
impala-shell -d omop_cdm -q 'SELECT COUNT(1) FROM concept'
impala-shell -d omop_cdm -q 'SELECT COUNT(1) FROM person'
impala-shell -q 'CREATE DATABASE omop_cdm_parquet'
impala-shell -f OMOP_Parquet.sql
```
5. Run simple queries to sanity check.
```bash
impala-shell -d omop_cdm_parquet -q 'SELECT COUNT(1) FROM concept'
impala-shell -d omop_cdm_parquet -q 'SELECT COUNT(1) FROM person'
```

Binary file not shown.

BIN
OMOP_CDM_v5_1_0.pdf Normal file

Binary file not shown.

View File

@ -154,13 +154,7 @@ Standardized health economics
ALTER TABLE payer_plan_period ADD CONSTRAINT xpk_payer_plan_period PRIMARY KEY ( payer_plan_period_id ) ;
ALTER TABLE visit_cost ADD CONSTRAINT xpk_visit_cost PRIMARY KEY ( visit_cost_id ) ;
ALTER TABLE procedure_cost ADD CONSTRAINT xpk_procedure_cost PRIMARY KEY ( procedure_cost_id ) ;
ALTER TABLE drug_cost ADD CONSTRAINT xpk_drug_cost PRIMARY KEY ( drug_cost_id ) ;
ALTER TABLE device_cost ADD CONSTRAINT xpk_device_cost PRIMARY KEY ( device_cost_id ) ;
ALTER TABLE cost ADD CONSTRAINT xpk_visit_cost PRIMARY KEY ( cost_id ) ;
@ -474,39 +468,9 @@ Standardized health economics
ALTER TABLE payer_plan_period ADD CONSTRAINT fpk_payer_plan_period FOREIGN KEY (person_id) REFERENCES person (person_id);
ALTER TABLE cost ADD CONSTRAINT fpk_visit_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE visit_cost ADD CONSTRAINT fpk_visit_cost_id FOREIGN KEY (visit_occurrence_id) REFERENCES visit_occurrence (visit_occurrence_id);
ALTER TABLE visit_cost ADD CONSTRAINT fpk_visit_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE visit_cost ADD CONSTRAINT fpk_visit_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_id FOREIGN KEY (procedure_occurrence_id) REFERENCES procedure_occurrence (procedure_occurrence_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_revenue FOREIGN KEY (revenue_code_concept_id) REFERENCES concept (concept_id);
ALTER TABLE drug_cost ADD CONSTRAINT fpk_drug_cost_id FOREIGN KEY (drug_exposure_id) REFERENCES drug_exposure (drug_exposure_id);
ALTER TABLE drug_cost ADD CONSTRAINT fpk_drug_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE drug_cost ADD CONSTRAINT fpk_drug_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE device_cost ADD CONSTRAINT fpk_device_cost_id FOREIGN KEY (device_exposure_id) REFERENCES device_exposure (device_exposure_id);
ALTER TABLE device_cost ADD CONSTRAINT fpk_device_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE device_cost ADD CONSTRAINT fpk_device_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE cost ADD CONSTRAINT fpk_visit_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
/************************

View File

@ -229,7 +229,7 @@ CREATE TABLE person
year_of_birth INTEGER NOT NULL ,
month_of_birth INTEGER NULL,
day_of_birth INTEGER NULL,
time_of_birth VARCHAR(10) NULL,
birth_datetime TIMESTAMP WITH TIME ZONE NULL,
race_concept_id INTEGER NOT NULL,
ethnicity_concept_id INTEGER NOT NULL,
location_id INTEGER NULL,
@ -254,7 +254,9 @@ CREATE TABLE observation_period
observation_period_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
observation_period_start_date DATE NOT NULL ,
observation_period_start_datetime TIMESTAMP WITH TIME ZONE NOT NULL ,
observation_period_end_date DATE NOT NULL ,
observation_period_end_datetime TIMESTAMP WITH TIME ZONE NOT NULL ,
period_type_concept_id INTEGER NOT NULL
)
;
@ -268,7 +270,7 @@ CREATE TABLE specimen
specimen_concept_id INTEGER NOT NULL ,
specimen_type_concept_id INTEGER NOT NULL ,
specimen_date DATE NOT NULL ,
specimen_time VARCHAR(10) NULL ,
specimen_datetime TIMESTAMP WITH TIME ZONE NULL ,
quantity FLOAT NULL ,
unit_concept_id INTEGER NULL ,
anatomic_site_concept_id INTEGER NULL ,
@ -287,7 +289,8 @@ CREATE TABLE death
(
person_id INTEGER NOT NULL ,
death_date DATE NOT NULL ,
death_type_concept_id INTEGER NOT NULL ,
death_datetime TIMESTAMP WITH TIME ZONE NULL ,
death_type_concept_id INTEGER NOT NULL ,
cause_concept_id INTEGER NULL ,
cause_source_value VARCHAR(50) NULL,
cause_source_concept_id INTEGER NULL
@ -302,9 +305,9 @@ CREATE TABLE visit_occurrence
person_id INTEGER NOT NULL ,
visit_concept_id INTEGER NOT NULL ,
visit_start_date DATE NOT NULL ,
visit_start_time VARCHAR(10) NULL ,
visit_start_datetime TIMESTAMP WITH TIME ZONE NULL ,
visit_end_date DATE NOT NULL ,
visit_end_time VARCHAR(10) NULL ,
visit_end_datetime TIMESTAMP WITH TIME ZONE NULL ,
visit_type_concept_id INTEGER NOT NULL ,
provider_id INTEGER NULL,
care_site_id INTEGER NULL,
@ -321,6 +324,7 @@ CREATE TABLE procedure_occurrence
person_id INTEGER NOT NULL ,
procedure_concept_id INTEGER NOT NULL ,
procedure_date DATE NOT NULL ,
procedure_datetime TIMESTAMP WITH TIME ZONE NOT NULL ,
procedure_type_concept_id INTEGER NOT NULL ,
modifier_concept_id INTEGER NULL ,
quantity INTEGER NULL ,
@ -340,8 +344,10 @@ CREATE TABLE drug_exposure
person_id INTEGER NOT NULL ,
drug_concept_id INTEGER NOT NULL ,
drug_exposure_start_date DATE NOT NULL ,
drug_exposure_end_date DATE NULL ,
drug_type_concept_id INTEGER NOT NULL ,
drug_exposure_start_datetime TIMESTAMP WITH TIME ZONE NOT NULL ,
drug_exposure_end_date DATE NULL ,
drug_exposure_end_datetime TIMESTAMP WITH TIME ZONE NULL ,
drug_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
refills INTEGER NULL ,
quantity FLOAT NULL ,
@ -367,8 +373,10 @@ CREATE TABLE device_exposure
person_id INTEGER NOT NULL ,
device_concept_id INTEGER NOT NULL ,
device_exposure_start_date DATE NOT NULL ,
device_exposure_end_date DATE NULL ,
device_type_concept_id INTEGER NOT NULL ,
device_exposure_start_datetime TIMESTAMP WITH TIME ZONE NOT NULL ,
device_exposure_end_date DATE NULL ,
device_exposure_end_datetime TIMESTAMP WITH TIME ZONE NULL ,
device_type_concept_id INTEGER NOT NULL ,
unique_device_id VARCHAR(50) NULL ,
quantity INTEGER NULL ,
provider_id INTEGER NULL ,
@ -385,8 +393,10 @@ CREATE TABLE condition_occurrence
person_id INTEGER NOT NULL ,
condition_concept_id INTEGER NOT NULL ,
condition_start_date DATE NOT NULL ,
condition_end_date DATE NULL ,
condition_type_concept_id INTEGER NOT NULL ,
condition_start_datetime TIMESTAMP WITH TIME ZONE NOT NULL ,
condition_end_date DATE NULL ,
condition_end_datetime TIMESTAMP WITH TIME ZONE NULL ,
condition_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
@ -403,7 +413,7 @@ CREATE TABLE measurement
person_id INTEGER NOT NULL ,
measurement_concept_id INTEGER NOT NULL ,
measurement_date DATE NOT NULL ,
measurement_time VARCHAR(10) NULL ,
measurement_datetime TIMESTAMP WITH TIME ZONE NULL ,
measurement_type_concept_id INTEGER NOT NULL ,
operator_concept_id INTEGER NULL ,
value_as_number FLOAT NULL ,
@ -427,7 +437,7 @@ CREATE TABLE note
note_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
note_date DATE NOT NULL ,
note_time VARCHAR(10) NULL ,
note_datetime TIMESTAMP WITH TIME ZONE NULL ,
note_type_concept_id INTEGER NOT NULL ,
note_text CLOB NOT NULL ,
provider_id INTEGER NULL ,
@ -444,7 +454,7 @@ CREATE TABLE observation
person_id INTEGER NOT NULL ,
observation_concept_id INTEGER NOT NULL ,
observation_date DATE NOT NULL ,
observation_time VARCHAR(10) NULL ,
observation_datetime TIMESTAMP WITH TIME ZONE NULL ,
observation_type_concept_id INTEGER NOT NULL ,
value_as_number FLOAT NULL ,
value_as_string VARCHAR(60) NULL ,

View File

@ -155,14 +155,7 @@ Standardized health economics
ALTER TABLE payer_plan_period ADD CONSTRAINT xpk_payer_plan_period PRIMARY KEY ( payer_plan_period_id ) ;
ALTER TABLE visit_cost ADD CONSTRAINT xpk_visit_cost PRIMARY KEY ( visit_cost_id ) ;
ALTER TABLE procedure_cost ADD CONSTRAINT xpk_procedure_cost PRIMARY KEY ( procedure_cost_id ) ;
ALTER TABLE drug_cost ADD CONSTRAINT xpk_drug_cost PRIMARY KEY ( drug_cost_id ) ;
ALTER TABLE device_cost ADD CONSTRAINT xpk_device_cost PRIMARY KEY ( device_cost_id ) ;
ALTER TABLE cost ADD CONSTRAINT xpk_visit_cost PRIMARY KEY ( cost_id ) ;
/************************
@ -475,39 +468,9 @@ Standardized health economics
ALTER TABLE payer_plan_period ADD CONSTRAINT fpk_payer_plan_period FOREIGN KEY (person_id) REFERENCES person (person_id);
ALTER TABLE cost ADD CONSTRAINT fpk_visit_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE visit_cost ADD CONSTRAINT fpk_visit_cost_id FOREIGN KEY (visit_occurrence_id) REFERENCES visit_occurrence (visit_occurrence_id);
ALTER TABLE visit_cost ADD CONSTRAINT fpk_visit_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE visit_cost ADD CONSTRAINT fpk_visit_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_id FOREIGN KEY (procedure_occurrence_id) REFERENCES procedure_occurrence (procedure_occurrence_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_revenue FOREIGN KEY (revenue_code_concept_id) REFERENCES concept (concept_id);
ALTER TABLE drug_cost ADD CONSTRAINT fpk_drug_cost_id FOREIGN KEY (drug_exposure_id) REFERENCES drug_exposure (drug_exposure_id);
ALTER TABLE drug_cost ADD CONSTRAINT fpk_drug_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE drug_cost ADD CONSTRAINT fpk_drug_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE device_cost ADD CONSTRAINT fpk_device_cost_id FOREIGN KEY (device_exposure_id) REFERENCES device_exposure (device_exposure_id);
ALTER TABLE device_cost ADD CONSTRAINT fpk_device_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE device_cost ADD CONSTRAINT fpk_device_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE cost ADD CONSTRAINT fpk_visit_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
/************************

View File

@ -229,7 +229,7 @@ CREATE TABLE person
year_of_birth INTEGER NOT NULL ,
month_of_birth INTEGER NULL,
day_of_birth INTEGER NULL,
time_of_birth VARCHAR(10) NULL,
birth_datetime TIMESTAMP NULL,
race_concept_id INTEGER NOT NULL,
ethnicity_concept_id INTEGER NOT NULL,
location_id INTEGER NULL,
@ -254,7 +254,9 @@ CREATE TABLE observation_period
observation_period_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
observation_period_start_date DATE NOT NULL ,
observation_period_start_datetime TIMESTAMP NOT NULL ,
observation_period_end_date DATE NOT NULL ,
observation_period_end_datetime TIMESTAMP NOT NULL ,
period_type_concept_id INTEGER NOT NULL
)
;
@ -268,7 +270,7 @@ CREATE TABLE specimen
specimen_concept_id INTEGER NOT NULL ,
specimen_type_concept_id INTEGER NOT NULL ,
specimen_date DATE NOT NULL ,
specimen_time VARCHAR(10) NULL ,
specimen_datetime TIMESTAMP NULL ,
quantity NUMERIC NULL ,
unit_concept_id INTEGER NULL ,
anatomic_site_concept_id INTEGER NULL ,
@ -287,7 +289,8 @@ CREATE TABLE death
(
person_id INTEGER NOT NULL ,
death_date DATE NOT NULL ,
death_type_concept_id INTEGER NOT NULL ,
death_datetime TIMESTAMP NULL ,
death_type_concept_id INTEGER NOT NULL ,
cause_concept_id INTEGER NULL ,
cause_source_value VARCHAR(50) NULL,
cause_source_concept_id INTEGER NULL
@ -302,9 +305,9 @@ CREATE TABLE visit_occurrence
person_id INTEGER NOT NULL ,
visit_concept_id INTEGER NOT NULL ,
visit_start_date DATE NOT NULL ,
visit_start_time VARCHAR(10) NULL ,
visit_start_datetime TIMESTAMP NULL ,
visit_end_date DATE NOT NULL ,
visit_end_time VARCHAR(10) NULL ,
visit_end_datetime TIMESTAMP NULL ,
visit_type_concept_id INTEGER NOT NULL ,
provider_id INTEGER NULL,
care_site_id INTEGER NULL,
@ -321,6 +324,7 @@ CREATE TABLE procedure_occurrence
person_id INTEGER NOT NULL ,
procedure_concept_id INTEGER NOT NULL ,
procedure_date DATE NOT NULL ,
procedure_datetime TIMESTAMP NOT NULL ,
procedure_type_concept_id INTEGER NOT NULL ,
modifier_concept_id INTEGER NULL ,
quantity INTEGER NULL ,
@ -340,8 +344,10 @@ CREATE TABLE drug_exposure
person_id INTEGER NOT NULL ,
drug_concept_id INTEGER NOT NULL ,
drug_exposure_start_date DATE NOT NULL ,
drug_exposure_end_date DATE NULL ,
drug_type_concept_id INTEGER NOT NULL ,
drug_exposure_start_datetime TIMESTAMP NOT NULL ,
drug_exposure_end_date DATE NULL ,
drug_exposure_end_datetime TIMESTAMP NULL ,
drug_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
refills INTEGER NULL ,
quantity NUMERIC NULL ,
@ -367,8 +373,10 @@ CREATE TABLE device_exposure
person_id INTEGER NOT NULL ,
device_concept_id INTEGER NOT NULL ,
device_exposure_start_date DATE NOT NULL ,
device_exposure_end_date DATE NULL ,
device_type_concept_id INTEGER NOT NULL ,
device_exposure_start_datetime TIMESTAMP NOT NULL ,
device_exposure_end_date DATE NULL ,
device_exposure_end_datetime TIMESTAMP NULL ,
device_type_concept_id INTEGER NOT NULL ,
unique_device_id VARCHAR(50) NULL ,
quantity INTEGER NULL ,
provider_id INTEGER NULL ,
@ -385,8 +393,10 @@ CREATE TABLE condition_occurrence
person_id INTEGER NOT NULL ,
condition_concept_id INTEGER NOT NULL ,
condition_start_date DATE NOT NULL ,
condition_end_date DATE NULL ,
condition_type_concept_id INTEGER NOT NULL ,
condition_start_datetime TIMESTAMP NOT NULL ,
condition_end_date DATE NULL ,
condition_end_datetime TIMESTAMP NULL ,
condition_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
@ -403,7 +413,7 @@ CREATE TABLE measurement
person_id INTEGER NOT NULL ,
measurement_concept_id INTEGER NOT NULL ,
measurement_date DATE NOT NULL ,
measurement_time VARCHAR(10) NULL ,
measurement_datetime TIMESTAMP NULL ,
measurement_type_concept_id INTEGER NOT NULL ,
operator_concept_id INTEGER NULL ,
value_as_number NUMERIC NULL ,
@ -427,7 +437,7 @@ CREATE TABLE note
note_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
note_date DATE NOT NULL ,
note_time VARCHAR(10) NULL ,
note_datetime TIMESTAMP NULL ,
note_type_concept_id INTEGER NOT NULL ,
note_text TEXT NOT NULL ,
provider_id INTEGER NULL ,
@ -444,7 +454,7 @@ CREATE TABLE observation
person_id INTEGER NOT NULL ,
observation_concept_id INTEGER NOT NULL ,
observation_date DATE NOT NULL ,
observation_time VARCHAR(10) NULL ,
observation_datetime TIMESTAMP NULL ,
observation_type_concept_id INTEGER NOT NULL ,
value_as_number NUMERIC NULL ,
value_as_string VARCHAR(60) NULL ,

View File

@ -1,9 +1,54 @@
Common-Data-Model
Common Data Model v5.1.0
=================
This repo contains the definition of the OMOP Common Data Model. It supports the 4 SQL technologies: Impala, Oracle, Postgres and SQL Server. For each, the DDL, constraints and indexes (if appropriate) are defined.
See full CDM specification file on our github [wiki](https://github.com/OHDSI/CommonDataModel/wiki) or in the [CDM V5.1.0 PDF](https://github.com/OHDSI/CommonDataModel/blob/master/OMOP_CDM_v5_1_0.pdf)
Versions are defined using tagging and versioning. Full versions (V6, 7 etc.) are released irregularly, after a major strategy change or an expansion of use case coverage, and are issued during an OHDSI Symposium. Major versions (V5.1, 5.2 etc.) are released half-yearly (1-Jul and 1-Jan). Those versions are not guaranteed to be backward compatible. Minor versions (V5.1.1, V5.1.2 etc.) are released irregularly and often, and contain small hot fixes or backward compatible changes to the last major version.
Release Notes
=============
This version is based on the CDM working group proposals [#60](https://github.com/OHDSI/CommonDataModel/issues/60) and [#59](https://github.com/OHDSI/CommonDataModel/issues/59). The proposed and accepted changes include adding a datetime field to every table that had a date column and adding the field DENOMINATOR_VALUE to the DRUG_STRENGTH table. These are the new columns that were added:
**PERSON**
* birth_datetime, not required
See full CDM specification file on our [Wiki](http://www.ohdsi.org/web/wiki/doku.php?id=documentation:cdm:single-page).
**SPECIMEN**
* specimen_datetime, not required
**DEATH**
* death_datetime, not required
**VISIT_OCCURRENCE**
* visit_start_datetime, not required
* visit_end_datetime, not required
**PROCEDURE_OCCURRENCE**
* procedure_datetime, not required
**DRUG_EXPOSURE**
* drug_exposure_start_datetime, not required
* drug_exposure_end_datetime, not required
**DRUG_STRENGTH**
* DENOMINATOR_VALUE, not required
**DEVICE_EXPOSURE**
* device_exposure_start_datetime, not required
* device_exposure_end_datetime, not required
**CONDITION_OCCURRENCE**
* condition_start_datetime, not required
* condition_end_datetime, not required
**MEASUREMENT**
* measurement_datetime as time, not required
**OBSERVATION**
* observation_datetime, not required
**NOTE**
* note_datetime, not required
---------
This repo contains the definition of the OMOP Common Data Model. It supports the 4 SQL technologies: Impala, Oracle, Postgres and SQL Server. For each, the DDL, constraints and indexes (if appropriate) are defined.
Versions are defined using tagging and versioning. Full versions (V6, 7 etc.) are released each year (1-Jan) and are not backwards compatible. Minor versions (V5.1, 5.2 etc.) are released each quarter (1-Apr, 1-Jul and 1-Sep) and are not guaranteed to be backwards compatible though an effort is made to make sure that current queries will not break. Micro versions (V5.1.1, V5.1.2 etc.) are released irregularly and often, and contain small hot fixes or backward compatible changes to the last minor version.

View File

@ -155,14 +155,7 @@ Standardized health economics
ALTER TABLE payer_plan_period ADD CONSTRAINT xpk_payer_plan_period PRIMARY KEY NONCLUSTERED ( payer_plan_period_id ) ;
ALTER TABLE visit_cost ADD CONSTRAINT xpk_visit_cost PRIMARY KEY NONCLUSTERED ( visit_cost_id ) ;
ALTER TABLE procedure_cost ADD CONSTRAINT xpk_procedure_cost PRIMARY KEY NONCLUSTERED ( procedure_cost_id ) ;
ALTER TABLE drug_cost ADD CONSTRAINT xpk_drug_cost PRIMARY KEY NONCLUSTERED ( drug_cost_id ) ;
ALTER TABLE device_cost ADD CONSTRAINT xpk_device_cost PRIMARY KEY NONCLUSTERED ( device_cost_id ) ;
ALTER TABLE cost ADD CONSTRAINT xpk_visit_cost PRIMARY KEY NONCLUSTERED ( cost_id ) ;
/************************
@ -475,39 +468,9 @@ Standardized health economics
ALTER TABLE payer_plan_period ADD CONSTRAINT fpk_payer_plan_period FOREIGN KEY (person_id) REFERENCES person (person_id);
ALTER TABLE cost ADD CONSTRAINT fpk_visit_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE visit_cost ADD CONSTRAINT fpk_visit_cost_id FOREIGN KEY (visit_occurrence_id) REFERENCES visit_occurrence (visit_occurrence_id);
ALTER TABLE visit_cost ADD CONSTRAINT fpk_visit_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE visit_cost ADD CONSTRAINT fpk_visit_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_id FOREIGN KEY (procedure_occurrence_id) REFERENCES procedure_occurrence (procedure_occurrence_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE procedure_cost ADD CONSTRAINT fpk_procedure_cost_revenue FOREIGN KEY (revenue_code_concept_id) REFERENCES concept (concept_id);
ALTER TABLE drug_cost ADD CONSTRAINT fpk_drug_cost_id FOREIGN KEY (drug_exposure_id) REFERENCES drug_exposure (drug_exposure_id);
ALTER TABLE drug_cost ADD CONSTRAINT fpk_drug_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE drug_cost ADD CONSTRAINT fpk_drug_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE device_cost ADD CONSTRAINT fpk_device_cost_id FOREIGN KEY (device_exposure_id) REFERENCES device_exposure (device_exposure_id);
ALTER TABLE device_cost ADD CONSTRAINT fpk_device_cost_currency FOREIGN KEY (currency_concept_id) REFERENCES concept (concept_id);
ALTER TABLE device_cost ADD CONSTRAINT fpk_device_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
ALTER TABLE cost ADD CONSTRAINT fpk_visit_cost_period FOREIGN KEY (payer_plan_period_id) REFERENCES payer_plan_period (payer_plan_period_id);
/************************

View File

@ -194,19 +194,19 @@ Standardized meta-data
***************************/
CREATE TABLE cdm_source
(
cdm_source_name VARCHAR(255) NOT NULL,
cdm_source_abbreviation VARCHAR(25) NULL,
cdm_holder VARCHAR(255) NULL,
source_description VARCHAR(MAX) NULL,
source_documentation_reference VARCHAR(255) NULL,
cdm_etl_reference VARCHAR(255) NULL,
source_release_date DATE NULL,
cdm_release_date DATE NULL,
cdm_version VARCHAR(10) NULL,
vocabulary_version VARCHAR(20) NULL
)
CREATE TABLE cdm_source
(
cdm_source_name VARCHAR(255) NOT NULL,
cdm_source_abbreviation VARCHAR(25) NULL,
cdm_holder VARCHAR(255) NULL,
source_description VARCHAR(MAX) NULL,
source_documentation_reference VARCHAR(255) NULL,
cdm_etl_reference VARCHAR(255) NULL,
source_release_date DATE NULL,
cdm_release_date DATE NULL,
cdm_version VARCHAR(10) NULL,
vocabulary_version VARCHAR(20) NULL
)
;
@ -222,254 +222,264 @@ Standardized clinical data
************************/
CREATE TABLE person
(
person_id INTEGER NOT NULL ,
gender_concept_id INTEGER NOT NULL ,
year_of_birth INTEGER NOT NULL ,
month_of_birth INTEGER NULL,
day_of_birth INTEGER NULL,
time_of_birth VARCHAR(10) NULL,
race_concept_id INTEGER NOT NULL,
ethnicity_concept_id INTEGER NOT NULL,
location_id INTEGER NULL,
provider_id INTEGER NULL,
care_site_id INTEGER NULL,
person_source_value VARCHAR(50) NULL,
gender_source_value VARCHAR(50) NULL,
gender_source_concept_id INTEGER NULL,
race_source_value VARCHAR(50) NULL,
race_source_concept_id INTEGER NULL,
ethnicity_source_value VARCHAR(50) NULL,
ethnicity_source_concept_id INTEGER NULL
)
CREATE TABLE person
(
person_id INTEGER NOT NULL ,
gender_concept_id INTEGER NOT NULL ,
year_of_birth INTEGER NOT NULL ,
month_of_birth INTEGER NULL,
day_of_birth INTEGER NULL,
birth_datetime DATETIME2 NULL,
race_concept_id INTEGER NOT NULL,
ethnicity_concept_id INTEGER NOT NULL,
location_id INTEGER NULL,
provider_id INTEGER NULL,
care_site_id INTEGER NULL,
person_source_value VARCHAR(50) NULL,
gender_source_value VARCHAR(50) NULL,
gender_source_concept_id INTEGER NULL,
race_source_value VARCHAR(50) NULL,
race_source_concept_id INTEGER NULL,
ethnicity_source_value VARCHAR(50) NULL,
ethnicity_source_concept_id INTEGER NULL
)
;
CREATE TABLE observation_period
(
observation_period_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
observation_period_start_date DATE NOT NULL ,
observation_period_end_date DATE NOT NULL ,
period_type_concept_id INTEGER NOT NULL
)
CREATE TABLE observation_period
(
observation_period_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
observation_period_start_date DATE NOT NULL ,
observation_period_start_datetime DATETIME2 NOT NULL ,
observation_period_end_date DATE NOT NULL ,
observation_period_end_datetime DATETIME2 NOT NULL ,
period_type_concept_id INTEGER NOT NULL
)
;
CREATE TABLE specimen
(
specimen_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
specimen_concept_id INTEGER NOT NULL ,
specimen_type_concept_id INTEGER NOT NULL ,
specimen_date DATE NOT NULL ,
specimen_time VARCHAR(10) NULL ,
quantity FLOAT NULL ,
unit_concept_id INTEGER NULL ,
anatomic_site_concept_id INTEGER NULL ,
disease_status_concept_id INTEGER NULL ,
specimen_source_id VARCHAR(50) NULL ,
specimen_source_value VARCHAR(50) NULL ,
unit_source_value VARCHAR(50) NULL ,
anatomic_site_source_value VARCHAR(50) NULL ,
disease_status_source_value VARCHAR(50) NULL
)
(
specimen_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
specimen_concept_id INTEGER NOT NULL ,
specimen_type_concept_id INTEGER NOT NULL ,
specimen_date DATE NOT NULL ,
specimen_datetime DATETIME2 NULL ,
quantity FLOAT NULL ,
unit_concept_id INTEGER NULL ,
anatomic_site_concept_id INTEGER NULL ,
disease_status_concept_id INTEGER NULL ,
specimen_source_id VARCHAR(50) NULL ,
specimen_source_value VARCHAR(50) NULL ,
unit_source_value VARCHAR(50) NULL ,
anatomic_site_source_value VARCHAR(50) NULL ,
disease_status_source_value VARCHAR(50) NULL
)
;
CREATE TABLE death
(
person_id INTEGER NOT NULL ,
death_date DATE NOT NULL ,
death_type_concept_id INTEGER NOT NULL ,
cause_concept_id INTEGER NULL ,
cause_source_value VARCHAR(50) NULL,
cause_source_concept_id INTEGER NULL
)
CREATE TABLE death
(
person_id INTEGER NOT NULL ,
death_date DATE NOT NULL ,
death_datetime DATETIME2 NULL ,
death_type_concept_id INTEGER NOT NULL ,
cause_concept_id INTEGER NULL ,
cause_source_value VARCHAR(50) NULL,
cause_source_concept_id INTEGER NULL
)
;
CREATE TABLE visit_occurrence
(
visit_occurrence_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
visit_concept_id INTEGER NOT NULL ,
visit_start_date DATE NOT NULL ,
visit_start_time VARCHAR(10) NULL ,
visit_end_date DATE NOT NULL ,
visit_end_time VARCHAR(10) NULL ,
visit_type_concept_id INTEGER NOT NULL ,
provider_id INTEGER NULL,
care_site_id INTEGER NULL,
visit_source_value VARCHAR(50) NULL,
visit_source_concept_id INTEGER NULL
)
CREATE TABLE visit_occurrence
(
visit_occurrence_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
visit_concept_id INTEGER NOT NULL ,
visit_start_date DATE NOT NULL ,
visit_start_datetime DATETIME2 NULL ,
visit_end_date DATE NOT NULL ,
visit_end_datetime DATETIME2 NULL ,
visit_type_concept_id INTEGER NOT NULL ,
provider_id INTEGER NULL,
care_site_id INTEGER NULL,
visit_source_value VARCHAR(50) NULL,
visit_source_concept_id INTEGER NULL
)
;
CREATE TABLE procedure_occurrence
(
procedure_occurrence_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
procedure_concept_id INTEGER NOT NULL ,
procedure_date DATE NOT NULL ,
procedure_type_concept_id INTEGER NOT NULL ,
modifier_concept_id INTEGER NULL ,
quantity INTEGER NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
procedure_source_value VARCHAR(50) NULL ,
procedure_source_concept_id INTEGER NULL ,
qualifier_source_value VARCHAR(50) NULL
)
CREATE TABLE procedure_occurrence
(
procedure_occurrence_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
procedure_concept_id INTEGER NOT NULL ,
procedure_date DATE NOT NULL ,
procedure_datetime DATETIME2 NOT NULL ,
procedure_type_concept_id INTEGER NOT NULL ,
modifier_concept_id INTEGER NULL ,
quantity INTEGER NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
procedure_source_value VARCHAR(50) NULL ,
procedure_source_concept_id INTEGER NULL ,
qualifier_source_value VARCHAR(50) NULL
)
;
CREATE TABLE drug_exposure
(
drug_exposure_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
drug_concept_id INTEGER NOT NULL ,
drug_exposure_start_date DATE NOT NULL ,
drug_exposure_end_date DATE NULL ,
drug_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
refills INTEGER NULL ,
quantity FLOAT NULL ,
days_supply INTEGER NULL ,
sig VARCHAR(MAX) NULL ,
route_concept_id INTEGER NULL ,
effective_drug_dose FLOAT NULL ,
dose_unit_concept_id INTEGER NULL ,
lot_number VARCHAR(50) NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
drug_source_value VARCHAR(50) NULL ,
drug_source_concept_id INTEGER NULL ,
route_source_value VARCHAR(50) NULL ,
dose_unit_source_value VARCHAR(50) NULL
)
CREATE TABLE drug_exposure
(
drug_exposure_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
drug_concept_id INTEGER NOT NULL ,
drug_exposure_start_date DATE NOT NULL ,
drug_exposure_start_datetime DATETIME2 NOT NULL ,
drug_exposure_end_date DATE NULL ,
drug_exposure_end_datetime DATETIME2 NULL ,
drug_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
refills INTEGER NULL ,
quantity FLOAT NULL ,
days_supply INTEGER NULL ,
sig VARCHAR(MAX) NULL ,
route_concept_id INTEGER NULL ,
effective_drug_dose FLOAT NULL ,
dose_unit_concept_id INTEGER NULL ,
lot_number VARCHAR(50) NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
drug_source_value VARCHAR(50) NULL ,
drug_source_concept_id INTEGER NULL ,
route_source_value VARCHAR(50) NULL ,
dose_unit_source_value VARCHAR(50) NULL
)
;
CREATE TABLE device_exposure
(
device_exposure_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
device_concept_id INTEGER NOT NULL ,
device_exposure_start_date DATE NOT NULL ,
device_exposure_end_date DATE NULL ,
device_type_concept_id INTEGER NOT NULL ,
unique_device_id VARCHAR(50) NULL ,
quantity INTEGER NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
device_source_value VARCHAR(100) NULL ,
device_source_concept_id INTEGER NULL
)
CREATE TABLE device_exposure
(
device_exposure_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
device_concept_id INTEGER NOT NULL ,
device_exposure_start_date DATE NOT NULL ,
device_exposure_start_datetime DATETIME2 NOT NULL ,
device_exposure_end_date DATE NULL ,
device_exposure_end_datetime DATETIME2 NULL ,
device_type_concept_id INTEGER NOT NULL ,
unique_device_id VARCHAR(50) NULL ,
quantity INTEGER NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
device_source_value VARCHAR(100) NULL ,
device_source_concept_id INTEGER NULL
)
;
CREATE TABLE condition_occurrence
(
condition_occurrence_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
condition_concept_id INTEGER NOT NULL ,
condition_start_date DATE NOT NULL ,
condition_end_date DATE NULL ,
condition_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
condition_source_value VARCHAR(50) NULL ,
condition_source_concept_id INTEGER NULL
)
CREATE TABLE condition_occurrence
(
condition_occurrence_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
condition_concept_id INTEGER NOT NULL ,
condition_start_date DATE NOT NULL ,
condition_start_datetime DATETIME2 NOT NULL ,
condition_end_date DATE NULL ,
condition_end_datetime DATETIME2 NULL ,
condition_type_concept_id INTEGER NOT NULL ,
stop_reason VARCHAR(20) NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
condition_source_value VARCHAR(50) NULL ,
condition_source_concept_id INTEGER NULL
)
;
CREATE TABLE measurement
(
measurement_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
measurement_concept_id INTEGER NOT NULL ,
measurement_date DATE NOT NULL ,
measurement_time VARCHAR(10) NULL ,
measurement_type_concept_id INTEGER NOT NULL ,
operator_concept_id INTEGER NULL ,
value_as_number FLOAT NULL ,
value_as_concept_id INTEGER NULL ,
unit_concept_id INTEGER NULL ,
range_low FLOAT NULL ,
range_high FLOAT NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
measurement_source_value VARCHAR(50) NULL ,
measurement_source_concept_id INTEGER NULL ,
unit_source_value VARCHAR(50) NULL ,
value_source_value VARCHAR(50) NULL
)
CREATE TABLE measurement
(
measurement_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
measurement_concept_id INTEGER NOT NULL ,
measurement_date DATE NOT NULL ,
measurement_datetime DATETIME2 NULL ,
measurement_type_concept_id INTEGER NOT NULL ,
operator_concept_id INTEGER NULL ,
value_as_number FLOAT NULL ,
value_as_concept_id INTEGER NULL ,
unit_concept_id INTEGER NULL ,
range_low FLOAT NULL ,
range_high FLOAT NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
measurement_source_value VARCHAR(50) NULL ,
measurement_source_concept_id INTEGER NULL ,
unit_source_value VARCHAR(50) NULL ,
value_source_value VARCHAR(50) NULL
)
;
CREATE TABLE note
(
note_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
note_date DATE NOT NULL ,
note_time VARCHAR(10) NULL ,
note_type_concept_id INTEGER NOT NULL ,
note_text VARCHAR(MAX) NOT NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
note_source_value VARCHAR(50) NULL
)
CREATE TABLE note
(
note_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
note_date DATE NOT NULL ,
note_datetime DATETIME2 NULL ,
note_type_concept_id INTEGER NOT NULL ,
note_text VARCHAR(MAX) NOT NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
note_source_value VARCHAR(50) NULL
)
;
CREATE TABLE observation
(
observation_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
observation_concept_id INTEGER NOT NULL ,
observation_date DATE NOT NULL ,
observation_time VARCHAR(10) NULL ,
observation_type_concept_id INTEGER NOT NULL ,
value_as_number FLOAT NULL ,
value_as_string VARCHAR(60) NULL ,
value_as_concept_id INTEGER NULL ,
qualifier_concept_id INTEGER NULL ,
unit_concept_id INTEGER NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
observation_source_value VARCHAR(50) NULL ,
observation_source_concept_id INTEGER NULL ,
unit_source_value VARCHAR(50) NULL ,
qualifier_source_value VARCHAR(50) NULL
)
CREATE TABLE observation
(
observation_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
observation_concept_id INTEGER NOT NULL ,
observation_date DATE NOT NULL ,
observation_datetime DATETIME2 NULL ,
observation_type_concept_id INTEGER NOT NULL ,
value_as_number FLOAT NULL ,
value_as_string VARCHAR(60) NULL ,
value_as_concept_id INTEGER NULL ,
qualifier_concept_id INTEGER NULL ,
unit_concept_id INTEGER NULL ,
provider_id INTEGER NULL ,
visit_occurrence_id INTEGER NULL ,
observation_source_value VARCHAR(50) NULL ,
observation_source_concept_id INTEGER NULL ,
unit_source_value VARCHAR(50) NULL ,
qualifier_source_value VARCHAR(50) NULL
)
;
CREATE TABLE fact_relationship
(
domain_concept_id_1 INTEGER NOT NULL ,
fact_id_1 INTEGER NOT NULL ,
domain_concept_id_2 INTEGER NOT NULL ,
fact_id_2 INTEGER NOT NULL ,
relationship_concept_id INTEGER NOT NULL
)
CREATE TABLE fact_relationship
(
domain_concept_id_1 INTEGER NOT NULL ,
fact_id_1 INTEGER NOT NULL ,
domain_concept_id_2 INTEGER NOT NULL ,
fact_id_2 INTEGER NOT NULL ,
relationship_concept_id INTEGER NOT NULL
)
;
@ -483,50 +493,50 @@ Standardized health system data
CREATE TABLE location
(
location_id INTEGER NOT NULL ,
address_1 VARCHAR(50) NULL ,
address_2 VARCHAR(50) NULL ,
city VARCHAR(50) NULL ,
state VARCHAR(2) NULL ,
zip VARCHAR(9) NULL ,
county VARCHAR(20) NULL ,
location_source_value VARCHAR(50) NULL
)
CREATE TABLE location
(
location_id INTEGER NOT NULL ,
address_1 VARCHAR(50) NULL ,
address_2 VARCHAR(50) NULL ,
city VARCHAR(50) NULL ,
state VARCHAR(2) NULL ,
zip VARCHAR(9) NULL ,
county VARCHAR(20) NULL ,
location_source_value VARCHAR(50) NULL
)
;
CREATE TABLE care_site
(
care_site_id INTEGER NOT NULL ,
care_site_name VARCHAR(255) NULL ,
place_of_service_concept_id INTEGER NULL ,
location_id INTEGER NULL ,
care_site_source_value VARCHAR(50) NULL ,
place_of_service_source_value VARCHAR(50) NULL
)
CREATE TABLE care_site
(
care_site_id INTEGER NOT NULL ,
care_site_name VARCHAR(255) NULL ,
place_of_service_concept_id INTEGER NULL ,
location_id INTEGER NULL ,
care_site_source_value VARCHAR(50) NULL ,
place_of_service_source_value VARCHAR(50) NULL
)
;
CREATE TABLE provider
(
provider_id INTEGER NOT NULL ,
provider_name VARCHAR(255) NULL ,
NPI VARCHAR(20) NULL ,
DEA VARCHAR(20) NULL ,
specialty_concept_id INTEGER NULL ,
care_site_id INTEGER NULL ,
year_of_birth INTEGER NULL ,
gender_concept_id INTEGER NULL ,
provider_source_value VARCHAR(50) NULL ,
specialty_source_value VARCHAR(50) NULL ,
specialty_source_concept_id INTEGER NULL ,
gender_source_value VARCHAR(50) NULL ,
gender_source_concept_id INTEGER NULL
)
CREATE TABLE provider
(
provider_id INTEGER NOT NULL ,
provider_name VARCHAR(255) NULL ,
NPI VARCHAR(20) NULL ,
DEA VARCHAR(20) NULL ,
specialty_concept_id INTEGER NULL ,
care_site_id INTEGER NULL ,
year_of_birth INTEGER NULL ,
gender_concept_id INTEGER NULL ,
provider_source_value VARCHAR(50) NULL ,
specialty_source_value VARCHAR(50) NULL ,
specialty_source_concept_id INTEGER NULL ,
gender_source_value VARCHAR(50) NULL ,
gender_source_concept_id INTEGER NULL
)
;
@ -539,16 +549,16 @@ Standardized health economics
************************/
CREATE TABLE payer_plan_period
(
payer_plan_period_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
payer_plan_period_start_date DATE NOT NULL ,
payer_plan_period_end_date DATE NOT NULL ,
payer_source_value VARCHAR (50) NULL ,
plan_source_value VARCHAR (50) NULL ,
family_source_value VARCHAR (50) NULL
)
CREATE TABLE payer_plan_period
(
payer_plan_period_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
payer_plan_period_start_date DATE NOT NULL ,
payer_plan_period_end_date DATE NOT NULL ,
payer_source_value VARCHAR (50) NULL ,
plan_source_value VARCHAR (50) NULL ,
family_source_value VARCHAR (50) NULL
)
;
@ -631,29 +641,29 @@ CREATE TABLE device_cost
*/
CREATE TABLE cost
(
cost_id INTEGER NOT NULL ,
cost_event_id INTEGER NOT NULL ,
cost_domain_id VARCHAR(20) NOT NULL ,
cost_type_concept_id INTEGER NOT NULL ,
currency_concept_id INTEGER NULL ,
total_charge FLOAT NULL ,
total_cost FLOAT NULL ,
total_paid FLOAT NULL ,
paid_by_payer FLOAT NULL ,
paid_by_patient FLOAT NULL ,
paid_patient_copay FLOAT NULL ,
paid_patient_coinsurance FLOAT NULL ,
paid_patient_deductible FLOAT NULL ,
paid_by_primary FLOAT NULL ,
paid_ingredient_cost FLOAT NULL ,
paid_dispensing_fee FLOAT NULL ,
payer_plan_period_id INTEGER NULL ,
amount_allowed FLOAT NULL ,
revenue_code_concept_id INTEGER NULL ,
reveue_code_source_value VARCHAR(50) NULL
)
-- COST: one row per cost_id. cost_event_id plus cost_domain_id are the only
-- event-linking columns declared here; every monetary column is a nullable FLOAT.
CREATE TABLE cost
(
cost_id INTEGER NOT NULL ,
cost_event_id INTEGER NOT NULL ,
cost_domain_id VARCHAR(20) NOT NULL ,
cost_type_concept_id INTEGER NOT NULL ,
currency_concept_id INTEGER NULL ,
total_charge FLOAT NULL ,
total_cost FLOAT NULL ,
total_paid FLOAT NULL ,
paid_by_payer FLOAT NULL ,
paid_by_patient FLOAT NULL ,
paid_patient_copay FLOAT NULL ,
paid_patient_coinsurance FLOAT NULL ,
paid_patient_deductible FLOAT NULL ,
paid_by_primary FLOAT NULL ,
paid_ingredient_cost FLOAT NULL ,
paid_dispensing_fee FLOAT NULL ,
payer_plan_period_id INTEGER NULL ,
amount_allowed FLOAT NULL ,
revenue_code_concept_id INTEGER NULL ,
-- NOTE(review): "reveue" looks like a misspelling of "revenue" (compare
-- revenue_code_concept_id above). Renaming the column is a schema change,
-- so confirm with downstream ETL/consumers before correcting it.
reveue_code_source_value VARCHAR(50) NULL
)
;
@ -666,68 +676,68 @@ Standardized derived elements
************************/
CREATE TABLE cohort
(
cohort_definition_id INTEGER NOT NULL ,
subject_id INTEGER NOT NULL ,
cohort_start_date DATE NOT NULL ,
cohort_end_date DATE NOT NULL
)
CREATE TABLE cohort
(
cohort_definition_id INTEGER NOT NULL ,
subject_id INTEGER NOT NULL ,
cohort_start_date DATE NOT NULL ,
cohort_end_date DATE NOT NULL
)
;
CREATE TABLE cohort_attribute
(
cohort_definition_id INTEGER NOT NULL ,
cohort_start_date DATE NOT NULL ,
cohort_end_date DATE NOT NULL ,
subject_id INTEGER NOT NULL ,
attribute_definition_id INTEGER NOT NULL ,
value_as_number FLOAT NULL ,
value_as_concept_id INTEGER NULL
)
CREATE TABLE cohort_attribute
(
cohort_definition_id INTEGER NOT NULL ,
cohort_start_date DATE NOT NULL ,
cohort_end_date DATE NOT NULL ,
subject_id INTEGER NOT NULL ,
attribute_definition_id INTEGER NOT NULL ,
value_as_number FLOAT NULL ,
value_as_concept_id INTEGER NULL
)
;
CREATE TABLE drug_era
(
drug_era_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
drug_concept_id INTEGER NOT NULL ,
drug_era_start_date DATE NOT NULL ,
drug_era_end_date DATE NOT NULL ,
drug_exposure_count INTEGER NULL ,
gap_days INTEGER NULL
)
CREATE TABLE drug_era
(
drug_era_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
drug_concept_id INTEGER NOT NULL ,
drug_era_start_date DATE NOT NULL ,
drug_era_end_date DATE NOT NULL ,
drug_exposure_count INTEGER NULL ,
gap_days INTEGER NULL
)
;
CREATE TABLE dose_era
(
dose_era_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
drug_concept_id INTEGER NOT NULL ,
unit_concept_id INTEGER NOT NULL ,
dose_value FLOAT NOT NULL ,
dose_era_start_date DATE NOT NULL ,
dose_era_end_date DATE NOT NULL
)
CREATE TABLE dose_era
(
dose_era_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
drug_concept_id INTEGER NOT NULL ,
unit_concept_id INTEGER NOT NULL ,
dose_value FLOAT NOT NULL ,
dose_era_start_date DATE NOT NULL ,
dose_era_end_date DATE NOT NULL
)
;
CREATE TABLE condition_era
(
condition_era_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
condition_concept_id INTEGER NOT NULL ,
condition_era_start_date DATE NOT NULL ,
condition_era_end_date DATE NOT NULL ,
condition_occurrence_count INTEGER NULL
)
CREATE TABLE condition_era
(
condition_era_id INTEGER NOT NULL ,
person_id INTEGER NOT NULL ,
condition_concept_id INTEGER NOT NULL ,
condition_era_start_date DATE NOT NULL ,
condition_era_end_date DATE NOT NULL ,
condition_occurrence_count INTEGER NULL
)
;

View File

@ -1,60 +0,0 @@
Conversion from CDM v4 to CDM v5
==============================================================
The scripts in this directory will aid you in moving your data from the Common Data Model (CDM) version 4 to version 5.
Overview
==============================================================
The resources in this folder provide you with a means for converting your CDM V4 database to CDM V5. The goal of these scripts is to provide a path for converting your data to the CDM V5 to take advantage of the other tools that are being built to support research on CDM V5. These scripts are **NOT** designed to replace a proper ETL from your source data to CDM V5.
One of the most important aspects of this conversion script is the use of the **[Standardized Vocabularies](http://www.ohdsi.org/web/wiki/doku.php?id=documentation:vocabulary:introduction "Standardized Vocabularies")** to map from tables in the V4 database to their corresponding V5 table using the vocabulary **[domains](http://www.ohdsi.org/web/wiki/doku.php?id=documentation:vocabulary:domains "domains")**. At the beginning of the conversion script, we create a #concept\_map temporary table which holds a mapping from source\_concept\_ids to standard target\_concept\_ids for each of the domains. This table is then used throughout the remainder of the script to map rows from each of the source V4 tables (e.g. condition\_occurrence) to the proper table in the V5 data model. As a result, the number of rows in the V4 condition\_occurrence will not be the same as in V5 since some rows may be converted to a different table based on the standard concept mapping.
Assumptions
==============================================================
We have created a directory per Relational Database Management System (RDBMS) that contains the conversion script for that database platform. All of the scripts have the same assumptions:
1. Your source CDM V4 database is on the same server as your target CDM V5 database.
2. You have read rights to the CDM V4 database and database owner privileges on the target V5 database as this script will create an "ETL_WARNINGS" table in the process.
3. You have enough storage on your database server to perform the conversion.
Usage
=====
1. **Create your V5 Target Database:** Create a CDM V5 database on the same server as your CDM V4 database by using the **[Common Data Model Scripts](https://github.com/OHDSI/CommonDataModel "Common Data Model Scripts")** for your RDBMS. **NOTE: Please review the data types that exist on your V4 database and ensure you carry forward any data type changes from V4 to V5. For example, if you converted columns from an INT to a BIGINT to accommodate tables with > 2.1 Billion Rows, you will need to make the corresponding changes in your V5 Database and potentially to this conversion script**
2. **Load the V5 Vocabulary**: Download the V5 vocabulary from **[Athena](http://www.ohdsi.org/web/athena/ "Athena")** and load them into the V5 database created in step 1 above.
2. **Download the conversion script:** The **[CDM V4 to V5 Conversion](https://github.com/OHDSI/CommonDataModel/tree/master/Version4%20To%20Version5%20Conversion "CDM V4 to V5 Conversion Directory")** folder has subfolders with scripts that will work on each RDBMS. In order to make this file work in your environment, you will need to perform a global "FIND AND REPLACE" on the conversion script to fill in the file with values that pertain to your environment. The following are the tokens you should use when doing your "FIND AND REPLACE" operation:
* [SOURCE_CDMV4] - Your V4 database name
* [SOURCE_CDMV4].[SCHEMA] - Your V4 database name + schema
* [TARGET_CDMV5] - Your V5 database name
* [TARGET_CDMV5].[SCHEMA] - Your V5 database name + schema
3. Run the resulting script on your target RDBMS. **NOTE:** If you are running the Oracle script via SQL Developer or similar, you may need to alter the script to include the appropriate "/" symbols to mark the end of the anonymous code blocks. This has been done in the Oracle script that has been provided in this repository.
4. At the end of the conversion process, several tables will be produced that will help you to understand how your data has changed as a result of the conversion process. This is described in the Quality Assurance section below.
Quality Assurance
===================
At the end of the conversion script, there are 3 queries which will provide information on the conversion process. For reference, this section of the conversion script has a header comment:
/**** QUALITY ASSURANCE OUTPUT ****/
The first query provides a means for comparing the table row counts between the V4 and V5 databases. As mentioned in the overview section above, table row counts will differ between V4 and V5 based on the way that the standard vocabulary maps the data. The next set of queries will help to tie out the row count changes in these tables.
The second query shows each source V4 table (e.g. condition\_occurrence) and how its row counts map to the V5 domains. This table is useful to understand how the data from the V4 source was distributed into the V5 tables. As a note, 1 record in the V4 table could map to multiple records in V5 as some concepts will map to multiple standard domains.
The third query uses the information from the second query and provides a summary for each V5 domain. This is useful for tying out the row counts we'd expect from the script with the actual results observed in the first query.
We have included a spreadsheet called "QA-Results.xlsx" which provides an example of how to utilize these 3 result queries to understand the results of the conversion process. The results of the first query go into the "Rowcounts" worksheet. The results of the second and third queries go into the "Classification Map Results" worksheet. If the conversion process worked as expected, all of the "Difference" columns should equal 0 in the "Classification Map Results" worksheet.
Getting Involved
==============================================================
Each script found in the RDBMS directory was generated from the OHDSI-SQL file: *OMOP CDMv4 to CDMv5 - OHDSI-SQL.sql* found in the root of this directory. If you would like to contribute to this script, we'd suggest you modify this script and use **[SqlRender](https://github.com/OHDSI/SqlRender "SqlRender")** to re-generate the specific RDBMS scripts. We have also supplied a basic R script in this directory to help re-generate the scripts using SqlRender.
Developer questions/comments/feedback: OHDSI Forum
We use the GitHub issue tracker for all bugs/issues/enhancements

View File

@ -1,310 +0,0 @@
/*********************************************************************************
# Copyright 2015 Observational Health Data Sciences and Informatics
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
********************************************************************************/
/************************
####### # # ####### ###### ##### ###### # # ## #######
# # ## ## # # # # # # # # ## ## # # # # #
# # # # # # # # # # # # # # # # # # # # # #
# # # # # # # ###### # # # # # # # # ####### #######
# # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # #
####### # # ####### # ##### ###### # # ## # # #####
script to create OMOP common data model, version 4.5 for Oracle database
last revised: 1 Mar 2015
author: Christian Reich
*************************/
-- Terminology concepts: one row per concept, keyed by concept_id.
CREATE TABLE concept
(
  concept_id        INTEGER             NOT NULL,
  concept_name      VARCHAR2(256 BYTE)  NOT NULL,
  concept_level     NUMBER              NOT NULL,
  concept_class     VARCHAR2(60 BYTE)   NOT NULL,
  vocabulary_id     INTEGER             NOT NULL,
  concept_code      VARCHAR2(40 BYTE)   NOT NULL,
  valid_start_date  DATE                NOT NULL,
  -- ANSI date literal: unlike the string '31-Dec-2099', it does not depend on
  -- the session's NLS_DATE_FORMAT / NLS_DATE_LANGUAGE for conversion.
  valid_end_date    DATE                DEFAULT DATE '2099-12-31' NOT NULL,
  invalid_reason    CHAR(1 BYTE)
) NOLOGGING;
COMMENT ON TABLE concept IS 'A list of all valid terminology concepts across domains and their attributes. Concepts are derived from existing standards.';
COMMENT ON COLUMN concept.concept_id IS 'A system-generated identifier to uniquely identify each concept across all concept types.';
COMMENT ON COLUMN concept.concept_name IS 'An unambiguous, meaningful and descriptive name for the concept.';
COMMENT ON COLUMN concept.concept_level IS 'The level of hierarchy associated with the concept. Different concept levels are assigned to concepts to depict their seniority in a clearly defined hierarchy, such as drugs, conditions, etc. A concept level of 0 is assigned to concepts that are not part of a standard vocabulary, but are part of the vocabulary for reference purposes (e.g. drug form).';
COMMENT ON COLUMN concept.concept_class IS 'The category or class of the concept along both the hierarchical tree as well as different domains within a vocabulary. Examples are ''Clinical Drug'', ''Ingredient'', ''Clinical Finding'' etc.';
COMMENT ON COLUMN concept.vocabulary_id IS 'A foreign key to the vocabulary table indicating from which source the concept has been adapted.';
COMMENT ON COLUMN concept.concept_code IS 'The concept code represents the identifier of the concept in the source data it originates from, such as SNOMED-CT concept IDs, RxNorm RXCUIs etc. Note that concept codes are not unique across vocabularies.';
COMMENT ON COLUMN concept.valid_start_date IS 'The date when the concept was first recorded.';
COMMENT ON COLUMN concept.valid_end_date IS 'The date when the concept became invalid because it was deleted or superseded (updated) by a new concept. The default value is 31-Dec-2099.';
COMMENT ON COLUMN concept.invalid_reason IS 'Concepts that are replaced with a new concept are designated "Updated" (U) and concepts that are removed without replacement are "Deprecated" (D).';
CREATE INDEX concept_code ON concept (concept_code, vocabulary_id);
CREATE UNIQUE INDEX XPKconcept ON concept (concept_id);
ALTER TABLE concept ADD (
CHECK ( invalid_reason IN ('D', 'U'))
ENABLE VALIDATE,
CONSTRAINT XPKCONCEPT
PRIMARY KEY
(concept_id)
USING INDEX XPKCONCEPT
ENABLE VALIDATE);
--add table RELATIONSHIP
CREATE TABLE relationship
(
relationship_id INTEGER NOT NULL,
relationship_name VARCHAR2(256 BYTE) NOT NULL,
is_hierarchical INTEGER NOT NULL,
defines_ancestry INTEGER DEFAULT 1 NOT NULL,
reverse_relationship INTEGER
) NOLOGGING;
COMMENT ON TABLE relationship IS 'A list of relationship between concepts. Some of these relationships are generic (e.g. "Subsumes" relationship), others are domain-specific.';
COMMENT ON COLUMN relationship.relationship_id IS 'The type of relationship captured by the relationship record.';
COMMENT ON COLUMN relationship.relationship_name IS 'The text that describes the relationship type.';
-- Both flags are declared INTEGER NOT NULL, so they cannot hold 'Y' or NULL;
-- NOTE(review): 1/0 is inferred from the DEFAULT 1 on defines_ancestry - confirm against the vocabulary data.
COMMENT ON COLUMN relationship.is_hierarchical IS 'Defines whether a relationship defines concepts into classes or hierarchies. Values are 1 for hierarchical relationship or 0 if not.';
COMMENT ON COLUMN relationship.defines_ancestry IS 'Defines whether a hierarchical relationship contributes to the concept_ancestor table. These are subsets of the hierarchical relationships. Valid values are 1 or 0.';
COMMENT ON COLUMN relationship.reverse_relationship IS 'relationship ID of the reverse relationship to this one. Corresponding records of reverse relationships have their concept_id_1 and concept_id_2 swapped.';
-- Unique index that backs the primary key of relationship.
-- Its name must match the USING INDEX clause below, otherwise the ALTER TABLE
-- fails because the referenced index does not exist.
CREATE UNIQUE INDEX xpkrelationship_type ON relationship
(relationship_id);
ALTER TABLE relationship ADD (
CONSTRAINT xpkrelationship_type
PRIMARY KEY
(relationship_id)
USING INDEX xpkrelationship_type
ENABLE VALIDATE);
--add table concept_relationship
-- Directed relationships between two concepts (concept_id_1 -> concept_id_2).
CREATE TABLE concept_relationship
(
  concept_id_1      INTEGER       NOT NULL,
  concept_id_2      INTEGER       NOT NULL,
  relationship_id   INTEGER       NOT NULL,
  valid_start_date  DATE          NOT NULL,
  -- ANSI date literal: unlike the string '31-Dec-2099', it does not depend on
  -- the session's NLS_DATE_FORMAT / NLS_DATE_LANGUAGE for conversion.
  valid_end_date    DATE          DEFAULT DATE '2099-12-31' NOT NULL,
  invalid_reason    CHAR(1 BYTE)
) NOLOGGING;
COMMENT ON TABLE concept_relationship IS 'A list of relationship between concepts. Some of these relationships are generic (e.g. ''Subsumes'' relationship), others are domain-specific.';
COMMENT ON COLUMN concept_relationship.concept_id_1 IS 'A foreign key to the concept in the concept table associated with the relationship. relationships are directional, and this field represents the source concept designation.';
COMMENT ON COLUMN concept_relationship.concept_id_2 IS 'A foreign key to the concept in the concept table associated with the relationship. relationships are directional, and this field represents the destination concept designation.';
COMMENT ON COLUMN concept_relationship.relationship_id IS 'The type of relationship as defined in the relationship table.';
COMMENT ON COLUMN concept_relationship.valid_start_date IS 'The date when the relationship was first recorded.';
COMMENT ON COLUMN concept_relationship.valid_end_date IS 'The date when the relationship became invalid because it was deleted or superseded (updated) by a new relationship. Default value is 31-Dec-2099.';
COMMENT ON COLUMN concept_relationship.invalid_reason IS 'Reason the relationship was invalidated. Possible values are D (deleted), U (replaced with an update) or NULL when valid_end_date has the default value.';
CREATE UNIQUE INDEX xpkconcept_relationship ON concept_relationship
(concept_id_1, concept_id_2, relationship_id);
-- Adds the invalid_reason domain check (declared once; the original repeated the
-- identical CHECK three times) and the primary key on the existing unique index.
ALTER TABLE concept_relationship ADD (
  CHECK (invalid_reason IN ('D', 'U'))
  ENABLE VALIDATE,
  CONSTRAINT xpkconcept_relationship
  PRIMARY KEY
  (concept_id_1, concept_id_2, relationship_id)
  USING INDEX xpkconcept_relationship
  ENABLE VALIDATE);
ALTER TABLE concept_relationship ADD (
CONSTRAINT concept_REL_CHILD_FK
FOREIGN KEY (concept_id_2)
REFERENCES concept (concept_id)
ENABLE VALIDATE,
CONSTRAINT concept_REL_PARENT_FK
FOREIGN KEY (concept_id_1)
REFERENCES concept (concept_id)
ENABLE VALIDATE,
CONSTRAINT concept_REL_REL_type_FK
FOREIGN KEY (relationship_id)
REFERENCES relationship (relationship_id)
ENABLE VALIDATE);
--add table concept_ancestor
CREATE TABLE concept_ancestor
(
ancestor_concept_id INTEGER NOT NULL,
descendant_concept_id INTEGER NOT NULL,
max_levels_of_separation NUMBER,
min_levels_of_separation NUMBER
) NOLOGGING;
COMMENT ON TABLE concept_ancestor IS 'A specialized table containing only hierarchical relationship between concepts that may span several generations.';
COMMENT ON COLUMN concept_ancestor.ancestor_concept_id IS 'A foreign key to the concept code in the concept table for the higher-level concept that forms the ancestor in the relationship.';
COMMENT ON COLUMN concept_ancestor.descendant_concept_id IS 'A foreign key to the concept code in the concept table for the lower-level concept that forms the descendant in the relationship.';
COMMENT ON COLUMN concept_ancestor.max_levels_of_separation IS 'The maximum separation in number of levels of hierarchy between ancestor and descendant concepts. This is an optional attribute that is used to simplify hierarchic analysis. ';
COMMENT ON COLUMN concept_ancestor.min_levels_of_separation IS 'The minimum separation in number of levels of hierarchy between ancestor and descendant concepts. This is an optional attribute that is used to simplify hierarchic analysis.';
CREATE UNIQUE INDEX xpkconcept_ancestor ON concept_ancestor
(ancestor_concept_id, descendant_concept_id);
ALTER TABLE concept_ancestor ADD (
CONSTRAINT xpkconcept_ancestor
PRIMARY KEY
(ancestor_concept_id, descendant_concept_id)
USING INDEX xpkconcept_ancestor
ENABLE VALIDATE);
ALTER TABLE concept_ancestor ADD (
CONSTRAINT concept_ancestor_FK
FOREIGN KEY (ancestor_concept_id)
REFERENCES concept (concept_id)
ENABLE VALIDATE,
CONSTRAINT concept_descendant_FK
FOREIGN KEY (descendant_concept_id)
REFERENCES concept (concept_id)
ENABLE VALIDATE);
--add table concept_synonym
CREATE TABLE concept_synonym
(
concept_synonym_id INTEGER NOT NULL,
concept_id INTEGER NOT NULL,
concept_synonym_name VARCHAR2(1000 BYTE) NOT NULL
) NOLOGGING;
COMMENT ON TABLE concept_synonym IS 'A table with synonyms for concepts that have more than one valid name or description.';
COMMENT ON COLUMN concept_synonym.concept_synonym_id IS 'A system-generated unique identifier for each concept synonym.';
COMMENT ON COLUMN concept_synonym.concept_id IS 'A foreign key to the concept in the concept table. ';
COMMENT ON COLUMN concept_synonym.concept_synonym_name IS 'The alternative name for the concept.';
CREATE UNIQUE INDEX xpkconcept_synonym ON concept_synonym
(concept_synonym_id);
ALTER TABLE concept_synonym ADD (
CONSTRAINT xpkconcept_synonym
PRIMARY KEY
(concept_synonym_id)
USING INDEX xpkconcept_synonym
ENABLE VALIDATE);
ALTER TABLE concept_synonym ADD (
CONSTRAINT concept_synonym_concept_FK
FOREIGN KEY (concept_id)
REFERENCES concept (concept_id)
ENABLE VALIDATE);
--add table source_to_concept_map
CREATE TABLE source_to_concept_map
(
source_code VARCHAR2(40 BYTE) NOT NULL,
source_vocabulary_id INTEGER NOT NULL,
source_code_description VARCHAR2(256 BYTE),
target_concept_id INTEGER NOT NULL,
target_vocabulary_id INTEGER NOT NULL,
mapping_type VARCHAR2(256 BYTE),
primary_map CHAR(1 BYTE),
valid_start_date DATE NOT NULL,
valid_end_date DATE NOT NULL,
invalid_reason CHAR(1 BYTE)
) NOLOGGING;
CREATE INDEX SOURCE_TO_concept_SOURCE_idX ON source_to_concept_map
(SOURCE_CODE);
CREATE UNIQUE INDEX xpksource_to_concept_map ON source_to_concept_map
(SOURCE_vocabulary_id, TARGET_concept_id, SOURCE_CODE, valid_end_date);
ALTER TABLE source_to_concept_map ADD (
CHECK (primary_map in ('Y'))
ENABLE VALIDATE,
CHECK (invalid_reason in ('D', 'U'))
ENABLE VALIDATE,
CONSTRAINT xpksource_to_concept_map
PRIMARY KEY
(SOURCE_vocabulary_id, TARGET_concept_id, SOURCE_CODE, valid_end_date)
USING INDEX xpksource_to_concept_map
ENABLE VALIDATE);
ALTER TABLE source_to_concept_map ADD (
CONSTRAINT SOURCE_TO_concept_concept
FOREIGN KEY (TARGET_concept_id)
REFERENCES concept (concept_id)
ENABLE VALIDATE);
--add table drug_strength
CREATE TABLE drug_strength
(
drug_concept_id INTEGER NOT NULL,
ingredient_concept_id INTEGER NOT NULL,
amount_value NUMBER,
amount_unit VARCHAR2 (60 BYTE),
concentration_value NUMBER,
concentration_enum_unit VARCHAR2 (60 BYTE),
concentration_denom_unit VARCHAR2 (60 BYTE),
valid_start_date DATE NOT NULL,
valid_end_date DATE NOT NULL,
invalid_reason VARCHAR2 (1 BYTE)
);
--add table vocabulary
CREATE TABLE VOCABULARY
(
VOCABULARY_ID INTEGER NOT NULL,
VOCABULARY_NAME VARCHAR2 (256 BYTE) NOT NULL
);

View File

@ -1,14 +0,0 @@
Common-Data-Model / Oracle
=================
This folder contains the SQL scripts for Oracle.
In order to create your instantiation of the Common Data Model, we recommend following these steps:
1. Create an empty schema.
2. Execute the script `CDM V4 ddl.sql` to create the tables and fields.
3. Load your data into the schema using the loading scripts in VocabImport
Note: you could also apply the constraints and the indexes after loading the data; this will speed up the insertion of the data considerably.

View File

@ -1,51 +0,0 @@
/*********************************************************************************
# Copyright 2015 Observational Health Data Sciences and Informatics
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
********************************************************************************/
/************************
####### # # ####### ###### ##### ###### # # ## #######
# # ## ## # # # # # # # # ## ## # # # # #
# # # # # # # # # # # # # # # # # # # # # #
# # # # # # # ###### # # # # # # # # ####### #######
# # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # #
####### # # ####### # ##### ###### # # ## # # #####
script to load the Vocabulary related tables in the OMOP common data model, version 4.5 for Oracle database
last revised: 19 Mar 2015
author: Lee Evans
Notes
1) This script assumes the CDM version 4.5 vocabulary zip file has been unzipped into the "C:\CDM" directory.
2) If you unzipped your CDM version 4.5 vocabulary files into a different directory then replace all file paths below, with your directory path.
3) If you have existing data in your CDM vocabulary tables then backup that data (if needed) and truncate those tables before loading
*************************/
sqlldr CDM/<password> CONTROL=CONCEPT.ctl LOG=C:\CDM\CONCEPT.log BAD=C:\CDM\CONCEPT.bad
sqlldr CDM/<password> CONTROL=CONCEPT_ANCESTOR.ctl LOG=C:\CDM\CONCEPT_ANCESTOR.log BAD=C:\CDM\CONCEPT_ANCESTOR.bad
sqlldr CDM/<password> CONTROL=CONCEPT_RELATIONSHIP.ctl LOG=C:\CDM\CONCEPT_RELATIONSHIP.log BAD=C:\CDM\CONCEPT_RELATIONSHIP.bad
sqlldr CDM/<password> CONTROL=CONCEPT_SYNONYM.ctl LOG=C:\CDM\CONCEPT_SYNONYM.log BAD=C:\CDM\CONCEPT_SYNONYM.bad
sqlldr CDM/<password> CONTROL=DRUG_STRENGTH.ctl LOG=C:\CDM\DRUG_STRENGTH.log BAD=C:\CDM\DRUG_STRENGTH.bad
sqlldr CDM/<password> CONTROL=RELATIONSHIP.ctl LOG=C:\CDM\RELATIONSHIP.log BAD=C:\CDM\RELATIONSHIP.bad
sqlldr CDM/<password> CONTROL=VOCABULARY.ctl LOG=C:\CDM\VOCABULARY.log BAD=C:\CDM\VOCABULARY.bad
sqlldr CDM/<password> CONTROL=SOURCE_TO_CONCEPT_MAP.ctl LOG=C:\CDM\SOURCE_TO_CONCEPT_MAP.log BAD=C:\CDM\SOURCE_TO_CONCEPT_MAP.bad

View File

@ -1,18 +0,0 @@
options (skip=1)
load data
infile concept.csv
into table concept
replace
fields terminated by '\t'
trailing nullcols
(
concept_id,
concept_name CHAR(256),
concept_level,
concept_class,
vocabulary_id,
concept_code,
valid_start_date DATE 'YYYYMMDD',
valid_end_date DATE 'YYYYMMDD',
invalid_reason
)

View File

@ -1,13 +0,0 @@
-- SQL*Loader control file: loads concept_ancestor.csv into the concept_ancestor table.
-- skip=1 skips the first record of the data file (presumably the header row).
-- REPLACE removes the rows already in the table before loading.
-- Fields are tab-delimited; TRAILING NULLCOLS loads missing trailing fields as NULL.
-- Fields are read from each record in the order listed and stored in the named columns.
options (skip=1)
load data
infile concept_ancestor.csv
into table concept_ancestor
replace
fields terminated by '\t'
trailing nullcols
(
ancestor_concept_id,
descendant_concept_id,
min_levels_of_separation,
max_levels_of_separation
)

View File

@ -1,15 +0,0 @@
options (skip=1)
load data
infile concept_relationship.csv
into table concept_relationship
replace
fields terminated by '\t'
trailing nullcols
(
concept_id_1,
concept_id_2,
relationship_id,
valid_start_date DATE 'YYYYMMDD',
valid_end_date DATE 'YYYYMMDD',
invalid_reason
)

View File

@ -1,12 +0,0 @@
options (skip=1)
load data
infile concept_synonym.csv
into table concept_synonym
replace
fields terminated by '\t'
trailing nullcols
(
concept_synonym_id,
concept_id,
concept_synonym_name CHAR(1000)
)

View File

@ -1,19 +0,0 @@
options (skip=1)
load data
infile drug_strength.csv
into table drug_strength
replace
fields terminated by '\t'
trailing nullcols
(
drug_concept_id,
ingredient_concept_id,
amount_value,
amount_unit,
concentration_value,
concentration_enum_unit,
concentration_denom_unit,
valid_start_date DATE 'YYYYMMDD',
valid_end_date DATE 'YYYYMMDD',
invalid_reason
)

View File

@ -1,14 +0,0 @@
options (skip=1)
load data
infile relationship.csv
into table relationship
replace
fields terminated by '\t'
trailing nullcols
(
relationship_id,
relationship_name,
is_hierarchical,
defines_ancestry,
reverse_relationship
)

View File

@ -1,19 +0,0 @@
options (skip=1)
load data
infile source_to_concept_map.csv
into table source_to_concept_map
replace
fields terminated by '\t'
trailing nullcols
(
source_code,
source_vocabulary_id,
source_code_description CHAR(256),
target_concept_id,
target_vocabulary_id,
mapping_type,
primary_map,
valid_start_date DATE 'YYYYMMDD',
valid_end_date DATE 'YYYYMMDD',
invalid_reason
)

View File

@ -1,11 +0,0 @@
options (skip=1)
load data
infile vocabulary.csv
into table vocabulary
replace
fields terminated by '\t'
trailing nullcols
(
vocabulary_id,
vocabulary_name
)

View File

@ -1,131 +0,0 @@
/*********************************************************************************
# Copyright 2015 Observational Health Data Sciences and Informatics
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
********************************************************************************/
/************************
####### # # ####### ###### ##### ###### # # ## #######
# # ## ## # # # # # # # # ## ## # # # # #
# # # # # # # # # # # # # # # # # # # # # #
# # # # # # # ###### # # # # # # # # ####### #######
# # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # #
####### # # ####### # ##### ###### # # ## # # #####
script to create OMOP common data model, version 4.5 for PostgreSQL database
last revised: 20 Mar 2015
author: Lee Evans
*************************/
CREATE TABLE concept (
concept_id integer NOT NULL,
concept_name varchar(256) NOT NULL,
concept_level integer NOT NULL,
concept_class varchar(60) NOT NULL,
vocabulary_id integer NOT NULL,
concept_code varchar(40) NOT NULL,
valid_start_date date NOT NULL,
valid_end_date date NOT NULL DEFAULT '2099-12-31'::date,
invalid_reason varchar(1)
)
;
CREATE TABLE concept_ancestor (
ancestor_concept_id INTEGER NOT NULL,
descendant_concept_id INTEGER NOT NULL,
min_levels_of_separation INTEGER NOT NULL,
max_levels_of_separation INTEGER NOT NULL
)
;
CREATE TABLE concept_relationship (
concept_id_1 integer NOT NULL,
concept_id_2 integer NOT NULL,
relationship_id integer NOT NULL,
valid_start_date date NOT NULL,
valid_end_date date NOT NULL DEFAULT '2099-12-31'::date,
invalid_reason varchar(1)
)
;
CREATE TABLE concept_synonym (
concept_synonym_id integer NOT NULL,
concept_id integer NOT NULL,
concept_synonym_name varchar(1000) NOT NULL
)
;
CREATE TABLE drug_strength (
drug_concept_id integer NOT NULL,
ingredient_concept_id integer NOT NULL,
amount_value double precision,
amount_unit varchar(60),
concentration_value double precision,
concentration_enum_unit character varying(60),
concentration_denom_unit character varying(60),
valid_start_date date NOT NULL,
valid_end_date date NOT NULL,
invalid_reason varchar(1)
)
;
-- Relationship types that can link two concepts.
-- relationship_id is an integer so that it has the same type as
-- concept_relationship.relationship_id in this DDL; a varchar key would make
-- joins between the two tables fail without an explicit cast.
CREATE TABLE relationship (
  relationship_id       integer       NOT NULL,
  relationship_name     VARCHAR(256)  NOT NULL,
  is_hierarchical       integer       NOT NULL,
  defines_ancestry      integer       DEFAULT 1,
  reverse_relationship  integer
)
;
CREATE TABLE source_to_concept_map (
source_code VARCHAR(40) NOT NULL,
source_vocabulary_id INTEGER NOT NULL,
source_code_description VARCHAR(256),
target_concept_id INTEGER NOT NULL,
target_vocabulary_id INTEGER NOT NULL,
mapping_type VARCHAR(256),
primary_map VARCHAR(1),
valid_start_date DATE NOT NULL,
valid_end_date DATE NOT NULL,
invalid_reason VARCHAR(1) NULL
)
;
CREATE TABLE vocabulary (
vocabulary_id integer NOT NULL,
vocabulary_name VARCHAR(256) NOT NULL
)
;

View File

@ -1,13 +0,0 @@
Common-Data-Model / PostgreSQL
=================
This folder contains the SQL scripts for PostgreSQL.
In order to create your instantiation of the Common Data Model, we recommend following these steps:
1. Create an empty schema.
2. Execute the script `CDM V4 ddl.sql` to create the tables and fields.
3. Load your data into the schema.

View File

@ -1,54 +0,0 @@
/*********************************************************************************
# Copyright 2015 Observational Health Data Sciences and Informatics
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
********************************************************************************/
/************************
####### # # ####### ###### ##### ###### # # #######
# # ## ## # # # # # # # # ## ## # # #
# # # # # # # # # # # # # # # # # # # #
# # # # # # # ###### # # # # # # # # ######
# # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # #
####### # # ####### # ##### ###### # # ## #####
Script to load the common data model, version 4.5 vocabulary tables for PostgreSQL database
Notes
1) This script assumes the CDM version 4.5 vocabulary zip file has been unzipped into the "C:\CDM" directory.
2) If you unzipped your CDM version 4.5 vocabulary files into a different directory then replace all file paths below, with your directory path.
3) Run this SQL query script in the database where you created your CDM Version 4.5 tables
last revised: 20th March 2015
author: Lee Evans
*************************/
-- Load each vocabulary table from its tab-delimited file (first line is a header and is skipped).
-- COPY ... FROM 'file' reads the file on the database server, so the paths must be valid there.
-- QUOTE E'\b' sets the CSV quote character to backspace, presumably so that double quotes
-- appearing in the data are loaded literally instead of being treated as CSV quoting.
COPY CONCEPT FROM 'C:\CDM\CONCEPT.csv' WITH DELIMITER E'\t' CSV HEADER QUOTE E'\b' ;
COPY CONCEPT_ANCESTOR FROM 'C:\CDM\CONCEPT_ANCESTOR.csv' WITH DELIMITER E'\t' CSV HEADER QUOTE E'\b' ;
COPY CONCEPT_RELATIONSHIP FROM 'C:\CDM\CONCEPT_RELATIONSHIP.csv' WITH DELIMITER E'\t' CSV HEADER QUOTE E'\b' ;
COPY CONCEPT_SYNONYM FROM 'C:\CDM\CONCEPT_SYNONYM.csv' WITH DELIMITER E'\t' CSV HEADER QUOTE E'\b' ;
COPY DRUG_STRENGTH FROM 'C:\CDM\DRUG_STRENGTH.csv' WITH DELIMITER E'\t' CSV HEADER QUOTE E'\b' ;
COPY RELATIONSHIP FROM 'C:\CDM\RELATIONSHIP.csv' WITH DELIMITER E'\t' CSV HEADER QUOTE E'\b' ;
COPY SOURCE_TO_CONCEPT_MAP FROM 'C:\CDM\SOURCE_TO_CONCEPT_MAP.csv' WITH DELIMITER E'\t' CSV HEADER QUOTE E'\b' ;
COPY VOCABULARY FROM 'C:\CDM\VOCABULARY.csv' WITH DELIMITER E'\t' CSV HEADER QUOTE E'\b' ;

View File

@ -1,117 +0,0 @@
/*********************************************************************************
# Copyright 2015 Observational Health Data Sciences and Informatics
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
********************************************************************************/
/************************
####### # # ####### ###### ##### ###### # # ## #######
# # ## ## # # # # # # # # ## ## # # # # #
# # # # # # # # # # # # # # # # # # # # # #
# # # # # # # ###### # # # # # # # # ####### #######
# # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # #
####### # # ####### # ##### ###### # # ## # # #####
script to create OMOP common data model, version 4.5 for Sql Server database
last revised: 20 Mar 2015
author: Lee Evans
*************************/
-- Terminology concepts: one row per concept.
CREATE TABLE CONCEPT(
  CONCEPT_ID        int           NOT NULL,
  CONCEPT_NAME      varchar(256)  NOT NULL,
  CONCEPT_LEVEL     int           NOT NULL,
  CONCEPT_CLASS     varchar(60)   NOT NULL,
  VOCABULARY_ID     int           NOT NULL,
  CONCEPT_CODE      varchar(40)   NOT NULL,
  VALID_START_DATE  date          NOT NULL,
  -- 'YYYYMMDD' is interpreted the same way under every SET LANGUAGE / DATEFORMAT
  -- setting; '31-Dec-2099' depends on the session language's month names.
  VALID_END_DATE    date          NOT NULL DEFAULT ('20991231'),
  INVALID_REASON    char(1)       NULL
);
-- Ancestor/descendant pairs with their hierarchy distances.
-- Column order matters: BULK INSERT without a format file fills columns by position.
-- NOTE(review): MIN is placed before MAX on the assumption that CONCEPT_ANCESTOR.csv
-- lists min_levels_of_separation first - confirm against the file's header row.
CREATE TABLE CONCEPT_ANCESTOR(
  ANCESTOR_CONCEPT_ID       int  NOT NULL,
  DESCENDANT_CONCEPT_ID     int  NOT NULL,
  MIN_LEVELS_OF_SEPARATION  int  NULL,
  MAX_LEVELS_OF_SEPARATION  int  NULL
);
-- Directed relationships between two concepts (CONCEPT_ID_1 -> CONCEPT_ID_2).
CREATE TABLE CONCEPT_RELATIONSHIP(
  CONCEPT_ID_1      int      NOT NULL,
  CONCEPT_ID_2      int      NOT NULL,
  RELATIONSHIP_ID   int      NOT NULL,
  VALID_START_DATE  date     NOT NULL,
  -- 'YYYYMMDD' is interpreted the same way under every SET LANGUAGE / DATEFORMAT
  -- setting; '31-Dec-2099' depends on the session language's month names.
  VALID_END_DATE    date     NOT NULL DEFAULT ('20991231'),
  INVALID_REASON    char(1)  NULL
);
CREATE TABLE CONCEPT_SYNONYM(
CONCEPT_SYNONYM_ID int NOT NULL,
CONCEPT_ID int NOT NULL,
CONCEPT_SYNONYM_NAME varchar(1000) NOT NULL
);
CREATE TABLE DRUG_STRENGTH(
DRUG_CONCEPT_ID int NOT NULL,
INGREDIENT_CONCEPT_ID int NOT NULL,
AMOUNT_VALUE float NULL,
AMOUNT_UNIT varchar(60) NULL,
CONCENTRATION_VALUE float NULL,
CONCENTRATION_ENUM_UNIT varchar(60) NULL,
CONCENTRATION_DENOM_UNIT varchar(60) NULL,
VALID_START_DATE date NOT NULL,
VALID_END_DATE date NOT NULL,
INVALID_REASON varchar(1) NULL
);
CREATE TABLE RELATIONSHIP(
RELATIONSHIP_ID int NOT NULL,
RELATIONSHIP_NAME varchar(256) NOT NULL,
IS_HIERARCHICAL int NOT NULL,
DEFINES_ANCESTRY int NOT NULL DEFAULT ((1)),
REVERSE_RELATIONSHIP int NULL
);
-- Mappings from source codes to target concepts.
CREATE TABLE SOURCE_TO_CONCEPT_MAP(
  SOURCE_CODE              varchar(40)   NOT NULL,
  SOURCE_VOCABULARY_ID     int           NOT NULL,
  SOURCE_CODE_DESCRIPTION  varchar(256)  NULL,
  TARGET_CONCEPT_ID        int           NOT NULL,
  TARGET_VOCABULARY_ID     int           NOT NULL,
  MAPPING_TYPE             varchar(256)  NULL,
  PRIMARY_MAP              char(1)       NULL,
  VALID_START_DATE         date          NOT NULL,
  -- 'YYYYMMDD' is interpreted the same way under every SET LANGUAGE / DATEFORMAT
  -- setting; '31-Dec-2099' depends on the session language's month names.
  VALID_END_DATE           date          NOT NULL DEFAULT ('20991231'),
  INVALID_REASON           char(1)       NULL
);
CREATE TABLE VOCABULARY(
VOCABULARY_ID int NOT NULL,
VOCABULARY_NAME varchar(256) NOT NULL
);

View File

@ -1,14 +0,0 @@
Common-Data-Model / SQL Server
=================
This folder contains the SQL scripts for SQL Server.
In order to create your instantiation of the Common Data Model, we recommend following these steps:
1. Create an empty schema.
2. Execute the script `CDM V4 ddl.sql` to create the tables and fields.
3. Load your data into the schema using the loading scripts in VocabImport
Note: you could also apply the constraints and the indexes after loading the data; this will speed up the insertion of the data considerably.

View File

@ -1,130 +0,0 @@
/*********************************************************************************
# Copyright 2015 Observational Health Data Sciences and Informatics
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
********************************************************************************/
/************************
####### # # ####### ###### ##### ###### # # #######
# # ## ## # # # # # # # # ## ## # # #
# # # # # # # # # # # # # # # # # # # #
# # # # # # # ###### # # # # # # # # ######
# # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # #
####### # # ####### # ##### ###### # # ## #####
Script to load the common data model, version 4.5 vocabulary tables for SQL Server database
Notes
1) This script assumes the CDM version 4.5 vocabulary zip file has been unzipped into the "C:\CDM" directory.
2) If you unzipped your CDM version 4.5 vocabulary files into a different directory then replace all file paths below, with your directory path.
3) Run this SQL query script in the database where you created your CDM Version 4.5 tables
last revised: 20th March 2015
author: Lee Evans
*************************/
-- Reload CONCEPT from the tab-delimited vocabulary file; the table is emptied first.
TRUNCATE TABLE CONCEPT;
BULK INSERT CONCEPT
FROM 'C:\CDM\CONCEPT.csv'
WITH (
FIRSTROW = 2, -- start loading at the second row, i.e. skip the first (header) line
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '0x0a', -- rows end with a bare line feed (LF)
ERRORFILE = 'C:\CDM\CONCEPT.bad', -- rows that fail to load are written here
TABLOCK
);
TRUNCATE TABLE CONCEPT_ANCESTOR;
BULK INSERT CONCEPT_ANCESTOR
FROM 'C:\CDM\CONCEPT_ANCESTOR.csv'
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '0x0a',
ERRORFILE = 'C:\CDM\CONCEPT_ANCESTOR.bad',
TABLOCK
);
TRUNCATE TABLE CONCEPT_RELATIONSHIP;
BULK INSERT CONCEPT_RELATIONSHIP
FROM 'C:\CDM\CONCEPT_RELATIONSHIP.csv'
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '0x0a',
ERRORFILE = 'C:\CDM\CONCEPT_RELATIONSHIP.bad',
TABLOCK
);
TRUNCATE TABLE CONCEPT_SYNONYM;
BULK INSERT CONCEPT_SYNONYM
FROM 'C:\CDM\CONCEPT_SYNONYM.csv'
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '0x0a',
ERRORFILE = 'C:\CDM\CONCEPT_SYNONYM.bad',
TABLOCK
);
TRUNCATE TABLE DRUG_STRENGTH;
BULK INSERT DRUG_STRENGTH
FROM 'C:\CDM\DRUG_STRENGTH.csv'
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '0x0a',
ERRORFILE = 'C:\CDM\DRUG_STRENGTH.bad',
TABLOCK
);
TRUNCATE TABLE RELATIONSHIP;
BULK INSERT RELATIONSHIP
FROM 'C:\CDM\RELATIONSHIP.csv'
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '0x0a',
ERRORFILE = 'C:\CDM\RELATIONSHIP.bad',
TABLOCK
);
TRUNCATE TABLE SOURCE_TO_CONCEPT_MAP;
BULK INSERT SOURCE_TO_CONCEPT_MAP
FROM 'C:\CDM\SOURCE_TO_CONCEPT_MAP.csv'
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '0x0a',
ERRORFILE = 'C:\CDM\SOURCE_TO_CONCEPT_MAP.bad',
TABLOCK
);
TRUNCATE TABLE VOCABULARY;
BULK INSERT VOCABULARY
FROM 'C:\CDM\VOCABULARY.csv'
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '0x0a',
ERRORFILE = 'C:\CDM\VOCABULARY.bad',
TABLOCK
);