OMOP/Impala/OMOP_CDM_ddl_Impala.sql

717 lines
17 KiB
SQL

/*********************************************************************************
# Copyright 2014-6 Observational Health Data Sciences and Informatics
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
********************************************************************************/
/************************
####### # # ####### ###### ##### ###### # # ####### #####
# # ## ## # # # # # # # # ## ## # # # # #
# # # # # # # # # # # # # # # # # # # # #
# # # # # # # ###### # # # # # # # # ###### #####
# # # # # # # # # # # # # # # ### #
# # # # # # # # # # # # # # # # # ### #
####### # # ####### # ##### ###### # # ## ##### ### #######
script to create OMOP common data model, version 5.2 for Hadoop (Hive/Impala) database
Based on the PostgreSQL version, with the following changes:
* NULL/NOT NULL is not used.
* Dates are stored as VARCHAR(8). See http://stackoverflow.com/questions/33024309/convert-yyyymmdd-string-to-date-in-impala for how to treat date columns as dates.
* PostgreSQL NUMERIC is stored as DOUBLE for amounts, and DECIMAL(19,4) for prices.
* PostgreSQL TEXT is stored as STRING.
*************************/
/************************
Standardized vocabulary
************************/
CREATE TABLE concept (
concept_id INTEGER,
concept_name VARCHAR(255),
domain_id VARCHAR(20),
vocabulary_id VARCHAR(20),
concept_class_id VARCHAR(20),
standard_concept CHAR(1),
concept_code VARCHAR(50),
valid_start_date VARCHAR(8), -- DATE
valid_end_date VARCHAR(8), -- DATE
invalid_reason CHAR(1)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE vocabulary (
vocabulary_id VARCHAR(20),
vocabulary_name VARCHAR(255),
vocabulary_reference VARCHAR(255),
vocabulary_version VARCHAR(255),
vocabulary_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE domain (
domain_id VARCHAR(20),
domain_name VARCHAR(255),
domain_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE concept_class (
concept_class_id VARCHAR(20),
concept_class_name VARCHAR(255),
concept_class_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE concept_relationship (
concept_id_1 INTEGER,
concept_id_2 INTEGER,
relationship_id VARCHAR(20),
valid_start_date VARCHAR(8), -- DATE
valid_end_date VARCHAR(8), -- DATE
invalid_reason CHAR(1)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE relationship (
relationship_id VARCHAR(20),
relationship_name VARCHAR(255),
is_hierarchical CHAR(1),
defines_ancestry CHAR(1),
reverse_relationship_id VARCHAR(20),
relationship_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE concept_synonym (
concept_id INTEGER,
concept_synonym_name VARCHAR(1000),
language_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE concept_ancestor (
ancestor_concept_id INTEGER,
descendant_concept_id INTEGER,
min_levels_of_separation INTEGER,
max_levels_of_separation INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE source_to_concept_map (
source_code VARCHAR(50),
source_concept_id INTEGER,
source_vocabulary_id VARCHAR(20),
source_code_description VARCHAR(255),
target_concept_id INTEGER,
target_vocabulary_id VARCHAR(20),
valid_start_date VARCHAR(8), -- DATE
valid_end_date VARCHAR(8), -- DATE
invalid_reason CHAR(1)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE drug_strength (
drug_concept_id INTEGER,
ingredient_concept_id INTEGER,
amount_value DOUBLE, -- NUMERIC
amount_unit_concept_id INTEGER,
numerator_value DOUBLE, -- NUMERIC
numerator_unit_concept_id INTEGER,
denominator_value DOUBLE, -- NUMERIC
denominator_unit_concept_id INTEGER,
box_size INTEGER,
valid_start_date VARCHAR(8), -- DATE
valid_end_date VARCHAR(8), -- DATE
invalid_reason CHAR(1)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE cohort_definition (
cohort_definition_id INTEGER,
cohort_definition_name VARCHAR(255),
cohort_definition_description STRING, -- TEXT
definition_type_concept_id INTEGER,
cohort_definition_syntax STRING, -- TEXT
subject_concept_id INTEGER,
cohort_initiation_date VARCHAR(8) -- DATE
)
;
CREATE TABLE attribute_definition (
attribute_definition_id INTEGER,
attribute_name VARCHAR(255),
attribute_description STRING, -- TEXT
attribute_type_concept_id INTEGER,
attribute_syntax STRING -- TEXT
)
;
/**************************
Standardized meta-data
***************************/
CREATE TABLE cdm_source
(
cdm_source_name VARCHAR(255),
cdm_source_abbreviation VARCHAR(25),
cdm_holder VARCHAR(255),
source_description STRING, -- TEXT
source_documentation_reference VARCHAR(255),
cdm_etl_reference VARCHAR(255),
source_release_date VARCHAR(8), -- DATE
cdm_release_date VARCHAR(8), -- DATE
cdm_version VARCHAR(10),
vocabulary_version VARCHAR(20)
)
;
/************************
Standardized clinical data
************************/
CREATE TABLE person (
person_id INTEGER,
gender_concept_id INTEGER,
year_of_birth INTEGER,
month_of_birth INTEGER,
day_of_birth INTEGER,
birth_datetime VARCHAR(10),
race_concept_id INTEGER,
ethnicity_concept_id INTEGER,
location_id INTEGER,
provider_id INTEGER,
care_site_id INTEGER,
person_source_value VARCHAR(50),
gender_source_value VARCHAR(50),
gender_source_concept_id INTEGER,
race_source_value VARCHAR(50),
race_source_concept_id INTEGER,
ethnicity_source_value VARCHAR(50),
ethnicity_source_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE observation_period (
observation_period_id INTEGER,
person_id INTEGER,
observation_period_start_date VARCHAR(8), -- DATE
observation_period_end_date VARCHAR(8), -- DATE
period_type_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE specimen (
specimen_id INTEGER,
person_id INTEGER,
specimen_concept_id INTEGER,
specimen_type_concept_id INTEGER,
specimen_date VARCHAR(8), -- DATE
specimen_datetime VARCHAR(10),
quantity DOUBLE, -- NUMERIC
unit_concept_id INTEGER,
anatomic_site_concept_id INTEGER,
disease_status_concept_id INTEGER,
specimen_source_id VARCHAR(50),
specimen_source_value VARCHAR(50),
unit_source_value VARCHAR(50),
anatomic_site_source_value VARCHAR(50),
disease_status_source_value VARCHAR(50)
)
;
CREATE TABLE death (
person_id INTEGER,
death_date VARCHAR(8), -- DATE
death_type_concept_id INTEGER,
cause_concept_id INTEGER,
cause_source_value VARCHAR(50),
cause_source_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE visit_occurrence (
visit_occurrence_id INTEGER,
person_id INTEGER,
visit_concept_id INTEGER,
visit_start_date VARCHAR(8), -- DATE
visit_start_datetime VARCHAR(10),
visit_end_date VARCHAR(8), -- DATE
visit_end_datetime VARCHAR(10),
visit_type_concept_id INTEGER,
provider_id INTEGER,
care_site_id INTEGER,
visit_source_value VARCHAR(50),
visit_source_concept_id INTEGER,
admitting_source_concept_id INTEGER,
admitting_source_value VARCHAR(50),
discharge_to_concept_id INTEGER,
discharge_to_source_value VARCHAR(50),
preceding_visit_occurrence_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE procedure_occurrence (
procedure_occurrence_id INTEGER,
person_id INTEGER,
procedure_concept_id INTEGER,
procedure_date VARCHAR(8), -- DATE
procedure_type_concept_id INTEGER,
modifier_concept_id INTEGER,
quantity INTEGER,
provider_id INTEGER,
visit_occurrence_id INTEGER,
procedure_source_value VARCHAR(50),
procedure_source_concept_id INTEGER,
qualifier_source_value VARCHAR(50)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE drug_exposure (
drug_exposure_id INTEGER,
person_id INTEGER,
drug_concept_id INTEGER,
drug_exposure_start_date VARCHAR(8), -- DATE
drug_exposure_end_date VARCHAR(8), -- DATE
verbatim_end_date VARCHAR(8), --DATE
drug_type_concept_id INTEGER,
stop_reason VARCHAR(20),
refills INTEGER,
quantity DOUBLE, -- NUMERIC
days_supply INTEGER,
sig STRING, -- TEXT
route_concept_id INTEGER,
lot_number VARCHAR(50),
provider_id INTEGER,
visit_occurrence_id INTEGER,
drug_source_value VARCHAR(50),
drug_source_concept_id INTEGER,
route_source_value VARCHAR(50),
dose_unit_source_value VARCHAR(50)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE device_exposure (
device_exposure_id INTEGER,
person_id INTEGER,
device_concept_id INTEGER,
device_exposure_start_date VARCHAR(8), -- DATE
device_exposure_end_date VARCHAR(8), -- DATE
device_type_concept_id INTEGER,
unique_device_id VARCHAR(50),
quantity INTEGER,
provider_id INTEGER,
visit_occurrence_id INTEGER,
device_source_value VARCHAR(100),
device_source_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE condition_occurrence (
condition_occurrence_id INTEGER,
person_id INTEGER,
condition_concept_id INTEGER,
condition_start_date VARCHAR(8), -- DATE
condition_end_date VARCHAR(8), -- DATE
condition_type_concept_id INTEGER,
stop_reason VARCHAR(20),
provider_id INTEGER,
visit_occurrence_id INTEGER,
condition_source_value VARCHAR(50),
condition_source_concept_id INTEGER,
condition_status_source_value VARCHAR(50),
condition_status_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE measurement (
measurement_id INTEGER,
person_id INTEGER,
measurement_concept_id INTEGER,
measurement_date VARCHAR(8), -- DATE
measurement_datetime VARCHAR(10),
measurement_type_concept_id INTEGER,
operator_concept_id INTEGER,
value_as_number DOUBLE, -- NUMERIC
value_as_concept_id INTEGER,
unit_concept_id INTEGER,
range_low DOUBLE, -- NUMERIC
range_high DOUBLE, -- NUMERIC
provider_id INTEGER,
visit_occurrence_id INTEGER,
measurement_source_value VARCHAR(50),
measurement_source_concept_id INTEGER,
unit_source_value VARCHAR(50),
value_source_value VARCHAR(50)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE note (
note_id INTEGER,
person_id INTEGER,
note_date VARCHAR(8), -- DATE
note_datetime VARCHAR(10),
note_type_concept_id INTEGER,
note_class_concept_id INTEGER,
note_title VARCHAR(250),
note_text STRING, -- TEXT
encoding_concept_id INTEGER,
language_concept_id INTEGER,
provider_id INTEGER,
visit_occurrence_id INTEGER,
note_source_value VARCHAR(50)
)
;
CREATE TABLE note_nlp (
note_nlp_id BIGINT,
note_id INTEGER,
section_concept_id INTEGER,
snippet VARCHAR(250),
offset VARCHAR(250),
lexical_variant VARCHAR(250),
note_nlp_concept_id INTEGER,
note_nlp_source_concept_id INTEGER,
nlp_system VARCHAR(250),
nlp_date VARCHAR(8),
nlp_datetime VARCHAR(10),
term_exists CHAR(1),
term_temporal VARCHAR(50),
term_modifiers VARCHAR(2000)
)
;
CREATE TABLE observation (
observation_id INTEGER,
person_id INTEGER,
observation_concept_id INTEGER,
observation_date VARCHAR(8), -- DATE
observation_datetime VARCHAR(10),
observation_type_concept_id INTEGER,
value_as_number DOUBLE, -- NUMERIC
value_as_string VARCHAR(60),
value_as_concept_id INTEGER,
qualifier_concept_id INTEGER,
unit_concept_id INTEGER,
provider_id INTEGER,
visit_occurrence_id INTEGER,
observation_source_value VARCHAR(50),
observation_source_concept_id INTEGER,
unit_source_value VARCHAR(50),
qualifier_source_value VARCHAR(50)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE fact_relationship (
domain_concept_id_1 INTEGER,
fact_id_1 INTEGER,
domain_concept_id_2 INTEGER,
fact_id_2 INTEGER,
relationship_concept_id INTEGER
)
;
/************************
Standardized health system data
************************/
CREATE TABLE `location` (
location_id INTEGER,
address_1 VARCHAR(50),
address_2 VARCHAR(50),
city VARCHAR(50),
state CHAR(2),
zip VARCHAR(9),
county VARCHAR(20),
location_source_value VARCHAR(50)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE care_site (
care_site_id INTEGER,
care_site_name VARCHAR(255),
place_of_service_concept_id INTEGER,
location_id INTEGER,
care_site_source_value VARCHAR(50),
place_of_service_source_value VARCHAR(50)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE provider (
provider_id INTEGER,
provider_name VARCHAR(255),
NPI VARCHAR(20),
DEA VARCHAR(20),
specialty_concept_id INTEGER,
care_site_id INTEGER,
year_of_birth INTEGER,
gender_concept_id INTEGER,
provider_source_value VARCHAR(50),
specialty_source_value VARCHAR(50),
specialty_source_concept_id INTEGER,
gender_source_value VARCHAR(50),
gender_source_concept_id INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
/************************
Standardized health economics
************************/
CREATE TABLE payer_plan_period (
payer_plan_period_id INTEGER,
person_id INTEGER,
payer_plan_period_start_date VARCHAR(8), -- DATE
payer_plan_period_end_date VARCHAR(8), -- DATE
payer_source_value VARCHAR (50),
plan_source_value VARCHAR (50),
family_source_value VARCHAR (50)
)
;
CREATE TABLE cost (
cost_id INTEGER,
cost_event_id INTEGER,
cost_domain_id VARCHAR(20),
cost_type_concept_id INTEGER,
currency_concept_id INTEGER,
total_charge DECIMAL(19,4), -- NUMERIC
total_cost DECIMAL(19,4), -- NUMERIC
total_paid DECIMAL(19,4), -- NUMERIC
paid_by_payer DECIMAL(19,4), -- NUMERIC
paid_by_patient DECIMAL(19,4), -- NUMERIC
paid_patient_copay DECIMAL(19,4), -- NUMERIC
paid_patient_coinsurance DECIMAL(19,4), -- NUMERIC
paid_patient_deductible DECIMAL(19,4), -- NUMERIC
paid_by_primary DECIMAL(19,4), -- NUMERIC
paid_ingredient_cost DECIMAL(19,4), -- NUMERIC
paid_dispensing_fee DECIMAL(19,4), -- NUMERIC
payer_plan_period_id INTEGER,
amount_allowed DECIMAL(19,4), -- NUMERIC
revenue_code_concept_id INTEGER,
reveue_code_source_value VARCHAR(50),
drg_concept_id INTEGER,
drg_source_value CHAR(3)
)
;
/************************
Standardized derived elements
************************/
CREATE TABLE cohort (
cohort_definition_id INTEGER,
subject_id INTEGER,
cohort_start_date VARCHAR(8), -- DATE
cohort_end_date VARCHAR(8) -- DATE
)
;
CREATE TABLE cohort_attribute (
cohort_definition_id INTEGER,
cohort_start_date VARCHAR(8), -- DATE
cohort_end_date VARCHAR(8), -- DATE
subject_id INTEGER,
attribute_definition_id INTEGER,
value_as_number DOUBLE, -- NUMERIC
value_as_concept_id INTEGER
)
;
CREATE TABLE drug_era (
drug_era_id INTEGER,
person_id INTEGER,
drug_concept_id INTEGER,
drug_era_start_date VARCHAR(8), -- DATE
drug_era_end_date VARCHAR(8), -- DATE
drug_exposure_count INTEGER,
gap_days INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;
CREATE TABLE dose_era (
dose_era_id INTEGER,
person_id INTEGER,
drug_concept_id INTEGER,
unit_concept_id INTEGER,
dose_value DOUBLE, -- NUMERIC
dose_era_start_date VARCHAR(8), -- DATE
dose_era_end_date VARCHAR(8) -- DATE
)
;
CREATE TABLE condition_era (
condition_era_id INTEGER,
person_id INTEGER,
condition_concept_id INTEGER,
condition_era_start_date VARCHAR(8), -- DATE
condition_era_end_date VARCHAR(8), -- DATE
condition_occurrence_count INTEGER
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
TBLPROPERTIES ("skip.header.line.count"="1")
;