OMOP/Impala/OMOP CDM impala ddl.txt

787 lines
22 KiB
Plaintext

/*********************************************************************************
# Copyright 2018-08 Observational Health Data Sciences and Informatics
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
********************************************************************************/
/************************
####### # # ####### ###### ##### ###### # # ##### ###
# # ## ## # # # # # # # # ## ## # # # # # #
# # # # # # # # # # # # # # # # # # # # # #
# # # # # # # ###### # # # # # # # # ###### # #
# # # # # # # # # # # # # # # # ### # #
# # # # # # # # # # # # # # # # # ### # #
####### # # ####### # ##### ###### # # ## ##### ### ###
impala script to create OMOP common data model version 6.0
last revised: 27-Aug-2018
Authors: Patrick Ryan, Christian Reich, Clair Blacketer
*************************/
/************************
Standardized vocabulary
************************/
CREATE TABLE concept (
concept_id INTEGER ,
concept_name VARCHAR(255) ,
domain_id VARCHAR(20) ,
vocabulary_id VARCHAR(20) ,
concept_class_id VARCHAR(20) ,
standard_concept VARCHAR(1) ,
concept_code VARCHAR(50) ,
valid_start_date TIMESTAMP ,
valid_end_date TIMESTAMP ,
invalid_reason VARCHAR(1)
)
;
CREATE TABLE vocabulary (
vocabulary_id VARCHAR(20),
vocabulary_name VARCHAR(255),
vocabulary_reference VARCHAR(255),
vocabulary_version VARCHAR(255),
vocabulary_concept_id INTEGER
)
;
CREATE TABLE domain (
domain_id VARCHAR(20),
domain_name VARCHAR(255),
domain_concept_id INTEGER
)
;
CREATE TABLE concept_class (
concept_class_id VARCHAR(20),
concept_class_name VARCHAR(255),
concept_class_concept_id INTEGER
)
;
CREATE TABLE concept_relationship (
concept_id_1 INTEGER ,
concept_id_2 INTEGER ,
relationship_id VARCHAR(20),
valid_start_date TIMESTAMP,
valid_end_date TIMESTAMP,
invalid_reason VARCHAR(1)
)
;
CREATE TABLE relationship (
relationship_id VARCHAR(20),
relationship_name VARCHAR(255),
is_hierarchical VARCHAR(1),
defines_ancestry VARCHAR(1),
reverse_relationship_id VARCHAR(20),
relationship_concept_id INTEGER
)
;
CREATE TABLE concept_synonym (
concept_id INTEGER ,
concept_synonym_name STRING,
language_concept_id INTEGER
)
;
CREATE TABLE concept_ancestor (
ancestor_concept_id INTEGER ,
descendant_concept_id INTEGER ,
min_levels_of_separation INTEGER ,
max_levels_of_separation INTEGER
)
;
CREATE TABLE source_to_concept_map (
source_code VARCHAR(50),
source_concept_id INTEGER ,
source_vocabulary_id VARCHAR(20),
source_code_description VARCHAR(255),
target_concept_id INTEGER ,
target_vocabulary_id VARCHAR(20),
valid_start_date TIMESTAMP,
valid_end_date TIMESTAMP,
invalid_reason VARCHAR(1)
)
;
CREATE TABLE drug_strength (
drug_concept_id INTEGER ,
ingredient_concept_id INTEGER ,
amount_value FLOAT,
amount_unit_concept_id INTEGER ,
numerator_value FLOAT,
numerator_unit_concept_id INTEGER ,
denominator_value FLOAT,
denominator_unit_concept_id INTEGER ,
box_size INTEGER ,
valid_start_date TIMESTAMP,
valid_end_date TIMESTAMP,
invalid_reason VARCHAR(1)
)
;
CREATE TABLE cohort_definition (
cohort_definition_id INTEGER ,
cohort_definition_name VARCHAR(255),
cohort_definition_description STRING,
definition_type_concept_id INTEGER ,
cohort_definition_syntax STRING,
subject_concept_id INTEGER ,
cohort_initiation_date TIMESTAMP
)
;
CREATE TABLE attribute_definition (
attribute_definition_id INTEGER ,
attribute_name VARCHAR(255),
attribute_description STRING,
attribute_type_concept_id INTEGER ,
attribute_syntax STRING
)
;
/**************************
Standardized meta-data
***************************/
CREATE TABLE cdm_source
(
cdm_source_name VARCHAR(255) ,
cdm_source_abbreviation VARCHAR(25) ,
cdm_holder VARCHAR(255) ,
source_description STRING ,
source_documentation_reference VARCHAR(255) ,
cdm_etl_reference VARCHAR(255) ,
source_release_date TIMESTAMP ,
cdm_release_date TIMESTAMP ,
cdm_version VARCHAR(10) ,
vocabulary_version VARCHAR(20)
)
;
CREATE TABLE `metadata`
(
metadata_concept_id INTEGER ,
metadata_type_concept_id INTEGER ,
name VARCHAR(250) ,
value_as_string STRING ,
value_as_concept_id INTEGER ,
metadata_date TIMESTAMP ,
metadata_datetime TIMESTAMP
)
;
INSERT INTO metadata (name, value_as_string) --Added cdm version record
VALUES ('CDM Version', '6.0')
;
/************************
Standardized clinical data
************************/
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE person
(
person_id BIGINT , --BIGINTs added
gender_concept_id INTEGER ,
year_of_birth INTEGER ,
month_of_birth INTEGER ,
day_of_birth INTEGER ,
birth_datetime TIMESTAMP ,
race_concept_id INTEGER ,
ethnicity_concept_id INTEGER ,
location_id INTEGER ,
provider_id INTEGER ,
care_site_id INTEGER ,
person_source_value VARCHAR(50),
gender_source_value VARCHAR(50),
gender_source_concept_id INTEGER ,
race_source_value VARCHAR(50),
race_source_concept_id INTEGER ,
ethnicity_source_value VARCHAR(50),
ethnicity_source_concept_id INTEGER
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE observation_period
(
observation_period_id BIGINT ,
person_id BIGINT ,
observation_period_start_date TIMESTAMP ,
observation_period_end_date TIMESTAMP ,
period_type_concept_id INTEGER
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE specimen
(
specimen_id BIGINT ,
person_id BIGINT ,
specimen_concept_id INTEGER ,
specimen_type_concept_id INTEGER ,
specimen_date TIMESTAMP ,
specimen_datetime TIMESTAMP ,
quantity FLOAT ,
unit_concept_id INTEGER ,
anatomic_site_concept_id INTEGER ,
disease_status_concept_id INTEGER ,
specimen_source_id VARCHAR(50) ,
specimen_source_value VARCHAR(50) ,
unit_source_value VARCHAR(50) ,
anatomic_site_source_value VARCHAR(50) ,
disease_status_source_value VARCHAR(50)
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE death
(
person_id BIGINT ,
death_date TIMESTAMP ,
death_datetime TIMESTAMP ,
death_type_concept_id INTEGER ,
cause_concept_id INTEGER ,
cause_source_value VARCHAR(50),
cause_source_concept_id INTEGER
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE visit_occurrence
(
visit_occurrence_id BIGINT ,
person_id BIGINT ,
visit_concept_id INTEGER ,
visit_start_date TIMESTAMP ,
visit_start_datetime TIMESTAMP ,
visit_end_date TIMESTAMP ,
visit_end_datetime TIMESTAMP ,
visit_type_concept_id INTEGER ,
provider_id INTEGER ,
care_site_id INTEGER ,
visit_source_value VARCHAR(50),
visit_source_concept_id INTEGER ,
admitted_from_concept_id INTEGER , /*Changed from admitting_source_* */
admitted_from_source_value VARCHAR(50),
discharge_to_source_value VARCHAR(50) ,
discharge_to_concept_id INTEGER ,
preceding_visit_occurrence_id INTEGER
)
;
--todo admitting 'source'
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE visit_detail
(
visit_detail_id BIGINT ,
person_id BIGINT ,
visit_detail_concept_id INTEGER ,
visit_detail_start_date TIMESTAMP ,
visit_detail_start_datetime TIMESTAMP ,
visit_detail_end_date TIMESTAMP ,
visit_detail_end_datetime TIMESTAMP ,
visit_detail_type_concept_id INTEGER ,
provider_id INTEGER ,
care_site_id INTEGER ,
discharge_to_concept_id INTEGER ,
admitted_from_concept_id INTEGER , /*Changed from admitting_source_* */
admitted_from_source_value VARCHAR(50) ,
visit_detail_source_value VARCHAR(50) ,
visit_detail_source_concept_id INTEGER ,
discharge_to_source_value VARCHAR(50) ,
preceding_visit_detail_id BIGINT ,
visit_detail_parent_id BIGINT ,
visit_occurrence_id BIGINT
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE procedure_occurrence
(
procedure_occurrence_id BIGINT ,
person_id BIGINT ,
procedure_concept_id INTEGER ,
procedure_date TIMESTAMP ,
procedure_datetime TIMESTAMP ,
procedure_type_concept_id INTEGER ,
modifier_concept_id INTEGER ,
quantity INTEGER ,
provider_id INTEGER ,
visit_occurrence_id INTEGER ,
visit_detail_id INTEGER ,
procedure_source_value VARCHAR(50) ,
procedure_source_concept_id INTEGER ,
modifier_source_value VARCHAR(50) ,
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE drug_exposure
(
drug_exposure_id BIGINT ,
person_id BIGINT ,
drug_concept_id INTEGER ,
drug_exposure_start_date TIMESTAMP ,
drug_exposure_start_datetime TIMESTAMP ,
drug_exposure_end_date TIMESTAMP ,
drug_exposure_end_datetime TIMESTAMP ,
verbatim_end_date TIMESTAMP ,
drug_type_concept_id INTEGER ,
stop_reason VARCHAR(20) ,
refills INTEGER ,
quantity FLOAT ,
days_supply INTEGER ,
sig STRING ,
route_concept_id INTEGER ,
lot_number VARCHAR(50) ,
provider_id INTEGER ,
visit_occurrence_id INTEGER ,
visit_detail_id INTEGER ,
drug_source_value VARCHAR(50) ,
drug_source_concept_id INTEGER ,
route_source_value VARCHAR(50) ,
dose_unit_source_value VARCHAR(50)
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE device_exposure
(
device_exposure_id BIGINT ,
person_id BIGINT ,
device_concept_id INTEGER ,
device_exposure_start_date TIMESTAMP ,
device_exposure_start_datetime TIMESTAMP ,
device_exposure_end_date TIMESTAMP ,
device_exposure_end_datetime TIMESTAMP ,
device_type_concept_id INTEGER ,
unique_device_id VARCHAR(50) ,
quantity INTEGER ,
provider_id INTEGER ,
visit_occurrence_id INTEGER ,
visit_detail_id INTEGER ,
device_source_value VARCHAR(100) ,
device_source_concept_id INTEGER
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE condition_occurrence
(
condition_occurrence_id BIGINT ,
person_id BIGINT ,
condition_concept_id INTEGER ,
condition_start_date TIMESTAMP ,
condition_start_datetime TIMESTAMP ,
condition_end_date TIMESTAMP ,
condition_end_datetime TIMESTAMP ,
condition_type_concept_id INTEGER ,
condition_status_concept_id INTEGER ,
stop_reason VARCHAR(20) ,
provider_id INTEGER ,
visit_occurrence_id INTEGER ,
visit_detail_id INTEGER ,
condition_source_value VARCHAR(50) ,
condition_source_concept_id INTEGER ,
condition_status_source_value VARCHAR(50)
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE measurement
(
measurement_id BIGINT ,
person_id BIGINT ,
measurement_concept_id INTEGER ,
measurement_date TIMESTAMP ,
measurement_datetime TIMESTAMP ,
measurement_time VARCHAR(10),
measurement_type_concept_id INTEGER ,
operator_concept_id INTEGER ,
value_as_number FLOAT ,
value_as_concept_id INTEGER ,
unit_concept_id INTEGER ,
range_low FLOAT ,
range_high FLOAT ,
provider_id INTEGER ,
visit_occurrence_id INTEGER ,
visit_detail_id INTEGER ,
measurement_source_value VARCHAR(50) ,
measurement_source_concept_id INTEGER ,
unit_source_value VARCHAR(50) ,
value_source_value VARCHAR(50)
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE note
(
note_id BIGINT ,
person_id BIGINT ,
note_event_id BIGINT , --This and the field below added
note_domain_id VARCHAR(20) , --This field may be removed in favor of the one below
--note_event_table_concept_id INTEGER , --This may be added based on 9/4 meeting
note_date TIMESTAMP ,
note_datetime TIMESTAMP ,
note_type_concept_id INTEGER ,
note_class_concept_id INTEGER ,
note_title VARCHAR(250) ,
note_text STRING ,
encoding_concept_id INTEGER ,
language_concept_id INTEGER ,
provider_id INTEGER ,
visit_occurrence_id INTEGER ,
visit_detail_id INTEGER ,
note_source_value VARCHAR(50)
)
;
CREATE TABLE note_nlp
(
note_nlp_id BIGINT ,
note_id BIGINT ,
section_concept_id INTEGER ,
snippet VARCHAR(250) ,
`offset` VARCHAR(250) ,
lexical_variant VARCHAR(250) ,
note_nlp_concept_id INTEGER ,
nlp_system VARCHAR(250) ,
nlp_date TIMESTAMP ,
nlp_datetime TIMESTAMP ,
term_exists VARCHAR(1) ,
term_temporal VARCHAR(50) ,
term_modifiers VARCHAR(2000) ,
note_nlp_source_concept_id INTEGER
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE observation
(
observation_id BIGINT ,
person_id BIGINT ,
observation_concept_id INTEGER ,
observation_date TIMESTAMP ,
observation_datetime TIMESTAMP ,
observation_type_concept_id INTEGER ,
value_as_number FLOAT ,
value_as_string VARCHAR(60) ,
value_as_concept_id INTEGER ,
qualifier_concept_id INTEGER ,
unit_concept_id INTEGER ,
provider_id INTEGER ,
visit_occurrence_id BIGINT ,
visit_detail_id BIGINT ,
observation_source_value VARCHAR(50) ,
observation_source_concept_id INTEGER ,
unit_source_value VARCHAR(50) ,
qualifier_source_value VARCHAR(50) ,
observation_event_id BIGINT , /* This will link back to the event table (SURVEY) on SURVEY_OCCURRENCE_ID, changed name to comply with COST and NOTE*/
observation_event_domain_id VARCHAR(20) ,
--obs_event_table_concept_id INTEGER , /* Changed name to comply with COST and NOTE. This should be 'Survey'. Had to use 'obs' for oracle restriction, may be added on 9/4*/
value_as_datetime TIMESTAMP
)
;
CREATE TABLE survey_conduct
(
survey_conduct_id BIGINT ,
person_id BIGINT ,
survey_concept_id INTEGER ,
survey_start_date TIMESTAMP ,
survey_start_datetime TIMESTAMP ,
survey_end_date TIMESTAMP ,
survey_end_datetime TIMESTAMP ,
provider_id BIGINT ,
assisted_concept_id INTEGER ,
respondent_type_concept_id INTEGER ,
timing_concept_id INTEGER ,
collection_method_concept_id INTEGER ,
assisted_source_value VARCHAR(50) ,
respondent_type_source_value VARCHAR(100) ,
timing_source_value VARCHAR(100) ,
collection_method_source_value VARCHAR(100) ,
survey_source_value VARCHAR(100) ,
survey_source_concept_id INTEGER ,
survey_source_identifier VARCHAR(100) ,
validated_survey_concept_id INTEGER ,
validated_survey_source_value VARCHAR(100) ,
survey_version_number VARCHAR(20) ,
visit_occurrence_id BIGINT ,
visit_detail_id BIGINT ,
response_to_visit_occurrence_id BIGINT
)
;
CREATE TABLE fact_relationship
(
domain_concept_id_1 INTEGER ,
fact_id_1 BIGINT ,
domain_concept_id_2 INTEGER ,
fact_id_2 BIGINT ,
relationship_concept_id INTEGER
)
;
/************************
Standardized health system data
************************/
CREATE TABLE location
(
location_id BIGINT ,
address_1 VARCHAR(50) ,
address_2 VARCHAR(50) ,
city VARCHAR(50) ,
state VARCHAR(2) ,
zip VARCHAR(9) ,
county VARCHAR(20) ,
country VARCHAR(100) ,
location_source_value VARCHAR(50) ,
latitude FLOAT ,
longitude FLOAT
)
;
CREATE TABLE location_history
(
location_history_id BIGINT ,
location_id BIGINT ,
relationship_type_concept_id INTEGER , --Recent addition based on github discussion
domain_id VARCHAR(50) ,
entity_id BIGINT ,
start_date TIMESTAMP ,
end_date TIMESTAMP
)
;
CREATE TABLE care_site
(
care_site_id BIGINT ,
care_site_name VARCHAR(255) ,
place_of_service_concept_id INTEGER ,
location_id BIGINT ,
care_site_source_value VARCHAR(50) ,
place_of_service_source_value VARCHAR(50)
)
;
CREATE TABLE provider
(
provider_id BIGINT ,
provider_name VARCHAR(255) ,
NPI VARCHAR(20) ,
DEA VARCHAR(20) ,
specialty_concept_id INTEGER ,
care_site_id BIGINT ,
year_of_birth INTEGER ,
gender_concept_id INTEGER ,
provider_source_value VARCHAR(50) ,
specialty_source_value VARCHAR(50) ,
specialty_source_concept_id INTEGER ,
gender_source_value VARCHAR(50) ,
gender_source_concept_id INTEGER
)
;
/************************
Standardized health economics
************************/
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE payer_plan_period
(
payer_plan_period_id BIGINT ,
person_id BIGINT ,
contract_person_id BIGINT ,
payer_plan_period_start_date TIMESTAMP ,
payer_plan_period_end_date TIMESTAMP ,
payer_concept_id INTEGER ,
plan_concept_id INTEGER ,
contract_concept_id INTEGER ,
sponsor_concept_id INTEGER ,
stop_reason_concept_id INTEGER ,
payer_source_value VARCHAR(50) ,
payer_source_concept_id INTEGER ,
plan_source_value VARCHAR(50) ,
plan_source_concept_id INTEGER ,
contract_source_value VARCHAR(50) ,
contract_source_concept_id INTEGER ,
sponsor_source_value VARCHAR(50) ,
sponsor_source_concept_id INTEGER ,
family_source_value VARCHAR(50) ,
stop_reason_source_value VARCHAR(50) ,
stop_reason_source_concept_id INTEGER
)
;
CREATE TABLE cost
(
cost_id BIGINT ,
person_id BIGINT,
cost_event_id BIGINT ,
cost_domain_id VARCHAR(20) ,
--cost_event_table_concept_id INTEGER , /*This is still in discussion and most likely will replace cost_domain_id at 9/4 meeting*/
cost_concept_id INTEGER ,
cost_type_concept_id INTEGER ,
currency_concept_id INTEGER ,
cost FLOAT ,
incurred_date TIMESTAMP ,
billed_date TIMESTAMP ,
paid_date TIMESTAMP ,
revenue_code_concept_id INTEGER ,
drg_concept_id INTEGER ,
cost_source_value VARCHAR(50) ,
cost_source_concept_id INTEGER ,
revenue_code_source_value VARCHAR(50) ,
drg_source_value VARCHAR(3) ,
payer_plan_period_id BIGINT
)
;
/************************
Standardized derived elements
************************/
--HINT DISTRIBUTE_ON_KEY(subject_id)
CREATE TABLE cohort
(
cohort_definition_id BIGINT ,
subject_id BIGINT ,
cohort_start_date TIMESTAMP ,
cohort_end_date TIMESTAMP
)
;
--HINT DISTRIBUTE_ON_KEY(subject_id)
CREATE TABLE cohort_attribute
(
cohort_definition_id BIGINT ,
subject_id BIGINT ,
cohort_start_date TIMESTAMP ,
cohort_end_date TIMESTAMP ,
attribute_definition_id BIGINT ,
value_as_number FLOAT ,
value_as_concept_id INTEGER
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE drug_era
(
drug_era_id BIGINT ,
person_id BIGINT ,
drug_concept_id INTEGER ,
drug_era_start_date TIMESTAMP ,
drug_era_end_date TIMESTAMP ,
drug_exposure_count INTEGER ,
gap_days INTEGER
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE dose_era
(
dose_era_id BIGINT ,
person_id BIGINT ,
drug_concept_id INTEGER ,
unit_concept_id INTEGER ,
dose_value FLOAT ,
dose_era_start_date TIMESTAMP ,
dose_era_end_date TIMESTAMP
)
;
--HINT DISTRIBUTE_ON_KEY(person_id)
CREATE TABLE condition_era
(
condition_era_id BIGINT ,
person_id BIGINT ,
condition_concept_id INTEGER ,
condition_era_start_date TIMESTAMP ,
condition_era_end_date TIMESTAMP ,
condition_occurrence_count INTEGER
)
;