Merge pull request #37 from tomwhite/impala
Add scripts for creating a schema and importing data into Impala.
This commit is contained in:
commit
d99820c5e2
|
@ -0,0 +1,15 @@
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_CARE_SITE.csv' OVERWRITE INTO TABLE care_site;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_CONDITION_OCCURRENCE.csv' OVERWRITE INTO TABLE condition_occurrence;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_DEATH.csv' OVERWRITE INTO TABLE death;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_DRUG_EXPOSURE.csv' OVERWRITE INTO TABLE drug_exposure;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_DEVICE_EXPOSURE.csv' OVERWRITE INTO TABLE device_exposure;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_LOCATION.csv' OVERWRITE INTO TABLE `location`;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_MEASUREMENT.csv' OVERWRITE INTO TABLE measurement;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_OBSERVATION.csv' OVERWRITE INTO TABLE observation;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_OBSERVATION_PERIOD.csv' OVERWRITE INTO TABLE observation_period;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_PERSON.csv' OVERWRITE INTO TABLE person;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_PROCEDURE_OCCURRENCE.csv' OVERWRITE INTO TABLE procedure_occurrence;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_PROVIDER.csv' OVERWRITE INTO TABLE provider;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_VISIT_OCCURRENCE.csv' OVERWRITE INTO TABLE visit_occurrence;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_DRUG_ERA.csv' OVERWRITE INTO TABLE drug_era;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_SYNPUF_PATH}/CDM_CONDITION_ERA.csv' OVERWRITE INTO TABLE condition_era;
|
|
@ -0,0 +1,763 @@
|
||||||
|
/*********************************************************************************
|
||||||
|
# Copyright 2014-6 Observational Health Data Sciences and Informatics
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
********************************************************************************/
|
||||||
|
|
||||||
|
/************************
|
||||||
|
|
||||||
|
####### # # ####### ###### ##### ###### # # #######
|
||||||
|
# # ## ## # # # # # # # # ## ## # # #
|
||||||
|
# # # # # # # # # # # # # # # # # # # #
|
||||||
|
# # # # # # # ###### # # # # # # # # ######
|
||||||
|
# # # # # # # # # # # # # # #
|
||||||
|
# # # # # # # # # # # # # # # # #
|
||||||
|
####### # # ####### # ##### ###### # # ## #####
|
||||||
|
|
||||||
|
|
||||||
|
script to create OMOP common data model, version 5.0 for Hadoop (Hive/Impala) database
|
||||||
|
|
||||||
|
Based on the PostgreSQL version, with the following changes:
|
||||||
|
* NULL/NOT NULL is not used.
|
||||||
|
* Dates are stored as VARCHAR(8). See http://stackoverflow.com/questions/33024309/convert-yyyymmdd-string-to-date-in-impala for how to treat date columns as dates.
|
||||||
|
* PostgreSQL NUMERIC is stored as DOUBLE for amounts, and DECIMAL(19,4) for prices.
|
||||||
|
* PostgreSQL TEXT is stored as STRING.
|
||||||
|
|
||||||
|
*************************/
|
||||||
|
|
||||||
|
|
||||||
|
/************************
|
||||||
|
|
||||||
|
Standardized vocabulary
|
||||||
|
|
||||||
|
************************/
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE concept (
|
||||||
|
concept_id INTEGER,
|
||||||
|
concept_name VARCHAR(255),
|
||||||
|
domain_id VARCHAR(20),
|
||||||
|
vocabulary_id VARCHAR(20),
|
||||||
|
concept_class_id VARCHAR(20),
|
||||||
|
standard_concept VARCHAR(1),
|
||||||
|
concept_code VARCHAR(50),
|
||||||
|
valid_start_date VARCHAR(8), -- DATE
|
||||||
|
valid_end_date VARCHAR(8), -- DATE
|
||||||
|
invalid_reason VARCHAR(1)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE vocabulary (
|
||||||
|
vocabulary_id VARCHAR(20),
|
||||||
|
vocabulary_name VARCHAR(255),
|
||||||
|
vocabulary_reference VARCHAR(255),
|
||||||
|
vocabulary_version VARCHAR(255),
|
||||||
|
vocabulary_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE domain (
|
||||||
|
domain_id VARCHAR(20),
|
||||||
|
domain_name VARCHAR(255),
|
||||||
|
domain_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE concept_class (
|
||||||
|
concept_class_id VARCHAR(20),
|
||||||
|
concept_class_name VARCHAR(255),
|
||||||
|
concept_class_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE concept_relationship (
|
||||||
|
concept_id_1 INTEGER,
|
||||||
|
concept_id_2 INTEGER,
|
||||||
|
relationship_id VARCHAR(20),
|
||||||
|
valid_start_date VARCHAR(8), -- DATE
|
||||||
|
valid_end_date VARCHAR(8), -- DATE
|
||||||
|
invalid_reason VARCHAR(1)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE relationship (
|
||||||
|
relationship_id VARCHAR(20),
|
||||||
|
relationship_name VARCHAR(255),
|
||||||
|
is_hierarchical VARCHAR(1),
|
||||||
|
defines_ancestry VARCHAR(1),
|
||||||
|
reverse_relationship_id VARCHAR(20),
|
||||||
|
relationship_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE concept_synonym (
|
||||||
|
concept_id INTEGER,
|
||||||
|
concept_synonym_name VARCHAR(1000),
|
||||||
|
language_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE concept_ancestor (
|
||||||
|
ancestor_concept_id INTEGER,
|
||||||
|
descendant_concept_id INTEGER,
|
||||||
|
min_levels_of_separation INTEGER,
|
||||||
|
max_levels_of_separation INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE source_to_concept_map (
|
||||||
|
source_code VARCHAR(50),
|
||||||
|
source_concept_id INTEGER,
|
||||||
|
source_vocabulary_id VARCHAR(20),
|
||||||
|
source_code_description VARCHAR(255),
|
||||||
|
target_concept_id INTEGER,
|
||||||
|
target_vocabulary_id VARCHAR(20),
|
||||||
|
valid_start_date VARCHAR(8), -- DATE
|
||||||
|
valid_end_date VARCHAR(8), -- DATE
|
||||||
|
invalid_reason VARCHAR(1)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE drug_strength (
|
||||||
|
drug_concept_id INTEGER,
|
||||||
|
ingredient_concept_id INTEGER,
|
||||||
|
amount_value DOUBLE, -- NUMERIC
|
||||||
|
amount_unit_concept_id INTEGER,
|
||||||
|
numerator_value DOUBLE, -- NUMERIC
|
||||||
|
numerator_unit_concept_id INTEGER,
|
||||||
|
denominator_value DOUBLE, -- NUMERIC
|
||||||
|
denominator_unit_concept_id INTEGER,
|
||||||
|
box_size INTEGER,
|
||||||
|
valid_start_date VARCHAR(8), -- DATE
|
||||||
|
valid_end_date VARCHAR(8), -- DATE
|
||||||
|
invalid_reason VARCHAR(1)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE cohort_definition (
|
||||||
|
cohort_definition_id INTEGER,
|
||||||
|
cohort_definition_name VARCHAR(255),
|
||||||
|
cohort_definition_description STRING, -- TEXT
|
||||||
|
definition_type_concept_id INTEGER,
|
||||||
|
cohort_definition_syntax STRING, -- TEXT
|
||||||
|
subject_concept_id INTEGER,
|
||||||
|
cohort_initiation_date VARCHAR(8) -- DATE
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE attribute_definition (
|
||||||
|
attribute_definition_id INTEGER,
|
||||||
|
attribute_name VARCHAR(255),
|
||||||
|
attribute_description STRING, -- TEXT
|
||||||
|
attribute_type_concept_id INTEGER,
|
||||||
|
attribute_syntax STRING -- TEXT
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
/**************************
|
||||||
|
|
||||||
|
Standardized meta-data
|
||||||
|
|
||||||
|
***************************/
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE cdm_source
|
||||||
|
(
|
||||||
|
cdm_source_name VARCHAR(255),
|
||||||
|
cdm_source_abbreviation VARCHAR(25),
|
||||||
|
cdm_holder VARCHAR(255),
|
||||||
|
source_description STRING, -- TEXT
|
||||||
|
source_documentation_reference VARCHAR(255),
|
||||||
|
cdm_etl_reference VARCHAR(255),
|
||||||
|
source_release_date VARCHAR(8), -- DATE
|
||||||
|
cdm_release_date VARCHAR(8), -- DATE
|
||||||
|
cdm_version VARCHAR(10),
|
||||||
|
vocabulary_version VARCHAR(20)
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/************************
|
||||||
|
|
||||||
|
Standardized clinical data
|
||||||
|
|
||||||
|
************************/
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE person (
|
||||||
|
person_id INTEGER,
|
||||||
|
gender_concept_id INTEGER,
|
||||||
|
year_of_birth INTEGER,
|
||||||
|
month_of_birth INTEGER,
|
||||||
|
day_of_birth INTEGER,
|
||||||
|
time_of_birth VARCHAR(10),
|
||||||
|
race_concept_id INTEGER,
|
||||||
|
ethnicity_concept_id INTEGER,
|
||||||
|
location_id INTEGER,
|
||||||
|
provider_id INTEGER,
|
||||||
|
care_site_id INTEGER,
|
||||||
|
person_source_value VARCHAR(50),
|
||||||
|
gender_source_value VARCHAR(50),
|
||||||
|
gender_source_concept_id INTEGER,
|
||||||
|
race_source_value VARCHAR(50),
|
||||||
|
race_source_concept_id INTEGER,
|
||||||
|
ethnicity_source_value VARCHAR(50),
|
||||||
|
ethnicity_source_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE observation_period (
|
||||||
|
observation_period_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
observation_period_start_date VARCHAR(8), -- DATE
|
||||||
|
observation_period_end_date VARCHAR(8), -- DATE
|
||||||
|
period_type_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE specimen (
|
||||||
|
specimen_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
specimen_concept_id INTEGER,
|
||||||
|
specimen_type_concept_id INTEGER,
|
||||||
|
specimen_date VARCHAR(8), -- DATE
|
||||||
|
specimen_time VARCHAR(10),
|
||||||
|
quantity DOUBLE, -- NUMERIC
|
||||||
|
unit_concept_id INTEGER,
|
||||||
|
anatomic_site_concept_id INTEGER,
|
||||||
|
disease_status_concept_id INTEGER,
|
||||||
|
specimen_source_id VARCHAR(50),
|
||||||
|
specimen_source_value VARCHAR(50),
|
||||||
|
unit_source_value VARCHAR(50),
|
||||||
|
anatomic_site_source_value VARCHAR(50),
|
||||||
|
disease_status_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE death (
|
||||||
|
person_id INTEGER,
|
||||||
|
death_date VARCHAR(8), -- DATE
|
||||||
|
death_type_concept_id INTEGER,
|
||||||
|
cause_concept_id INTEGER,
|
||||||
|
cause_source_value VARCHAR(50),
|
||||||
|
cause_source_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE visit_occurrence (
|
||||||
|
visit_occurrence_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
visit_concept_id INTEGER,
|
||||||
|
visit_start_date VARCHAR(8), -- DATE
|
||||||
|
visit_start_time VARCHAR(10),
|
||||||
|
visit_end_date VARCHAR(8), -- DATE
|
||||||
|
visit_end_time VARCHAR(10),
|
||||||
|
visit_type_concept_id INTEGER,
|
||||||
|
provider_id INTEGER,
|
||||||
|
care_site_id INTEGER,
|
||||||
|
visit_source_value VARCHAR(50),
|
||||||
|
visit_source_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE procedure_occurrence (
|
||||||
|
procedure_occurrence_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
procedure_concept_id INTEGER,
|
||||||
|
procedure_date VARCHAR(8), -- DATE
|
||||||
|
procedure_type_concept_id INTEGER,
|
||||||
|
modifier_concept_id INTEGER,
|
||||||
|
quantity INTEGER,
|
||||||
|
provider_id INTEGER,
|
||||||
|
visit_occurrence_id INTEGER,
|
||||||
|
procedure_source_value VARCHAR(50),
|
||||||
|
procedure_source_concept_id INTEGER,
|
||||||
|
qualifier_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE drug_exposure (
|
||||||
|
drug_exposure_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
drug_concept_id INTEGER,
|
||||||
|
drug_exposure_start_date VARCHAR(8), -- DATE
|
||||||
|
drug_exposure_end_date VARCHAR(8), -- DATE
|
||||||
|
drug_type_concept_id INTEGER,
|
||||||
|
stop_reason VARCHAR(20),
|
||||||
|
refills INTEGER,
|
||||||
|
quantity DOUBLE, -- NUMERIC
|
||||||
|
days_supply INTEGER,
|
||||||
|
sig STRING, -- TEXT
|
||||||
|
route_concept_id INTEGER,
|
||||||
|
effective_drug_dose DOUBLE, -- NUMERIC
|
||||||
|
dose_unit_concept_id INTEGER,
|
||||||
|
lot_number VARCHAR(50),
|
||||||
|
provider_id INTEGER,
|
||||||
|
visit_occurrence_id INTEGER,
|
||||||
|
drug_source_value VARCHAR(50),
|
||||||
|
drug_source_concept_id INTEGER,
|
||||||
|
route_source_value VARCHAR(50),
|
||||||
|
dose_unit_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE device_exposure (
|
||||||
|
device_exposure_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
device_concept_id INTEGER,
|
||||||
|
device_exposure_start_date VARCHAR(8), -- DATE
|
||||||
|
device_exposure_end_date VARCHAR(8), -- DATE
|
||||||
|
device_type_concept_id INTEGER,
|
||||||
|
unique_device_id VARCHAR(50),
|
||||||
|
quantity INTEGER,
|
||||||
|
provider_id INTEGER,
|
||||||
|
visit_occurrence_id INTEGER,
|
||||||
|
device_source_value VARCHAR(100),
|
||||||
|
device_source_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE condition_occurrence (
|
||||||
|
condition_occurrence_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
condition_concept_id INTEGER,
|
||||||
|
condition_start_date VARCHAR(8), -- DATE
|
||||||
|
condition_end_date VARCHAR(8), -- DATE
|
||||||
|
condition_type_concept_id INTEGER,
|
||||||
|
stop_reason VARCHAR(20),
|
||||||
|
provider_id INTEGER,
|
||||||
|
visit_occurrence_id INTEGER,
|
||||||
|
condition_source_value VARCHAR(50),
|
||||||
|
condition_source_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE measurement (
|
||||||
|
measurement_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
measurement_concept_id INTEGER,
|
||||||
|
measurement_date VARCHAR(8), -- DATE
|
||||||
|
measurement_time VARCHAR(10),
|
||||||
|
measurement_type_concept_id INTEGER,
|
||||||
|
operator_concept_id INTEGER,
|
||||||
|
value_as_number DOUBLE, -- NUMERIC
|
||||||
|
value_as_concept_id INTEGER,
|
||||||
|
unit_concept_id INTEGER,
|
||||||
|
range_low DOUBLE, -- NUMERIC
|
||||||
|
range_high DOUBLE, -- NUMERIC
|
||||||
|
provider_id INTEGER,
|
||||||
|
visit_occurrence_id INTEGER,
|
||||||
|
measurement_source_value VARCHAR(50),
|
||||||
|
measurement_source_concept_id INTEGER,
|
||||||
|
unit_source_value VARCHAR(50),
|
||||||
|
value_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE note (
|
||||||
|
note_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
note_date VARCHAR(8), -- DATE
|
||||||
|
note_time VARCHAR(10),
|
||||||
|
note_type_concept_id INTEGER,
|
||||||
|
note_text STRING, -- TEXT
|
||||||
|
provider_id INTEGER,
|
||||||
|
visit_occurrence_id INTEGER,
|
||||||
|
note_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE observation (
|
||||||
|
observation_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
observation_concept_id INTEGER,
|
||||||
|
observation_date VARCHAR(8), -- DATE
|
||||||
|
observation_time VARCHAR(10),
|
||||||
|
observation_type_concept_id INTEGER,
|
||||||
|
value_as_number DOUBLE, -- NUMERIC
|
||||||
|
value_as_string VARCHAR(60),
|
||||||
|
value_as_concept_id INTEGER,
|
||||||
|
qualifier_concept_id INTEGER,
|
||||||
|
unit_concept_id INTEGER,
|
||||||
|
provider_id INTEGER,
|
||||||
|
visit_occurrence_id INTEGER,
|
||||||
|
observation_source_value VARCHAR(50),
|
||||||
|
observation_source_concept_id INTEGER,
|
||||||
|
unit_source_value VARCHAR(50),
|
||||||
|
qualifier_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE fact_relationship (
|
||||||
|
domain_concept_id_1 INTEGER,
|
||||||
|
fact_id_1 INTEGER,
|
||||||
|
domain_concept_id_2 INTEGER,
|
||||||
|
fact_id_2 INTEGER,
|
||||||
|
relationship_concept_id INTEGER
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/************************
|
||||||
|
|
||||||
|
Standardized health system data
|
||||||
|
|
||||||
|
************************/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE `location` (
|
||||||
|
location_id INTEGER,
|
||||||
|
address_1 VARCHAR(50),
|
||||||
|
address_2 VARCHAR(50),
|
||||||
|
city VARCHAR(50),
|
||||||
|
state VARCHAR(2),
|
||||||
|
zip VARCHAR(9),
|
||||||
|
county VARCHAR(20),
|
||||||
|
location_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE care_site (
|
||||||
|
care_site_id INTEGER,
|
||||||
|
care_site_name VARCHAR(255),
|
||||||
|
place_of_service_concept_id INTEGER,
|
||||||
|
location_id INTEGER,
|
||||||
|
care_site_source_value VARCHAR(50),
|
||||||
|
place_of_service_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE provider (
|
||||||
|
provider_id INTEGER,
|
||||||
|
provider_name VARCHAR(255),
|
||||||
|
NPI VARCHAR(20),
|
||||||
|
DEA VARCHAR(20),
|
||||||
|
specialty_concept_id INTEGER,
|
||||||
|
care_site_id INTEGER,
|
||||||
|
year_of_birth INTEGER,
|
||||||
|
gender_concept_id INTEGER,
|
||||||
|
provider_source_value VARCHAR(50),
|
||||||
|
specialty_source_value VARCHAR(50),
|
||||||
|
specialty_source_concept_id INTEGER,
|
||||||
|
gender_source_value VARCHAR(50),
|
||||||
|
gender_source_concept_id INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/************************
|
||||||
|
|
||||||
|
Standardized health economics
|
||||||
|
|
||||||
|
************************/
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE payer_plan_period (
|
||||||
|
payer_plan_period_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
payer_plan_period_start_date VARCHAR(8), -- DATE
|
||||||
|
payer_plan_period_end_date VARCHAR(8), -- DATE
|
||||||
|
payer_source_value VARCHAR (50),
|
||||||
|
plan_source_value VARCHAR (50),
|
||||||
|
family_source_value VARCHAR (50)
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
/* The individual cost tables are being phased out and will disappear soon
|
||||||
|
|
||||||
|
CREATE TABLE visit_cost (
|
||||||
|
visit_cost_id INTEGER,
|
||||||
|
visit_occurrence_id INTEGER,
|
||||||
|
currency_concept_id INTEGER,
|
||||||
|
paid_copay DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_coinsurance DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_toward_deductible DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_payer DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_coordination_benefits DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_out_of_pocket DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_paid DECIMAL(19,4), -- NUMERIC
|
||||||
|
payer_plan_period_id INTEGER
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE procedure_cost (
|
||||||
|
procedure_cost_id INTEGER,
|
||||||
|
procedure_occurrence_id INTEGER,
|
||||||
|
currency_concept_id INTEGER,
|
||||||
|
paid_copay DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_coinsurance DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_toward_deductible DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_payer DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_coordination_benefits DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_out_of_pocket DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_paid DECIMAL(19,4), -- NUMERIC
|
||||||
|
revenue_code_concept_id INTEGER,
|
||||||
|
payer_plan_period_id INTEGER,
|
||||||
|
revenue_code_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE drug_cost (
|
||||||
|
drug_cost_id INTEGER,
|
||||||
|
drug_exposure_id INTEGER,
|
||||||
|
currency_concept_id INTEGER,
|
||||||
|
paid_copay DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_coinsurance DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_toward_deductible DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_payer DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_coordination_benefits DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_out_of_pocket DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_paid DECIMAL(19,4), -- NUMERIC
|
||||||
|
ingredient_cost DECIMAL(19,4), -- NUMERIC
|
||||||
|
dispensing_fee DECIMAL(19,4), -- NUMERIC
|
||||||
|
average_wholesale_price DECIMAL(19,4), -- NUMERIC
|
||||||
|
payer_plan_period_id INTEGER
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE device_cost (
|
||||||
|
device_cost_id INTEGER,
|
||||||
|
device_exposure_id INTEGER,
|
||||||
|
currency_concept_id INTEGER,
|
||||||
|
paid_copay DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_coinsurance DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_toward_deductible DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_payer DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_coordination_benefits DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_out_of_pocket DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_paid DECIMAL(19,4), -- NUMERIC
|
||||||
|
payer_plan_period_id INTEGER
|
||||||
|
)
|
||||||
|
;
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE cost (
|
||||||
|
cost_id INTEGER,
|
||||||
|
cost_event_id INTEGER,
|
||||||
|
cost_domain_id VARCHAR(20),
|
||||||
|
cost_type_concept_id INTEGER,
|
||||||
|
currency_concept_id INTEGER,
|
||||||
|
total_charge DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_cost DECIMAL(19,4), -- NUMERIC
|
||||||
|
total_paid DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_payer DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_patient DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_patient_copay DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_patient_coinsurance DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_patient_deductible DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_by_primary DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_ingredient_cost DECIMAL(19,4), -- NUMERIC
|
||||||
|
paid_dispensing_fee DECIMAL(19,4), -- NUMERIC
|
||||||
|
payer_plan_period_id INTEGER,
|
||||||
|
amount_allowed DECIMAL(19,4), -- NUMERIC
|
||||||
|
revenue_code_concept_id INTEGER,
|
||||||
|
reveue_code_source_value VARCHAR(50)
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/************************
|
||||||
|
|
||||||
|
Standardized derived elements
|
||||||
|
|
||||||
|
************************/
|
||||||
|
|
||||||
|
CREATE TABLE cohort (
|
||||||
|
cohort_definition_id INTEGER,
|
||||||
|
subject_id INTEGER,
|
||||||
|
cohort_start_date VARCHAR(8), -- DATE
|
||||||
|
cohort_end_date VARCHAR(8) -- DATE
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE cohort_attribute (
|
||||||
|
cohort_definition_id INTEGER,
|
||||||
|
cohort_start_date VARCHAR(8), -- DATE
|
||||||
|
cohort_end_date VARCHAR(8), -- DATE
|
||||||
|
subject_id INTEGER,
|
||||||
|
attribute_definition_id INTEGER,
|
||||||
|
value_as_number DOUBLE, -- NUMERIC
|
||||||
|
value_as_concept_id INTEGER
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE drug_era (
|
||||||
|
drug_era_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
drug_concept_id INTEGER,
|
||||||
|
drug_era_start_date VARCHAR(8), -- DATE
|
||||||
|
drug_era_end_date VARCHAR(8), -- DATE
|
||||||
|
drug_exposure_count INTEGER,
|
||||||
|
gap_days INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE dose_era (
|
||||||
|
dose_era_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
drug_concept_id INTEGER,
|
||||||
|
unit_concept_id INTEGER,
|
||||||
|
dose_value DOUBLE, -- NUMERIC
|
||||||
|
dose_era_start_date VARCHAR(8), -- DATE
|
||||||
|
dose_era_end_date VARCHAR(8) -- DATE
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE condition_era (
|
||||||
|
condition_era_id INTEGER,
|
||||||
|
person_id INTEGER,
|
||||||
|
condition_concept_id INTEGER,
|
||||||
|
condition_era_start_date VARCHAR(8), -- DATE
|
||||||
|
condition_era_end_date VARCHAR(8), -- DATE
|
||||||
|
condition_occurrence_count INTEGER
|
||||||
|
)
|
||||||
|
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||||
|
TBLPROPERTIES ("skip.header.line.count"="1")
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
Common-Data-Model / Impala
|
||||||
|
=================
|
||||||
|
|
||||||
|
This folder contains the SQL scripts for Impala.
|
||||||
|
|
||||||
|
In order to create your instantiation of the Common Data Model, we recommend following these steps:
|
||||||
|
|
||||||
|
1. Create an empty schema.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
impala-shell -q 'CREATE DATABASE omop_cdm'
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Execute the script `OMOP_CDM_ddl_Impala.sql` to create the tables and fields.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
impala-shell -d omop_cdm -f OMOP_CDM_ddl_Impala.sql
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Load your data into the schema.
|
||||||
|
|
||||||
|
a. Load the vocabulary tables.
|
||||||
|
|
||||||
|
First, download the data from
|
||||||
|
[http://www.ohdsi.org/web/athena/](http://www.ohdsi.org/web/athena/)
|
||||||
|
and unzip into a _cdmv5vocab_ directory, then run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop fs -put cdmv5vocab cdmv5vocab
|
||||||
|
hadoop fs -chmod +w cdmv5vocab
|
||||||
|
impala-shell -d omop_cdm -f VocabImport/OMOP_CDM_vocabulary_load_Impala.sql --var=OMOP_VOCAB_PATH=/user/$USER/cdmv5vocab
|
||||||
|
```
|
||||||
|
|
||||||
|
b. Load the patient data.
|
||||||
|
|
||||||
|
For example, download the 1000 person sample of simulated CMS SynPUF patient data from
|
||||||
|
[http://www.ltscomputingllc.com/downloads/](http://www.ltscomputingllc.com/downloads/)
|
||||||
|
and unzip into a _synpuf_ directory, then run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop fs -put synpuf synpuf
|
||||||
|
hadoop fs -chmod +w synpuf
|
||||||
|
impala-shell -d omop_cdm -f DataImport/OMOP_CDM_synpuf_load_Impala.sql --var=OMOP_SYNPUF_PATH=/user/$USER/synpuf
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Run simple queries to sanity check.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
impala-shell -d omop_cdm -q 'SELECT COUNT(1) FROM concept'
|
||||||
|
impala-shell -d omop_cdm -q 'SELECT COUNT(1) FROM person'
|
||||||
|
```
|
|
@ -0,0 +1,9 @@
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_VOCAB_PATH}/CONCEPT.csv' OVERWRITE INTO TABLE concept;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_VOCAB_PATH}/VOCABULARY.csv' OVERWRITE INTO TABLE vocabulary;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_VOCAB_PATH}/DOMAIN.csv' OVERWRITE INTO TABLE domain;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_VOCAB_PATH}/CONCEPT_CLASS.csv' OVERWRITE INTO TABLE concept_class;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_VOCAB_PATH}/CONCEPT_RELATIONSHIP.csv' OVERWRITE INTO TABLE concept_relationship;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_VOCAB_PATH}/RELATIONSHIP.csv' OVERWRITE INTO TABLE relationship;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_VOCAB_PATH}/CONCEPT_SYNONYM.csv' OVERWRITE INTO TABLE concept_synonym;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_VOCAB_PATH}/CONCEPT_ANCESTOR.csv' OVERWRITE INTO TABLE concept_ancestor;
|
||||||
|
LOAD DATA INPATH '${VAR:OMOP_VOCAB_PATH}/DRUG_STRENGTH.csv' OVERWRITE INTO TABLE drug_strength;
|
Loading…
Reference in New Issue