-- Use the search/replace regex in an editor to fix DATE columns:
-- ([^ ]+) VARCHAR\(8\), \-\- DATE
-- TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST($1 AS STRING), 1, 4), SUBSTR(CAST($1 AS STRING), 5, 2), SUBSTR(CAST($1 AS STRING), 7, 2)), 'UTC') AS $1,

CREATE TABLE omop_cdm_parquet.concept
STORED AS PARQUET
AS
SELECT
 concept_id,
 concept_name,
 domain_id,
 vocabulary_id,
 concept_class_id,
 standard_concept,
 concept_code,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
 invalid_reason
FROM omop_cdm.concept;

CREATE TABLE omop_cdm_parquet.vocabulary
STORED AS PARQUET
AS
SELECT * from omop_cdm.vocabulary;

CREATE TABLE omop_cdm_parquet.domain
STORED AS PARQUET
AS
SELECT * from omop_cdm.domain;

CREATE TABLE omop_cdm_parquet.concept_class
STORED AS PARQUET
AS
SELECT * from omop_cdm.concept_class;

CREATE TABLE omop_cdm_parquet.concept_relationship
STORED AS PARQUET
AS
SELECT
 concept_id_1,
 concept_id_2,
 relationship_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
 invalid_reason
FROM omop_cdm.concept_relationship;

CREATE TABLE omop_cdm_parquet.relationship
STORED AS PARQUET
AS
SELECT * from omop_cdm.relationship;

CREATE TABLE omop_cdm_parquet.concept_synonym
STORED AS PARQUET
AS
SELECT * from omop_cdm.concept_synonym;

CREATE TABLE omop_cdm_parquet.concept_ancestor
STORED AS PARQUET
AS
SELECT * from omop_cdm.concept_ancestor;

CREATE TABLE omop_cdm_parquet.source_to_concept_map
STORED AS PARQUET
AS
SELECT
 source_code,
 source_concept_id,
 source_vocabulary_id,
 source_code_description,
 target_concept_id,
 target_vocabulary_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
 invalid_reason
FROM omop_cdm.source_to_concept_map;

CREATE TABLE omop_cdm_parquet.drug_strength
STORED AS PARQUET
AS
SELECT
 drug_concept_id,
 ingredient_concept_id,
 amount_value, -- NUMERIC
 amount_unit_concept_id,
 numerator_value, -- NUMERIC
 numerator_unit_concept_id,
 denominator_value, -- NUMERIC
 denominator_unit_concept_id,
 box_size,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_start_date AS STRING), 1, 4), SUBSTR(CAST(valid_start_date AS STRING), 5, 2), SUBSTR(CAST(valid_start_date AS STRING), 7, 2)), 'UTC') AS valid_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(valid_end_date AS STRING), 1, 4), SUBSTR(CAST(valid_end_date AS STRING), 5, 2), SUBSTR(CAST(valid_end_date AS STRING), 7, 2)), 'UTC') AS valid_end_date,
 invalid_reason
FROM omop_cdm.drug_strength;

CREATE TABLE omop_cdm_parquet.cohort_definition
STORED AS PARQUET
AS
SELECT
 cohort_definition_id,
 cohort_definition_name,
 cohort_definition_description, -- TEXT
 definition_type_concept_id,
 cohort_definition_syntax, -- TEXT
 subject_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_initiation_date AS STRING), 1, 4), SUBSTR(CAST(cohort_initiation_date AS STRING), 5, 2), SUBSTR(CAST(cohort_initiation_date AS STRING), 7, 2)), 'UTC') AS cohort_initiation_date
FROM omop_cdm.cohort_definition;

CREATE TABLE omop_cdm_parquet.attribute_definition
STORED AS PARQUET
AS
SELECT * from omop_cdm.attribute_definition;

CREATE TABLE omop_cdm_parquet.cdm_source
STORED AS PARQUET
AS
SELECT
 cdm_source_name,
 cdm_source_abbreviation,
 cdm_holder,
 source_description, -- TEXT
 source_documentation_reference,
 cdm_etl_reference,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(source_release_date AS STRING), 1, 4), SUBSTR(CAST(source_release_date AS STRING), 5, 2), SUBSTR(CAST(source_release_date AS STRING), 7, 2)), 'UTC') AS source_release_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cdm_release_date AS STRING), 1, 4), SUBSTR(CAST(cdm_release_date AS STRING), 5, 2), SUBSTR(CAST(cdm_release_date AS STRING), 7, 2)), 'UTC') AS cdm_release_date,
 cdm_version,
 vocabulary_version
FROM omop_cdm.cdm_source;

CREATE TABLE omop_cdm_parquet.person
STORED AS PARQUET
AS
SELECT * from omop_cdm.person;

CREATE TABLE omop_cdm_parquet.observation_period
STORED AS PARQUET
AS
SELECT
 observation_period_id,
 person_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_period_start_date AS STRING), 1, 4), SUBSTR(CAST(observation_period_start_date AS STRING), 5, 2), SUBSTR(CAST(observation_period_start_date AS STRING), 7, 2)), 'UTC') AS observation_period_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_period_end_date AS STRING), 1, 4), SUBSTR(CAST(observation_period_end_date AS STRING), 5, 2), SUBSTR(CAST(observation_period_end_date AS STRING), 7, 2)), 'UTC') AS observation_period_end_date,
 period_type_concept_id
FROM omop_cdm.observation_period;

CREATE TABLE omop_cdm_parquet.specimen
STORED AS PARQUET
AS
SELECT
 specimen_id,
 person_id,
 specimen_concept_id,
 specimen_type_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(specimen_date AS STRING), 1, 4), SUBSTR(CAST(specimen_date AS STRING), 5, 2), SUBSTR(CAST(specimen_date AS STRING), 7, 2)), 'UTC') AS specimen_date,
 specimen_time,
 quantity, -- NUMERIC
 unit_concept_id,
 anatomic_site_concept_id,
 disease_status_concept_id,
 specimen_source_id,
 specimen_source_value,
 unit_source_value,
 anatomic_site_source_value,
 disease_status_source_value
FROM omop_cdm.specimen;

CREATE TABLE omop_cdm_parquet.death
STORED AS PARQUET
AS
SELECT
 person_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(death_date AS STRING), 1, 4), SUBSTR(CAST(death_date AS STRING), 5, 2), SUBSTR(CAST(death_date AS STRING), 7, 2)), 'UTC') AS death_date,
 death_type_concept_id,
 cause_concept_id,
 cause_source_value,
 cause_source_concept_id
FROM omop_cdm.death;

CREATE TABLE omop_cdm_parquet.visit_occurrence
STORED AS PARQUET
AS
SELECT
 visit_occurrence_id,
 person_id,
 visit_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(visit_start_date AS STRING), 1, 4), SUBSTR(CAST(visit_start_date AS STRING), 5, 2), SUBSTR(CAST(visit_start_date AS STRING), 7, 2)), 'UTC') AS visit_start_date,
 visit_start_time,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(visit_end_date AS STRING), 1, 4), SUBSTR(CAST(visit_end_date AS STRING), 5, 2), SUBSTR(CAST(visit_end_date AS STRING), 7, 2)), 'UTC') AS visit_end_date,
 visit_end_time,
 visit_type_concept_id,
 provider_id,
 care_site_id,
 visit_source_value,
 visit_source_concept_id
FROM omop_cdm.visit_occurrence;

CREATE TABLE omop_cdm_parquet.procedure_occurrence
STORED AS PARQUET
AS
SELECT
 procedure_occurrence_id,
 person_id,
 procedure_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(procedure_date AS STRING), 1, 4), SUBSTR(CAST(procedure_date AS STRING), 5, 2), SUBSTR(CAST(procedure_date AS STRING), 7, 2)), 'UTC') AS procedure_date,
 procedure_type_concept_id,
 modifier_concept_id,
 quantity,
 provider_id,
 visit_occurrence_id,
 procedure_source_value,
 procedure_source_concept_id,
 qualifier_source_value
FROM omop_cdm.procedure_occurrence;

CREATE TABLE omop_cdm_parquet.drug_exposure
STORED AS PARQUET
AS
SELECT
 drug_exposure_id,
 person_id,
 drug_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_exposure_start_date AS STRING), 1, 4), SUBSTR(CAST(drug_exposure_start_date AS STRING), 5, 2), SUBSTR(CAST(drug_exposure_start_date AS STRING), 7, 2)), 'UTC') AS drug_exposure_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_exposure_end_date AS STRING), 1, 4), SUBSTR(CAST(drug_exposure_end_date AS STRING), 5, 2), SUBSTR(CAST(drug_exposure_end_date AS STRING), 7, 2)), 'UTC') AS drug_exposure_end_date,
 drug_type_concept_id,
 stop_reason,
 refills,
 quantity, -- NUMERIC
 days_supply,
 sig, -- TEXT
 route_concept_id,
 effective_drug_dose, -- NUMERIC
 dose_unit_concept_id,
 lot_number,
 provider_id,
 visit_occurrence_id,
 drug_source_value,
 drug_source_concept_id,
 route_source_value,
 dose_unit_source_value
FROM omop_cdm.drug_exposure;

CREATE TABLE omop_cdm_parquet.device_exposure
STORED AS PARQUET
AS
SELECT
 device_exposure_id,
 person_id,
 device_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(device_exposure_start_date AS STRING), 1, 4), SUBSTR(CAST(device_exposure_start_date AS STRING), 5, 2), SUBSTR(CAST(device_exposure_start_date AS STRING), 7, 2)), 'UTC') AS device_exposure_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(device_exposure_end_date AS STRING), 1, 4), SUBSTR(CAST(device_exposure_end_date AS STRING), 5, 2), SUBSTR(CAST(device_exposure_end_date AS STRING), 7, 2)), 'UTC') AS device_exposure_end_date,
 device_type_concept_id,
 unique_device_id,
 quantity,
 provider_id,
 visit_occurrence_id,
 device_source_value,
 device_source_concept_id
FROM omop_cdm.device_exposure;

CREATE TABLE omop_cdm_parquet.condition_occurrence
STORED AS PARQUET
AS
SELECT
 condition_occurrence_id,
 person_id,
 condition_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_start_date AS STRING), 1, 4), SUBSTR(CAST(condition_start_date AS STRING), 5, 2), SUBSTR(CAST(condition_start_date AS STRING), 7, 2)), 'UTC') AS condition_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_end_date AS STRING), 1, 4), SUBSTR(CAST(condition_end_date AS STRING), 5, 2), SUBSTR(CAST(condition_end_date AS STRING), 7, 2)), 'UTC') AS condition_end_date,
 condition_type_concept_id,
 stop_reason,
 provider_id,
 visit_occurrence_id,
 condition_source_value,
 condition_source_concept_id
FROM omop_cdm.condition_occurrence;

CREATE TABLE omop_cdm_parquet.measurement
STORED AS PARQUET
AS
SELECT
 measurement_id,
 person_id,
 measurement_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(measurement_date AS STRING), 1, 4), SUBSTR(CAST(measurement_date AS STRING), 5, 2), SUBSTR(CAST(measurement_date AS STRING), 7, 2)), 'UTC') AS measurement_date,
 measurement_time,
 measurement_type_concept_id,
 operator_concept_id,
 value_as_number, -- NUMERIC
 value_as_concept_id,
 unit_concept_id,
 range_low, -- NUMERIC
 range_high, -- NUMERIC
 provider_id,
 visit_occurrence_id,
 measurement_source_value,
 measurement_source_concept_id,
 unit_source_value,
 value_source_value
FROM omop_cdm.measurement;

CREATE TABLE omop_cdm_parquet.note
STORED AS PARQUET
AS
SELECT
 note_id,
 person_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(note_date AS STRING), 1, 4), SUBSTR(CAST(note_date AS STRING), 5, 2), SUBSTR(CAST(note_date AS STRING), 7, 2)), 'UTC') AS note_date,
 note_time,
 note_type_concept_id,
 note_text, -- TEXT
 provider_id,
 visit_occurrence_id,
 note_source_value
FROM omop_cdm.note;

CREATE TABLE omop_cdm_parquet.observation
STORED AS PARQUET
AS
SELECT
 observation_id,
 person_id,
 observation_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(observation_date AS STRING), 1, 4), SUBSTR(CAST(observation_date AS STRING), 5, 2), SUBSTR(CAST(observation_date AS STRING), 7, 2)), 'UTC') AS observation_date,
 observation_time,
 observation_type_concept_id,
 value_as_number, -- NUMERIC
 value_as_string,
 value_as_concept_id,
 qualifier_concept_id,
 unit_concept_id,
 provider_id,
 visit_occurrence_id,
 observation_source_value,
 observation_source_concept_id ,
 unit_source_value,
 qualifier_source_value
FROM omop_cdm.observation;

CREATE TABLE omop_cdm_parquet.fact_relationship
STORED AS PARQUET
AS
SELECT * from omop_cdm.fact_relationship;

CREATE TABLE omop_cdm_parquet.`location`
STORED AS PARQUET
AS
SELECT * from omop_cdm.`location`;

CREATE TABLE omop_cdm_parquet.care_site
STORED AS PARQUET
AS
SELECT * from omop_cdm.care_site;

CREATE TABLE omop_cdm_parquet.provider
STORED AS PARQUET
AS
SELECT * from omop_cdm.provider;

CREATE TABLE omop_cdm_parquet.payer_plan_period
STORED AS PARQUET
AS
SELECT
 payer_plan_period_id,
 person_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(payer_plan_period_start_date AS STRING), 1, 4), SUBSTR(CAST(payer_plan_period_start_date AS STRING), 5, 2), SUBSTR(CAST(payer_plan_period_start_date AS STRING), 7, 2)), 'UTC') AS payer_plan_period_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(payer_plan_period_end_date AS STRING), 1, 4), SUBSTR(CAST(payer_plan_period_end_date AS STRING), 5, 2), SUBSTR(CAST(payer_plan_period_end_date AS STRING), 7, 2)), 'UTC') AS payer_plan_period_end_date,
 payer_source_value,
 plan_source_value,
 family_source_value
FROM omop_cdm.payer_plan_period;


/* The individual cost tables are being phased out and will disappear soon

CREATE TABLE omop_cdm_parquet.visit_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.visit_cost;

CREATE TABLE omop_cdm_parquet.procedure_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.procedure_cost;

CREATE TABLE omop_cdm_parquet.drug_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.drug_cost;

CREATE TABLE omop_cdm_parquet.device_cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.device_cost;
*/

CREATE TABLE omop_cdm_parquet.cost
STORED AS PARQUET
AS
SELECT * from omop_cdm.cost;

CREATE TABLE omop_cdm_parquet.cohort
STORED AS PARQUET
AS
SELECT
 cohort_definition_id,
 subject_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_start_date AS STRING), 1, 4), SUBSTR(CAST(cohort_start_date AS STRING), 5, 2), SUBSTR(CAST(cohort_start_date AS STRING), 7, 2)), 'UTC') AS cohort_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_end_date AS STRING), 1, 4), SUBSTR(CAST(cohort_end_date AS STRING), 5, 2), SUBSTR(CAST(cohort_end_date AS STRING), 7, 2)), 'UTC') AS cohort_end_date
FROM omop_cdm.cohort;

CREATE TABLE omop_cdm_parquet.cohort_attribute
STORED AS PARQUET
AS
SELECT
 cohort_definition_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_start_date AS STRING), 1, 4), SUBSTR(CAST(cohort_start_date AS STRING), 5, 2), SUBSTR(CAST(cohort_start_date AS STRING), 7, 2)), 'UTC') AS cohort_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(cohort_end_date AS STRING), 1, 4), SUBSTR(CAST(cohort_end_date AS STRING), 5, 2), SUBSTR(CAST(cohort_end_date AS STRING), 7, 2)), 'UTC') AS cohort_end_date,
 subject_id,
 attribute_definition_id,
 value_as_number, -- NUMERIC
 value_as_concept_id
FROM omop_cdm.cohort_attribute;

CREATE TABLE omop_cdm_parquet.drug_era
STORED AS PARQUET
AS
SELECT
 drug_era_id,
 person_id,
 drug_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_era_start_date AS STRING), 1, 4), SUBSTR(CAST(drug_era_start_date AS STRING), 5, 2), SUBSTR(CAST(drug_era_start_date AS STRING), 7, 2)), 'UTC') AS drug_era_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(drug_era_end_date AS STRING), 1, 4), SUBSTR(CAST(drug_era_end_date AS STRING), 5, 2), SUBSTR(CAST(drug_era_end_date AS STRING), 7, 2)), 'UTC') AS drug_era_end_date,
 drug_exposure_count,
 gap_days
FROM omop_cdm.drug_era;

CREATE TABLE omop_cdm_parquet.dose_era
STORED AS PARQUET
AS
SELECT
 dose_era_id,
 person_id,
 drug_concept_id,
 unit_concept_id,
 dose_value, -- NUMERIC
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(dose_era_start_date AS STRING), 1, 4), SUBSTR(CAST(dose_era_start_date AS STRING), 5, 2), SUBSTR(CAST(dose_era_start_date AS STRING), 7, 2)), 'UTC') AS dose_era_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(dose_era_end_date AS STRING), 1, 4), SUBSTR(CAST(dose_era_end_date AS STRING), 5, 2), SUBSTR(CAST(dose_era_end_date AS STRING), 7, 2)), 'UTC') AS dose_era_end_date
FROM omop_cdm.dose_era;

CREATE TABLE omop_cdm_parquet.condition_era
STORED AS PARQUET
AS
SELECT
 condition_era_id,
 person_id,
 condition_concept_id,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_era_start_date AS STRING), 1, 4), SUBSTR(CAST(condition_era_start_date AS STRING), 5, 2), SUBSTR(CAST(condition_era_start_date AS STRING), 7, 2)), 'UTC') AS condition_era_start_date,
 TO_UTC_TIMESTAMP(CONCAT_WS('-', SUBSTR(CAST(condition_era_end_date AS STRING), 1, 4), SUBSTR(CAST(condition_era_end_date AS STRING), 5, 2), SUBSTR(CAST(condition_era_end_date AS STRING), 7, 2)), 'UTC') AS condition_era_end_date,
 condition_occurrence_count
FROM omop_cdm.condition_era;