diff --git a/DESCRIPTION b/DESCRIPTION index 9e8b93e..5b35522 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,5 +24,5 @@ Suggests: RSQLite, withr NeedsCompilation: no -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 Config/testthat/edition: 3 diff --git a/docs/background.html b/docs/background.html index dd2e97f..466ce36 100644 --- a/docs/background.html +++ b/docs/background.html @@ -13,7 +13,7 @@ background.knit - + @@ -63,6 +63,7 @@ if (window.hljs) { + @@ -88,6 +89,9 @@ button.code-folding-btn:focus { summary { display: list-item; } +details > summary > p:only-child { + display: inline; +} pre code { padding: 0; } @@ -312,7 +316,7 @@ div.tocify {
@@ -8545,499 +740,6 @@ CONCEPT

The PAYER_PLAN_PERIOD table captures details of the period of time that a Person is continuously enrolled under a specific health Plan benefit structure from a given Payer. Each Person receiving healthcare is typically covered by a health benefit plan, which pays for (fully or partially), or directly provides, the care. These benefit plans are provided by payers, such as health insurances or state or government agencies. In each plan the details of the health benefits are defined for the Person or her family, and the health benefit Plan might change over time typically with increasing utilization (reaching certain cost thresholds such as deductibles), plan availability and purchasing choices of the Person. The unique combinations of Payer organizations, health benefit Plans and time periods in which they are valid for a Person are recorded in this table.

User Guide

A Person can have multiple, overlapping, Payer_Plan_Periods in this table. For example, medical and drug coverage in the US can be represented by two Payer_Plan_Periods. The details of the benefit structure of the Plan are rarely known; the idea is just to identify that the Plans are different.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-payer_plan_period_id - -A unique identifier for each unique combination of a Person, Payer, Plan, and Period of time. - - -integer - -Yes - -Yes - -Yes - -PERSON - -
-person_id - -The Person covered by the Plan. - -A single Person can have multiple, overlapping, PAYER_PLAN_PERIOD records - -integer - -Yes - -No - -Yes - -PERSON - -
-payer_plan_period_start_date - -Start date of Plan coverage. - - -date - -Yes - -No - -No - - -
-payer_plan_period_end_date - -End date of Plan coverage. - - -date - -Yes - -No - -No - - -
-payer_concept_id - -This field represents the organization who reimburses the provider which administers care to the Person. - -Map the Payer directly to a standard CONCEPT_ID. If one does not exist, please contact the vocabulary team. There is no global controlled vocabulary available for this information. The point is to stratify on this information and identify if Persons have the same payer, though the name of the Payer is not necessary. Accepted Concepts. - -integer - -No - -No - -Yes - -CONCEPT - -
-payer_source_value - -This is the Payer as it appears in the source data. - - -varchar(50) - -No - -No - -No - - -
-payer_source_concept_id - - -If the source data codes the Payer in an OMOP supported vocabulary store the concept_id here. - -integer - -No - -No - -Yes - -CONCEPT - -
-plan_concept_id - -This field represents the specific health benefit Plan the Person is enrolled in. - -Map the Plan directly to a standard CONCEPT_ID. If one does not exist, please contact the vocabulary team. There is no global controlled vocabulary available for this information. The point is to stratify on this information and identify if Persons have the same health benefit Plan, though the name of the Plan is not necessary. Accepted Concepts. - -integer - -No - -No - -Yes - -CONCEPT - -
-plan_source_value - -This is the health benefit Plan of the Person as it appears in the source data. - - -varchar(50) - -No - -No - -No - - -
-plan_source_concept_id - - -If the source data codes the Plan in an OMOP supported vocabulary store the concept_id here. - -integer - -No - -No - -Yes - -CONCEPT - -
-sponsor_concept_id - -This field represents the sponsor of the Plan who finances the Plan. This includes self-insured, small group health plan and large group health plan. - -Map the sponsor directly to a standard CONCEPT_ID. If one does not exist, please contact the vocabulary team. There is no global controlled vocabulary available for this information. The point is to stratify on this information and identify if Persons have the same sponsor, though the name of the sponsor is not necessary. Accepted Concepts. - -integer - -No - -No - -Yes - -CONCEPT - -
-sponsor_source_value - -The Plan sponsor as it appears in the source data. - - -varchar(50) - -No - -No - -No - - -
-sponsor_source_concept_id - - -If the source data codes the sponsor in an OMOP supported vocabulary store the concept_id here. - -integer - -No - -No - -Yes - -CONCEPT - -
-family_source_value - -The common identifier for all people (often a family) that are covered by the same policy. - -Often these are the common digits of the enrollment id of the policy members. - -varchar(50) - -No - -No - -No - - -
-stop_reason_concept_id - -This field represents the reason the Person left the Plan, if known. - -Map the stop reason directly to a standard CONCEPT_ID. If one does not exist, please contact the vocabulary team. There is no global controlled vocabulary available for this information. Accepted Concepts. - -integer - -No - -No - -Yes - -CONCEPT - -
-stop_reason_source_value - -The Plan stop reason as it appears in the source data. - - -varchar(50) - -No - -No - -No - - -
-stop_reason_source_concept_id - - -If the source data codes the stop reason in an OMOP supported vocabulary store the concept_id here. - -integer - -No - -No - -Yes - -CONCEPT - -

COST

@@ -9048,598 +750,6 @@ CONCEPT

When dealing with summary costs, the cost of the goods or services the provider provides is often not known directly, but derived from the hospital charges multiplied by an average cost-to-charge ratio.

ETL Conventions

One cost record is generated for each response by a payer. In a claims database, the payment and payment terms reported by the payer for the goods or services billed will generate one cost record. If the source data has payment information for more than one payer (e.g. primary insurance and secondary insurance payment for one entity), then a cost record is created for each reporting payer. Therefore, it is possible for one procedure to have multiple cost records for each payer, but typically it contains one or no record per entity. Payer reimbursement cost records will be identified by using the PAYER_PLAN_PERIOD_ID field. Drug costs are composed of ingredient cost (the amount charged by the wholesale distributor or manufacturer), the dispensing fee (the amount charged by the pharmacy) and the sales tax.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-cost_id - - - -integer - -Yes - -Yes - -No - - -
-cost_event_id - - - -integer - -Yes - -No - -No - - -
-cost_domain_id - - - -varchar(20) - -Yes - -No - -Yes - -DOMAIN - -
-cost_type_concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-currency_concept_id - - - -integer - -No - -No - -Yes - -CONCEPT - -
-total_charge - - - -float - -No - -No - -No - - -
-total_cost - - - -float - -No - -No - -No - - -
-total_paid - - - -float - -No - -No - -No - - -
-paid_by_payer - - - -float - -No - -No - -No - - -
-paid_by_patient - - - -float - -No - -No - -No - - -
-paid_patient_copay - - - -float - -No - -No - -No - - -
-paid_patient_coinsurance - - - -float - -No - -No - -No - - -
-paid_patient_deductible - - - -float - -No - -No - -No - - -
-paid_by_primary - - - -float - -No - -No - -No - - -
-paid_ingredient_cost - - - -float - -No - -No - -No - - -
-paid_dispensing_fee - - - -float - -No - -No - -No - - -
-payer_plan_period_id - - - -integer - -No - -No - -No - - -
-amount_allowed - - - -float - -No - -No - -No - - -
-revenue_code_concept_id - - - -integer - -No - -No - -Yes - -CONCEPT - -
-revenue_code_source_value - -Revenue codes are a method to charge for a class of procedures and conditions in the U.S. hospital system. - - -varchar(50) - -No - -No - -No - - -
-drg_concept_id - - - -integer - -No - -No - -Yes - -CONCEPT - -
-drg_source_value - -Diagnosis Related Groups are US codes used to classify hospital cases into one of approximately 500 groups. - - -varchar(3) - -No - -No - -No - - -
@@ -9650,223 +760,6 @@ No

A Drug Era is defined as a span of time when the Person is assumed to be exposed to a particular active ingredient. A Drug Era is not the same as a Drug Exposure: Exposures are individual records corresponding to the source when Drug was delivered to the Person, while successive periods of Drug Exposures are combined under certain rules to produce continuous Drug Eras.

ETL Conventions

The SQL script for generating DRUG_ERA records can be found here.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-drug_era_id - - - -integer - -Yes - -Yes - -No - - -
-person_id - - - -integer - -Yes - -No - -Yes - -PERSON - -
-drug_concept_id - -The Concept Id representing the specific drug ingredient. - - -integer - -Yes - -No - -Yes - -CONCEPT - -Drug -
-drug_era_start_date - - -The Drug Era Start Date is the start date of the first Drug Exposure for a given ingredient, with at least 31 days since the previous exposure. - -date - -Yes - -No - -No - - -
-drug_era_end_date - - -The Drug Era End Date is the end date of the last Drug Exposure. The End Date of each Drug Exposure is either taken from the field drug_exposure_end_date or, as it is typically not available, inferred using the following rules: For pharmacy prescription data, the date when the drug was dispensed plus the number of days of supply are used to extrapolate the End Date for the Drug Exposure. Depending on the country-specific healthcare system, this supply information is either explicitly provided in the day_supply field or inferred from package size or similar information. For Procedure Drugs, usually the drug is administered on a single date (i.e., the administration date). A standard Persistence Window of 30 days (gap, slack) is permitted between two subsequent such extrapolated DRUG_EXPOSURE records to be considered to be merged into a single Drug Era. - -date - -Yes - -No - -No - - -
-drug_exposure_count - - - -integer - -No - -No - -No - - -
-gap_days - - -The Gap Days determine how many total drug-free days are observed between all Drug Exposure events that contribute to a DRUG_ERA record. It is assumed that the drugs are “not stockpiled” by the patient, i.e. that if a new drug prescription or refill is observed (a new DRUG_EXPOSURE record is written), the remaining supply from the previous events is abandoned. The difference between Persistence Window and Gap Days is that the former is the maximum drug-free time allowed between two subsequent DRUG_EXPOSURE records, while the latter is the sum of actual drug-free days for the given Drug Era under the above assumption of non-stockpiling. - -integer - -No - -No - -No - - -

DOSE_ERA

@@ -9874,226 +767,6 @@ No

A Dose Era is defined as a span of time when the Person is assumed to be exposed to a constant dose of a specific active ingredient.

ETL Conventions

Dose Eras will be derived from records in the DRUG_EXPOSURE table and the Dose information from the DRUG_STRENGTH table using a standardized algorithm. Dose Form information is not taken into account. So, if the patient changes between different formulations, or different manufacturers with the same formulation, the Dose Era is still spanning the entire time of exposure to the Ingredient.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-dose_era_id - - - -integer - -Yes - -Yes - -No - - -
-person_id - - - -integer - -Yes - -No - -Yes - -PERSON - -
-drug_concept_id - -The Concept Id representing the specific drug ingredient. - - -integer - -Yes - -No - -Yes - -CONCEPT - -Drug -
-unit_concept_id - -The Concept Id representing the unit of the specific drug ingredient. - - -integer - -Yes - -No - -Yes - -CONCEPT - -Unit -
-dose_value - -The numeric value of the dosage of the drug_ingredient. - - -float - -Yes - -No - -No - - -
-dose_era_start_date - -The date the Person started on the specific dosage, with at least 31 days since any prior exposure. - - -date - -Yes - -No - -No - - -
-dose_era_end_date - - -The date the Person was no longer exposed to the dosage of the specific drug ingredient. An era is ended if there are 31 days or more between dosage records. - -date - -Yes - -No - -No - - -

CONDITION_ERA

@@ -10105,198 +778,6 @@ No

ETL Conventions

Each Condition Era corresponds to one or many Condition Occurrence records that form a continuous interval. The condition_concept_id field contains Concepts that are identical to those of the CONDITION_OCCURRENCE table records that make up the Condition Era. In contrast to Drug Eras, Condition Eras are not aggregated to contain Conditions of different hierarchical layers. The SQL script for generating CONDITION_ERA records can be found here. The Condition Era Start Date is the start date of the first Condition Occurrence. The Condition Era End Date is the end date of the last Condition Occurrence. Condition Eras are built with a Persistence Window of 30 days, meaning that if no occurrence of the same condition_concept_id happens within 30 days of any one occurrence, the end date of that occurrence will be considered the condition_era_end_date.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-condition_era_id - - - -integer - -Yes - -Yes - -No - - -
-person_id - - - -integer - -Yes - -No - -Yes - -PERSON - -
-condition_concept_id - -The Concept Id representing the Condition. - - -integer - -Yes - -No - -Yes - -CONCEPT - -Condition -
-condition_era_start_date - -The start date for the Condition Era constructed from the individual instances of Condition Occurrences. It is the start date of the very first chronologically recorded instance of the condition with at least 31 days since any prior record of the same Condition. - - -date - -Yes - -No - -No - - -
-condition_era_end_date - -The end date for the Condition Era constructed from the individual instances of Condition Occurrences. It is the end date of the final continuously recorded instance of the Condition. - - -date - -Yes - -No - -No - - -
-condition_occurrence_count - -The number of individual Condition Occurrences used to construct the condition era. - - -integer - -No - -No - -No - - -

EPISODE

@@ -10304,395 +785,6 @@ No

The EPISODE table aggregates lower-level clinical events (VISIT_OCCURRENCE, DRUG_EXPOSURE, PROCEDURE_OCCURRENCE, DEVICE_EXPOSURE) into a higher-level abstraction representing clinically and analytically relevant disease phases, outcomes and treatments. The EPISODE_EVENT table connects qualifying clinical events (VISIT_OCCURRENCE, DRUG_EXPOSURE, PROCEDURE_OCCURRENCE, DEVICE_EXPOSURE) to the appropriate EPISODE entry. An example is cancer, including its development over time, its treatment, and final resolution.

User Guide

Valid Episode Concepts belong to the ‘Episode’ domain. For cancer episodes please see [article], for non-cancer episodes please see [article]. If your source data does not have all episodes that are relevant to the therapeutic area, write only those you can easily derive from the data. It is understood that this table is not currently expected to be comprehensive.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-episode_id - -A unique identifier for each Episode. - - -integer - -Yes - -Yes - -No - - -
-person_id - -The PERSON_ID of the PERSON for whom the episode is recorded. - - -integer - -Yes - -No - -Yes - -PERSON - -
-episode_concept_id - -The EPISODE_CONCEPT_ID represents the kind of abstraction related to the disease phase, outcome or treatment. - -Choose a concept in the Episode domain that best represents the ongoing disease phase, outcome, or treatment. Please see [article] for cancers and [article] for non-cancers describing how these are defined. Accepted Concepts - -integer - -Yes - -No - -Yes - -CONCEPT - -Episode -
-episode_start_date - -The date when the Episode begins. - -Please see [article] for how to define an Episode start date. - -date - -Yes - -No - -No - - -
-episode_start_datetime - -The date and time when the Episode begins. - - -datetime - -No - -No - -No - - -
-episode_end_date - -The date when the instance of the Episode is considered to have ended. - -Please see [article] for how to define an Episode end date. - -date - -No - -No - -No - - -
-episode_end_datetime - -The date and time when the instance of the Episode is considered to have ended. - - -datetime - -No - -No - -No - - -
-episode_parent_id - -Use this field to find the Episode that subsumes the given Episode record. This is used in the case that Episodes are nested within each other. - -If there are multiple nested levels to how Episodes are represented, the EPISODE_PARENT_ID can be used to record this relationship. - -integer - -No - -No - -No - - -
-episode_number - -For sequences of episodes, this is used to indicate the order the episodes occurred. For example, lines of treatment could be indicated here. - -Please see [article] for the details of how to count episodes. - -integer - -No - -No - -No - - -
-episode_object_concept_id - -A Standard Concept representing the disease phase, outcome, or other abstraction of which the episode consists. For example, if the EPISODE_CONCEPT_ID is treatment regimen then the EPISODE_OBJECT_CONCEPT_ID should contain the chemotherapy regimen concept, like Afatinib monotherapy. - -Episode entries from the ‘Disease Episode’ concept class should have an episode_object_concept_id that comes from the Condition domain. Episode entries from the ‘Treatment Episode’ concept class should have an episode_object_concept_id that comes from the ‘Procedure’ domain or ‘Regimen’ concept class. - -integer - -Yes - -No - -Yes - -CONCEPT - -Procedure, Regimen -
-episode_type_concept_id - -This field can be used to determine the provenance of the Episode record, as in whether the episode was from an EHR system, insurance claim, registry, or other sources. - -Choose the EPISODE_TYPE_CONCEPT_ID that best represents the provenance of the record. Accepted Concepts. A more detailed explanation of each Type Concept can be found on the vocabulary wiki. - -integer - -Yes - -No - -Yes - -CONCEPT - -Type Concept -
-episode_source_value - -The source code for the Episode as it appears in the source data. This code is mapped to a Standard Condition Concept in the Standardized Vocabularies and the original code is stored here for reference. - - -varchar(50) - -No - -No - -No - - -
-episode_source_concept_id - -A foreign key to an Episode Concept that refers to the code used in the source. - -Given that the Episodes are user-defined it is unlikely that there will be a Source Concept available. If that is the case then set this field to zero. - -integer - -No - -No - -Yes - -CONCEPT - -

EPISODE_EVENT

@@ -10702,125 +794,6 @@ CONCEPT

This connecting table is used instead of the FACT_RELATIONSHIP table for linking low-level events to abstracted Episodes.

ETL Conventions

Some episodes may not have links to any underlying clinical events. For such episodes, the EPISODE_EVENT table is not populated.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-episode_id - -Use this field to link the EPISODE_EVENT record to its EPISODE. - -Put the EPISODE_ID that subsumes the EPISODE_EVENT record here. - -integer - -Yes - -No - -Yes - -EPISODE - -
-event_id - -This field is the primary key of the linked record in the database. For example, if the Episode Event is a Condition Occurrence, then the CONDITION_OCCURRENCE_ID of the linked record goes in this field. - -Put the primary key of the linked record here. - -integer - -Yes - -No - -No - - -
-episode_event_field_concept_id - -This field is the CONCEPT_ID that identifies which table the primary key of the linked record came from. - -Put the CONCEPT_ID that identifies which table and field the EVENT_ID came from. Accepted Concepts - -integer - -Yes - -No - -Yes - -CONCEPT - -Metadata -
@@ -10829,598 +802,11 @@ Metadata

METADATA

Table Description

The METADATA table contains metadata information about a dataset that has been transformed to the OMOP Common Data Model.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-metadata_id - -The unique key given to a Metadata record. - -Attribute value is auto-generated - -integer - -Yes - -Yes - -No - - -
-metadata_concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-metadata_type_concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-name - - - -varchar(250) - -Yes - -No - -No - - -
-value_as_string - - - -varchar(250) - -No - -No - -No - - -
-value_as_concept_id - - - -integer - -No - -No - -Yes - -CONCEPT - -
-value_as_number - -This is the numerical value of the result of the Metadata, if applicable and available. It is not expected that all Metadata will have numeric results, rather, this field is here to house values should they exist. - - -float - -No - -No - -No - - -
-metadata_date - - - -date - -No - -No - -No - - -
-metadata_datetime - - - -datetime - -No - -No - -No - - -

CDM_SOURCE

Table Description

The CDM_SOURCE table contains detail about the source database and the process used to transform the data into the OMOP Common Data Model.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-cdm_source_name - -The name of the CDM instance. - - -varchar(255) - -Yes - -No - -No - - -
-cdm_source_abbreviation - -The abbreviation of the CDM instance. - - -varchar(25) - -Yes - -No - -No - - -
-cdm_holder - -The holder of the CDM instance. - - -varchar(255) - -Yes - -No - -No - - -
-source_description - -The description of the CDM instance. - - -varchar(MAX) - -No - -No - -No - - -
-source_documentation_reference - - - -varchar(255) - -No - -No - -No - - -
-cdm_etl_reference - - -Put the link to the CDM version used. - -varchar(255) - -No - -No - -No - - -
-source_release_date - -The release date of the source data. - - -date - -Yes - -No - -No - - -
-cdm_release_date - -The release date of the CDM instance. - - -date - -Yes - -No - -No - - -
-cdm_version - - - -varchar(10) - -No - -No - -No - - -
-cdm_version_concept_id - -The Concept Id representing the version of the CDM. - -You can find all concepts that represent the CDM versions using the query: SELECT * FROM CONCEPT WHERE VOCABULARY_ID = 'CDM' AND CONCEPT_CLASS_ID = 'CDM' - -integer - -Yes - -No - -Yes - -CONCEPT - -
-vocabulary_version - - -You can find the version of your Vocabulary using the query: SELECT vocabulary_version FROM vocabulary WHERE vocabulary_id = 'None' - -varchar(20) - -Yes - -No - -No - - -
@@ -11431,2006 +817,52 @@ No

The Standardized Vocabularies contains records, or Concepts, that uniquely identify each fundamental unit of meaning used to express clinical information in all domain tables of the CDM. Concepts are derived from vocabularies, which represent clinical information across a domain (e.g. conditions, drugs, procedures) through the use of codes and associated descriptions. Some Concepts are designated Standard Concepts, meaning these Concepts can be used as normative expressions of a clinical entity within the OMOP Common Data Model and within standardized analytics. Each Standard Concept belongs to one domain, which defines the location where the Concept would be expected to occur within data tables of the CDM.

Concepts can represent broad categories (like ‘Cardiovascular disease’), detailed clinical elements (‘Myocardial infarction of the anterolateral wall’) or modifying characteristics and attributes that define Concepts at various levels of detail (severity of a disease, associated morphology, etc.).

Records in the Standardized Vocabularies tables are derived from national or international vocabularies such as SNOMED-CT, RxNorm, and LOINC, or custom Concepts defined to cover various aspects of observational data analysis.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-concept_id - -A unique identifier for each Concept across all domains. - - -integer - -Yes - -Yes - -No - - -
-concept_name - -An unambiguous, meaningful and descriptive name for the Concept. - - -varchar(255) - -Yes - -No - -No - - -
-domain_id - -A foreign key to the DOMAIN table the Concept belongs to. - - -varchar(20) - -Yes - -No - -Yes - -DOMAIN - -
-vocabulary_id - -A foreign key to the VOCABULARY table indicating from which source the Concept has been adapted. - - -varchar(20) - -Yes - -No - -Yes - -VOCABULARY - -
-concept_class_id - -The attribute or concept class of the Concept. Examples are ‘Clinical Drug’, ‘Ingredient’, ‘Clinical Finding’ etc. - - -varchar(20) - -Yes - -No - -Yes - -CONCEPT_CLASS - -
-standard_concept - -This flag determines whether a Concept is a Standard Concept, i.e. is used in the data, a Classification Concept, or a non-standard Source Concept. The allowable values are ‘S’ (Standard Concept) and ‘C’ (Classification Concept), otherwise the content is NULL. - - -varchar(1) - -No - -No - -No - - -
-concept_code - -The concept code represents the identifier of the Concept in the source vocabulary, such as SNOMED-CT concept IDs, RxNorm RXCUIs etc. Note that concept codes are not unique across vocabularies. - - -varchar(50) - -Yes - -No - -No - - -
-valid_start_date - -The date when the Concept was first recorded. The default value is 1-Jan-1970, meaning, the Concept has no (known) date of inception. - - -date - -Yes - -No - -No - - -
-valid_end_date - -The date when the Concept became invalid because it was deleted or superseded (updated) by a new concept. The default value is 31-Dec-2099, meaning, the Concept is valid until it becomes deprecated. - - -date - -Yes - -No - -No - - -
-invalid_reason - -Reason the Concept was invalidated. Possible values are D (deleted), U (replaced with an update) or NULL when valid_end_date has the default value. - - -varchar(1) - -No - -No - -No - - -

VOCABULARY

Table Description

The VOCABULARY table includes a list of the Vocabularies collected from various sources or created de novo by the OMOP community. This reference table is populated with a single record for each Vocabulary source and includes a descriptive name and other associated attributes for the Vocabulary.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-vocabulary_id - -A unique identifier for each Vocabulary, such as ICD9CM, SNOMED, Visit. - - -varchar(20) - -Yes - -Yes - -No - - -
-vocabulary_name - -The name describing the vocabulary, for example, International Classification of Diseases, Ninth Revision, Clinical Modification, Volume 1 and 2 (NCHS) etc. - - -varchar(255) - -Yes - -No - -No - - -
-vocabulary_reference - -External reference to documentation or available download about the vocabulary. - - -varchar(255) - -No - -No - -No - - -
-vocabulary_version - -Version of the Vocabulary as indicated in the source. - - -varchar(255) - -No - -No - -No - - -
-vocabulary_concept_id - -A Concept that represents the Vocabulary the VOCABULARY record belongs to. - - -integer - -Yes - -No - -Yes - -CONCEPT - -

DOMAIN

Table Description

The DOMAIN table includes a list of OMOP-defined Domains the Concepts of the Standardized Vocabularies can belong to. A Domain defines the set of allowable Concepts for the standardized fields in the CDM tables. For example, the “Condition” Domain contains Concepts that describe a condition of a patient, and these Concepts can only be stored in the condition_concept_id field of the CONDITION_OCCURRENCE and CONDITION_ERA tables. This reference table is populated with a single record for each Domain and includes a descriptive name for the Domain.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-domain_id - -A unique key for each domain. - - -varchar(20) - -Yes - -Yes - -No - - -
-domain_name - -The name describing the Domain, e.g. Condition, Procedure, Measurement etc. - - -varchar(255) - -Yes - -No - -No - - -
-domain_concept_id - -A Concept representing the Domain Concept the DOMAIN record belongs to. - - -integer - -Yes - -No - -Yes - -CONCEPT - -

CONCEPT_CLASS

Table Description

The CONCEPT_CLASS table is a reference table, which includes a list of the classifications used to differentiate Concepts within a given Vocabulary. This reference table is populated with a single record for each Concept Class.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-concept_class_id - -A unique key for each class. - - -varchar(20) - -Yes - -Yes - -No - - -
-concept_class_name - -The name describing the Concept Class, e.g. Clinical Finding, Ingredient, etc. - - -varchar(255) - -Yes - -No - -No - - -
-concept_class_concept_id - -A Concept that represents the Concept Class. - - -integer - -Yes - -No - -Yes - -CONCEPT - -

CONCEPT_RELATIONSHIP

Table Description

The CONCEPT_RELATIONSHIP table contains records that define direct relationships between any two Concepts and the nature or type of the relationship. Each type of a relationship is defined in the RELATIONSHIP table.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-concept_id_1 - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-concept_id_2 - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-relationship_id - -The relationship between CONCEPT_ID_1 and CONCEPT_ID_2. Please see the Vocabulary Conventions for more information. - - -varchar(20) - -Yes - -No - -Yes - -RELATIONSHIP - -
-valid_start_date - -The date when the relationship is first recorded. - - -date - -Yes - -No - -No - - -
-valid_end_date - -The date when the relationship is invalidated. - - -date - -Yes - -No - -No - - -
-invalid_reason - -Reason the relationship was invalidated. Possible values are ‘D’ (deleted), ‘U’ (updated) or NULL. - - -varchar(1) - -No - -No - -No - - -

RELATIONSHIP

Table Description

The RELATIONSHIP table provides a reference list of all types of relationships that can be used to associate any two concepts in the CONCEPT_RELATIONSHIP table.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-relationship_id - -The type of relationship captured by the relationship record. - - -varchar(20) - -Yes - -Yes - -No - - -
-relationship_name - - - -varchar(255) - -Yes - -No - -No - - -
-is_hierarchical - -Defines whether a relationship defines concepts into classes or hierarchies. Values are 1 for hierarchical relationship or 0 if not. - - -varchar(1) - -Yes - -No - -No - - -
-defines_ancestry - -Defines whether a hierarchical relationship contributes to the concept_ancestor table. These are subsets of the hierarchical relationships. Valid values are 1 or 0. - - -varchar(1) - -Yes - -No - -No - - -
-reverse_relationship_id - -The identifier for the relationship used to define the reverse relationship between two concepts. - - -varchar(20) - -Yes - -No - -No - - -
-relationship_concept_id - -A foreign key that refers to an identifier in the CONCEPT table for the unique relationship concept. - - -integer - -Yes - -No - -Yes - -CONCEPT - -

CONCEPT_SYNONYM

Table Description

The CONCEPT_SYNONYM table is used to store alternate names and descriptions for Concepts.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-concept_synonym_name - - - -varchar(1000) - -Yes - -No - -No - - -
-language_concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -

CONCEPT_ANCESTOR

Table Description

The CONCEPT_ANCESTOR table is designed to simplify observational analysis by providing the complete hierarchical relationships between Concepts. Only direct parent-child relationships between Concepts are stored in the CONCEPT_RELATIONSHIP table. To determine higher level ancestry connections, all individual direct relationships would have to be navigated at analysis time. The CONCEPT_ANCESTOR table includes records for all parent-child relationships, as well as grandparent-grandchild relationships and those of any other level of lineage. Using the CONCEPT_ANCESTOR table allows for querying for all descendants of a hierarchical concept. For example, drug ingredients and drug products are all descendants of a drug class ancestor.

This table is entirely derived from the CONCEPT, CONCEPT_RELATIONSHIP and RELATIONSHIP tables.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-ancestor_concept_id - -The Concept Id for the higher-level concept that forms the ancestor in the relationship. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-descendant_concept_id - -The Concept Id for the lower-level concept that forms the descendant in the relationship. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-min_levels_of_separation - -The minimum separation in number of levels of hierarchy between ancestor and descendant concepts. This is an attribute that is used to simplify hierarchic analysis. - - -integer - -Yes - -No - -No - - -
-max_levels_of_separation - -The maximum separation in number of levels of hierarchy between ancestor and descendant concepts. This is an attribute that is used to simplify hierarchic analysis. - - -integer - -Yes - -No - -No - - -

SOURCE_TO_CONCEPT_MAP

Table Description

The source to concept map table is a legacy data structure within the OMOP Common Data Model, recommended for use in ETL processes to maintain local source codes which are not available as Concepts in the Standardized Vocabularies, and to establish mappings for each source code into a Standard Concept as target_concept_ids that can be used to populate the Common Data Model tables. The SOURCE_TO_CONCEPT_MAP table is no longer populated with content within the Standardized Vocabularies published to the OMOP community.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-source_code - -The source code being translated into a Standard Concept. - - -varchar(50) - -Yes - -No - -No - - -
-source_concept_id - -A foreign key to the Source Concept that is being translated into a Standard Concept. - -This is either 0 or should be a number above 2 billion, which are the Concepts reserved for site-specific codes and mappings. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-source_vocabulary_id - -A foreign key to the VOCABULARY table defining the vocabulary of the source code that is being translated to a Standard Concept. - - -varchar(20) - -Yes - -No - -No - - -
-source_code_description - -An optional description for the source code. This is included as a convenience to compare the description of the source code to the name of the concept. - - -varchar(255) - -No - -No - -No - - -
-target_concept_id - -The target Concept to which the source code is being mapped. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-target_vocabulary_id - -The Vocabulary of the target Concept. - - -varchar(20) - -Yes - -No - -Yes - -VOCABULARY - -
-valid_start_date - -The date when the mapping instance was first recorded. - - -date - -Yes - -No - -No - - -
-valid_end_date - -The date when the mapping instance became invalid because it was deleted or superseded (updated) by a new relationship. Default value is 31-Dec-2099. - - -date - -Yes - -No - -No - - -
-invalid_reason - -Reason the mapping instance was invalidated. Possible values are D (deleted), U (replaced with an update) or NULL when valid_end_date has the default value. - - -varchar(1) - -No - -No - -No - - -

DRUG_STRENGTH

Table Description

The DRUG_STRENGTH table contains structured content about the amount or concentration and associated units of a specific ingredient contained within a particular drug product. This table is supplemental information to support standardized analysis of drug utilization.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-drug_concept_id - -The Concept representing the Branded Drug or Clinical Drug Product. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-ingredient_concept_id - -The Concept representing the active ingredient contained within the drug product. - -Combination Drugs will have more than one record in this table, one for each active Ingredient. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-amount_value - -The numeric value or the amount of active ingredient contained within the drug product. - - -float - -No - -No - -No - - -
-amount_unit_concept_id - -The Concept representing the Unit of measure for the amount of active ingredient contained within the drug product. - - -integer - -No - -No - -Yes - -CONCEPT - -
-numerator_value - -The concentration of the active ingredient contained within the drug product. - - -float - -No - -No - -No - - -
-numerator_unit_concept_id - -The Concept representing the Unit of measure for the concentration of active ingredient. - - -integer - -No - -No - -Yes - -CONCEPT - -
-denominator_value - -The amount of total liquid (or other divisible product, such as ointment, gel, spray, etc.). - - -float - -No - -No - -No - - -
-denominator_unit_concept_id - -The Concept representing the denominator unit for the concentration of active ingredient. - - -integer - -No - -No - -Yes - -CONCEPT - -
-box_size - -The number of units of Clinical Branded Drug or Quantified Clinical or Branded Drug contained in a box as dispensed to the patient. - - -integer - -No - -No - -No - - -
-valid_start_date - -The date when the Concept was first recorded. The default value is 1-Jan-1970. - - -date - -Yes - -No - -No - - -
-valid_end_date - -The date when the Concept became invalid. - - -date - -Yes - -No - -No - - -
-invalid_reason - -Reason the concept was invalidated. Possible values are D (deleted), U (replaced with an update) or NULL when valid_end_date has the default value. - - -varchar(1) - -No - -No - -No - - -

COHORT

@@ -13438,365 +870,11 @@ No

The COHORT table contains records of subjects that satisfy a given set of criteria for a duration of time. The definition of the cohort is contained within the COHORT_DEFINITION table. It is listed as part of the RESULTS schema because it is a table that users of the database as well as tools such as ATLAS need to be able to write to. The CDM and Vocabulary tables are all read-only so it is suggested that the COHORT and COHORT_DEFINITION tables are kept in a separate schema to alleviate confusion.

ETL Conventions

Cohorts typically include patients diagnosed with a specific condition, patients exposed to a particular drug, but can also be Providers who have performed a specific Procedure. Cohort records must have a Start Date and an End Date, but the End Date may be set to Start Date or could have an applied censor date using the Observation Period Start Date. Cohort records must contain a Subject Id, which can refer to the Person, Provider, Visit record or Care Site though they are most often Person Ids. The Cohort Definition will define the type of subject through the subject concept id. A subject can belong (or not belong) to a cohort at any moment in time. A subject can only have one record in the cohort table for any moment of time, i.e. it is not possible for a person to contain multiple records indicating cohort membership that are overlapping in time.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-cohort_definition_id - - - -integer - -Yes - -No - -No - - -
-subject_id - - - -integer - -Yes - -No - -No - - -
-cohort_start_date - - - -date - -Yes - -No - -No - - -
-cohort_end_date - - - -date - -Yes - -No - -No - - -

COHORT_DEFINITION

Table Description

The COHORT_DEFINITION table contains records defining a Cohort derived from the data through the associated description and syntax and upon instantiation (execution of the algorithm) placed into the COHORT table. Cohorts are a set of subjects that satisfy a given combination of inclusion criteria for a duration of time. The COHORT_DEFINITION table provides a standardized structure for maintaining the rules governing the inclusion of a subject into a cohort, and can store operational programming code to instantiate the cohort within the OMOP Common Data Model.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-cohort_definition_id - -This is the identifier given to the cohort, usually by the ATLAS application - - -integer - -Yes - -No - -No - - -
-cohort_definition_name - -A short description of the cohort - - -varchar(255) - -Yes - -No - -No - - -
-cohort_definition_description - -A complete description of the cohort. - - -varchar(MAX) - -No - -No - -No - - -
-definition_type_concept_id - -Type defining what kind of Cohort Definition the record represents and how the syntax may be executed. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-cohort_definition_syntax - -Syntax or code to operationalize the Cohort Definition. - - -varchar(MAX) - -No - -No - -No - - -
-subject_concept_id - -This field contains a Concept that represents the domain of the subjects that are members of the cohort (e.g., Person, Provider, Visit). - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-cohort_initiation_date - -A date to indicate when the Cohort was initiated in the COHORT table. - - -date - -No - -No - -No - - -
diff --git a/docs/cdm54Changes.html b/docs/cdm54Changes.html index 9c3e86a..a0aca28 100644 --- a/docs/cdm54Changes.html +++ b/docs/cdm54Changes.html @@ -13,7 +13,7 @@ Changes by Table - + @@ -63,6 +63,7 @@ if (window.hljs) { + @@ -88,6 +89,9 @@ button.code-folding-btn:focus { summary { display: list-item; } +details > summary > p:only-child { + display: inline; +} pre code { padding: 0; } @@ -312,7 +316,7 @@ div.tocify {
@@ -8859,606 +691,6 @@ CONCEPT

The PAYER_PLAN_PERIOD table captures details of the period of time that a Person is continuously enrolled under a specific health Plan benefit structure from a given Payer. Each Person receiving healthcare is typically covered by a health benefit plan, which pays for (fully or partially), or directly provides, the care. These benefit plans are provided by payers, such as health insurances or state or government agencies. In each plan the details of the health benefits are defined for the Person or her family, and the health benefit Plan might change over time typically with increasing utilization (reaching certain cost thresholds such as deductibles), plan availability and purchasing choices of the Person. The unique combinations of Payer organizations, health benefit Plans and time periods in which they are valid for a Person are recorded in this table.

User Guide

A Person can have multiple, overlapping, Payer_Plan_Periods in this table. For example, medical and drug coverage in the US can be represented by two Payer_Plan_Periods. The details of the benefit structure of the Plan are rarely known; the idea is just to identify that the Plans are different.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-payer_plan_period_id - -A unique identifier for each unique combination of a Person, Payer, Plan, and Period of time. - - -bigint - -Yes - -Yes - - - -
-person_id - -The Person covered by the Plan. - -A single Person can have multiple, overlapping, PAYER_PLAN_PERIOD records - -bigint - -Yes - -No - -Yes - -PERSON - -
-contract_person_id - -The Person who is the primary subscriber/contract owner for the Plan. - -This may or may not be the same as the PERSON_ID. For example, if a mother has her son on her plan and the PAYER_PLAN_PERIOD record is for the son, the son’s PERSON_ID would go in PAYER_PLAN_PERIOD.PERSON_ID and the mother’s PERSON_ID would go in PAYER_PLAN_PERIOD.CONTRACT_PERSON_ID. - -bigint - -No - -No - -Yes - -PERSON - -
-payer_plan_period_start_date - -Start date of Plan coverage. - - -date - -Yes - -No - -No - - -
-payer_plan_period_end_date - -End date of Plan coverage. - - -date - -Yes - -No - -No - - -
-payer_concept_id - -This field represents the organization who reimburses the provider which administers care to the Person. - -Map the Payer directly to a standard CONCEPT_ID. If one does not exist, please contact the vocabulary team. There is no global controlled vocabulary available for this information. The point is to stratify on this information and identify if Persons have the same payer, though the name of the Payer is not necessary. If not available, set to 0. Accepted Concepts. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-payer_source_value - -This is the Payer as it appears in the source data. - - -varchar(50) - -No - -No - -No - - -
-payer_source_concept_id - - -If the source data codes the Payer in an OMOP supported vocabulary store the concept_id here. If not available, set to 0. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-plan_concept_id - -This field represents the specific health benefit Plan the Person is enrolled in. - -Map the Plan directly to a standard CONCEPT_ID. If one does not exist, please contact the vocabulary team. There is no global controlled vocabulary available for this information. The point is to stratify on this information and identify if Persons have the same health benefit Plan though the name of the Plan is not necessary. If not available, set to 0. Accepted Concepts. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-plan_source_value - -This is the health benefit Plan of the Person as it appears in the source data. - - -varchar(50) - -No - -No - -No - - -
-plan_source_concept_id - - -If the source data codes the Plan in an OMOP supported vocabulary store the concept_id here. If not available, set to 0. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-contract_concept_id - -This field represents the relationship between the PERSON_ID and CONTRACT_PERSON_ID. It should be read as PERSON_ID is the CONTRACT_CONCEPT_ID of the CONTRACT_PERSON_ID. So if CONTRACT_CONCEPT_ID represents the relationship ‘Stepdaughter’ then the Person for whom PAYER_PLAN_PERIOD record was recorded is the stepdaughter of the CONTRACT_PERSON_ID. - -If available, use this field to represent the relationship between the PERSON_ID and the CONTRACT_PERSON_ID. If the Person for whom the PAYER_PLAN_PERIOD record was recorded is the stepdaughter of the CONTRACT_PERSON_ID then CONTRACT_CONCEPT_ID would be 4330864. If not available, set to 0. Accepted Concepts. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-contract_source_value - -This is the relationship of the PERSON_ID to CONTRACT_PERSON_ID as it appears in the source data. - - -varchar(50) - -Yes - -No - -No - - -
-contract_source_concept_id - - -If the source data codes the relationship between the PERSON_ID and CONTRACT_PERSON_ID in an OMOP supported vocabulary store the concept_id here. If not available, set to 0. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-sponsor_concept_id - -This field represents the sponsor of the Plan who finances the Plan. This includes self-insured, small group health plan and large group health plan. - -Map the sponsor directly to a standard CONCEPT_ID. If one does not exist, please contact the vocabulary team. There is no global controlled vocabulary available for this information. The point is to stratify on this information and identify if Persons have the same sponsor though the name of the sponsor is not necessary. If not available, set to 0. Accepted Concepts. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-sponsor_source_value - -The Plan sponsor as it appears in the source data. - - -varchar(50) - -No - -No - -No - - -
-sponsor_source_concept_id - - -If the source data codes the sponsor in an OMOP supported vocabulary store the concept_id here. - -integer - -No - -No - -Yes - -CONCEPT - -
-family_source_value - -The common identifier for all people (often a family) that are covered by the same policy. - -Often these are the common digits of the enrollment id of the policy members. - -varchar(50) - -No - -No - -No - - -
-stop_reason_concept_id - -This field represents the reason the Person left the Plan, if known. - -Map the stop reason directly to a standard CONCEPT_ID. If one does not exist, please contact the vocabulary team. There is no global controlled vocabulary available for this information. Accepted Concepts. - -integer - -No - -No - -Yes - -CONCEPT - -
-stop_reason_source_value - -The Plan stop reason as it appears in the source data. - - -varchar(50) - -No - -No - -No - - -
-stop_reason_source_concept_id - - -If the source data codes the stop reason in an OMOP supported vocabulary store the concept_id here. - -integer - -No - -No - -Yes - -CONCEPT - -

COST

@@ -9469,518 +701,6 @@ CONCEPT

When dealing with summary costs, the cost of the goods or services the provider provides is often not known directly, but derived from the hospital charges multiplied by an average cost-to-charge ratio.

ETL Conventions

One cost record is generated for each response by a payer. In a claims database, the payment and payment terms reported by the payer for the goods or services billed will generate one cost record. If the source data has payment information for more than one payer (i.e. primary insurance and secondary insurance payment for one entity), then a cost record is created for each reporting payer. Therefore, it is possible for one procedure to have multiple cost records for each payer, but typically it contains one or no record per entity. Payer reimbursement cost records will be identified by using the PAYER_PLAN_ID field. Drug costs are composed of ingredient cost (the amount charged by the wholesale distributor or manufacturer), the dispensing fee (the amount charged by the pharmacy) and the sales tax.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-cost_id - -A unique identifier for each COST record. - - -bigint - -Yes - -Yes - -No - - -
-person_id - - - -bigint - -Yes - -No - -No - - -
-cost_event_id - -If the Cost record is related to another record in the database, this field is the primary key of the linked record. - -Put the primary key of the linked record, if applicable, here. - -bigint - -Yes - -No - -No - - -
-cost_event_field_concept_id - -If the Cost record is related to another record in the database, this field is the CONCEPT_ID that identifies which table the primary key of the linked record came from. - -Put the CONCEPT_ID that identifies which table and field the COST_EVENT_ID came from. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-cost_concept_id - -A foreign key that refers to a Standard Cost Concept identifier in the Standardized Vocabularies belonging to the ‘Cost’ vocabulary. - - -integer - -No - -No - -Yes - -CONCEPT - -
-cost_type_concept_id - -A foreign key identifier to a concept in the CONCEPT table for the provenance or the source of the COST data and belonging to the ‘Type Concept’ vocabulary - - -integer - -No - -No - -Yes - -CONCEPT - -Type Concept -
-cost_source_concept_id - -A foreign key to a Cost Concept that refers to the code used in the source. - - -integer - -No - -No - -Yes - -CONCEPT - -
-cost_source_value - -The source value for the cost as it appears in the source data - - -varchar(50) - -No - -No - -No - - -
-currency_concept_id - -A foreign key identifier to the concept representing the 3-letter code used to delineate international currencies, such as USD for US Dollar. These belong to the ‘Currency’ vocabulary - - -integer - -No - -No - -No - -CONCEPT - -
-cost - -The actual financial cost amount - - -float - -No - -No - -No - - -
-incurred_date - -The first date of service of the clinical event corresponding to the cost as in the table capturing the information (e.g. date of visit, date of procedure, date of condition, date of drug etc). - - -date - -No - -No - -No - - -
-billed_date - -The date a bill was generated for a service or encounter - - -date - -No - -No - -No - - -
-paid_date - -The date payment was received for a service or encounter - - -date - -No - -No - -No - - -
-revenue_code_concept_id - -A foreign key referring to a Standard Concept ID in the Standardized Vocabularies for Revenue codes belonging to the ‘Revenue Code’ vocabulary. - - -integer - -No - -No - -Yes - -CONCEPT - -
-drg_concept_id - -A foreign key referring to a Standard Concept ID in the Standardized Vocabularies for DRG codes belonging to the ‘DRG’ vocabulary. - - -integer - -No - -No - -Yes - -CONCEPT - -
-revenue_code_source_value - -The source value for the Revenue code as it appears in the source data, stored here for reference. - - -varchar(50) - -No - -No - -No - - -
-drg_source_value - -The source value for the 3-digit DRG source code as it appears in the source data, stored here for reference. - - -varchar(50) - -No - -No - -No - - -
-payer_plan_period_id - -A foreign key to the PAYER_PLAN_PERIOD table, where the details of the Payer, Plan and Family are stored. Record the payer_plan_id that relates to the payer who contributed to the paid_by_payer field. - - -bigint - -No - -No - -No - - -
@@ -9991,223 +711,6 @@ No

A Drug Era is defined as a span of time when the Person is assumed to be exposed to a particular active ingredient. A Drug Era is not the same as a Drug Exposure: Exposures are individual records corresponding to the source when Drug was delivered to the Person, while successive periods of Drug Exposures are combined under certain rules to produce continuous Drug Eras.

ETL Conventions

The SQL script for generating DRUG_ERA records can be found here.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-drug_era_id - - - -bigint - -Yes - -Yes - -No - - -
-person_id - - - -bigint - -Yes - -No - -Yes - -PERSON - -
-drug_concept_id - -The Concept Id representing the specific drug ingredient. - - -integer - -Yes - -No - -Yes - -CONCEPT - -Drug -
-drug_era_start_datetime - - -The Drug Era Start Date is the start date of the first Drug Exposure for a given ingredient, with at least 31 days since the previous exposure. - -datetime - -Yes - -No - -No - - -
-drug_era_end_datetime - - -The Drug Era End Date is the end date of the last Drug Exposure. The End Date of each Drug Exposure is either taken from the field drug_exposure_end_date or, as it is typically not available, inferred using the following rules: For pharmacy prescription data, the date when the drug was dispensed plus the number of days of supply are used to extrapolate the End Date for the Drug Exposure. Depending on the country-specific healthcare system, this supply information is either explicitly provided in the day_supply field or inferred from package size or similar information. For Procedure Drugs, usually the drug is administered on a single date (i.e., the administration date). A standard Persistence Window of 30 days (gap, slack) is permitted between two subsequent such extrapolated DRUG_EXPOSURE records to be considered to be merged into a single Drug Era. - -datetime - -Yes - -No - -No - - -
-drug_exposure_count - - - -integer - -No - -No - -No - - -
-gap_days - - -The Gap Days determine how many total drug-free days are observed between all Drug Exposure events that contribute to a DRUG_ERA record. It is assumed that the drugs are “not stockpiled” by the patient, i.e. that if a new drug prescription or refill is observed (a new DRUG_EXPOSURE record is written), the remaining supply from the previous events is abandoned. The difference between Persistence Window and Gap Days is that the former is the maximum drug-free time allowed between two subsequent DRUG_EXPOSURE records, while the latter is the sum of actual drug-free days for the given Drug Era under the above assumption of non-stockpiling. - -integer - -No - -No - -No - - -

DOSE_ERA

@@ -10215,226 +718,6 @@ No

A Dose Era is defined as a span of time when the Person is assumed to be exposed to a constant dose of a specific active ingredient.

ETL Conventions

Dose Eras will be derived from records in the DRUG_EXPOSURE table and the Dose information from the DRUG_STRENGTH table using a standardized algorithm. Dose Form information is not taken into account. So, if the patient changes between different formulations, or different manufacturers with the same formulation, the Dose Era is still spanning the entire time of exposure to the Ingredient.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-dose_era_id - - - -bigint - -Yes - -Yes - -No - - -
-person_id - - - -bigint - -Yes - -No - -Yes - -PERSON - -
-drug_concept_id - -The Concept Id representing the specific drug ingredient. - - -integer - -Yes - -No - -Yes - -CONCEPT - -Drug -
-unit_concept_id - -The Concept Id representing the unit of the specific drug ingredient. - - -integer - -Yes - -No - -Yes - -CONCEPT - -Unit -
-dose_value - -The numeric value of the dosage of the drug_ingredient. - - -float - -Yes - -No - -No - - -
-dose_era_start_datetime - -The date the Person started on the specific dosage, with at least 31 days since any prior exposure. - - -datetime - -Yes - -No - -No - - -
-dose_era_end_datetime - - -The date the Person was no longer exposed to the dosage of the specific drug ingredient. An era is ended if there are 31 days or more between dosage records. - -datetime - -Yes - -No - -No - - -

CONDITION_ERA

@@ -10446,198 +729,6 @@ No

ETL Conventions

Each Condition Era corresponds to one or many Condition Occurrence records that form a continuous interval. The condition_concept_id field contains Concepts that are identical to those of the CONDITION_OCCURRENCE table records that make up the Condition Era. In contrast to Drug Eras, Condition Eras are not aggregated to contain Conditions of different hierarchical layers. The SQL script for generating CONDITION_ERA records can be found here. The Condition Era Start Date is the start date of the first Condition Occurrence. The Condition Era End Date is the end date of the last Condition Occurrence. Condition Eras are built with a Persistence Window of 30 days, meaning, if no occurrence of the same condition_concept_id happens within 30 days of any one occurrence, it will be considered the condition_era_end_date.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-condition_era_id - - - -bigint - -Yes - -Yes - -No - - -
-person_id - - - -bigint - -Yes - -No - -No - -PERSON - -
-condition_concept_id - -The Concept Id representing the Condition. - - -integer - -Yes - -No - -Yes - -CONCEPT - -Condition -
-condition_era_start_datetime - -The start date for the Condition Era constructed from the individual instances of Condition Occurrences. It is the start date of the very first chronologically recorded instance of the condition with at least 31 days since any prior record of the same Condition. - - -datetime - -Yes - -No - -No - - -
-condition_era_end_datetime - -The end date for the Condition Era constructed from the individual instances of Condition Occurrences. It is the end date of the final continuously recorded instance of the Condition. - - -datetime - -Yes - -No - -No - - -
-condition_occurrence_count - -The number of individual Condition Occurrences used to construct the condition era. - - -integer - -No - -No - -No - - -
@@ -10646,516 +737,11 @@ No

METADATA

Table Description

The METADATA table contains metadata information about a dataset that has been transformed to the OMOP Common Data Model.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-metadata_concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-metadata_type_concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-name - - - -varchar(250) - -Yes - -No - -No - - -
-value_as_string - - - -varchar(250) - -No - -No - -No - - -
-value_as_concept_id - - - -integer - -No - -No - -Yes - -CONCEPT - -
-metadata_date - - - -date - -No - -No - -No - - -
-metadata_datetime - - - -datetime - -No - -No - -No - - -

CDM_SOURCE

Table Description

The CDM_SOURCE table contains detail about the source database and the process used to transform the data into the OMOP Common Data Model.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-cdm_source_name - -The name of the CDM instance. - - -varchar(255) - -Yes - -No - -No - - -
-cdm_source_abbreviation - -The abbreviation of the CDM instance. - - -varchar(25) - -No - -No - -No - - -
-cdm_holder - -The holder of the CDM instance. - - -varchar(255) - -No - -No - -No - - -
-source_description - -The description of the CDM instance. - - -varchar(MAX) - -No - -No - -No - - -
-source_documentation_reference - - - -varchar(255) - -No - -No - -No - - -
-cdm_etl_reference - - -Put the link to the CDM version used. - -varchar(255) - -No - -No - -No - - -
-source_release_date - -The release date of the source data. - - -date - -No - -No - -No - - -
-cdm_release_date - -The release date of the CDM instance. - - -date - -No - -No - -No - - -
-cdm_version - - - -varchar(10) - -No - -No - -No - - -
-vocabulary_version - - - -varchar(20) - -No - -No - -No - - -
@@ -11166,2001 +752,52 @@ No

The Standardized Vocabularies contains records, or Concepts, that uniquely identify each fundamental unit of meaning used to express clinical information in all domain tables of the CDM. Concepts are derived from vocabularies, which represent clinical information across a domain (e.g. conditions, drugs, procedures) through the use of codes and associated descriptions. Some Concepts are designated Standard Concepts, meaning these Concepts can be used as normative expressions of a clinical entity within the OMOP Common Data Model and within standardized analytics. Each Standard Concept belongs to one domain, which defines the location where the Concept would be expected to occur within data tables of the CDM.

Concepts can represent broad categories (like ‘Cardiovascular disease’), detailed clinical elements (‘Myocardial infarction of the anterolateral wall’) or modifying characteristics and attributes that define Concepts at various levels of detail (severity of a disease, associated morphology, etc.).

Records in the Standardized Vocabularies tables are derived from national or international vocabularies such as SNOMED-CT, RxNorm, and LOINC, or custom Concepts defined to cover various aspects of observational data analysis.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-concept_id - -A unique identifier for each Concept across all domains. - - -integer - -Yes - -Yes - -No - - -
-concept_name - -An unambiguous, meaningful and descriptive name for the Concept. - - -varchar(255) - -Yes - -No - -No - - -
-domain_id - -A foreign key to the DOMAIN table the Concept belongs to. - - -varchar(20) - -Yes - -No - -Yes - -DOMAIN - -
-vocabulary_id - -A foreign key to the VOCABULARY table indicating from which source the Concept has been adapted. - - -varchar(20) - -Yes - -No - -Yes - -VOCABULARY - -
-concept_class_id - -The attribute or concept class of the Concept. Examples are ‘Clinical Drug’, ‘Ingredient’, ‘Clinical Finding’ etc. - - -varchar(20) - -Yes - -No - -Yes - -CONCEPT_CLASS - -
-standard_concept - -This flag determines whether a Concept is a Standard Concept, i.e. is used in the data, a Classification Concept, or a non-standard Source Concept. The allowable values are ‘S’ (Standard Concept) and ‘C’ (Classification Concept), otherwise the content is NULL. - - -varchar(1) - -No - -No - -No - - -
-concept_code - -The concept code represents the identifier of the Concept in the source vocabulary, such as SNOMED-CT concept IDs, RxNorm RXCUIs etc. Note that concept codes are not unique across vocabularies. - - -varchar(50) - -Yes - -No - -No - - -
-valid_start_date - -The date when the Concept was first recorded. The default value is 1-Jan-1970, meaning, the Concept has no (known) date of inception. - - -date - -Yes - -No - -No - - -
-valid_end_date - -The date when the Concept became invalid because it was deleted or superseded (updated) by a new concept. The default value is 31-Dec-2099, meaning, the Concept is valid until it becomes deprecated. - - -date - -Yes - -No - -No - - -
-invalid_reason - -Reason the Concept was invalidated. Possible values are D (deleted), U (replaced with an update) or NULL when valid_end_date has the default value. - - -varchar(1) - -No - -No - -No - - -

VOCABULARY

Table Description

The VOCABULARY table includes a list of the Vocabularies collected from various sources or created de novo by the OMOP community. This reference table is populated with a single record for each Vocabulary source and includes a descriptive name and other associated attributes for the Vocabulary.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-vocabulary_id - -A unique identifier for each Vocabulary, such as ICD9CM, SNOMED, Visit. - - -varchar(20) - -Yes - -Yes - -No - - -
-vocabulary_name - -The name describing the vocabulary, for example International Classification of Diseases, Ninth Revision, Clinical Modification, Volume 1 and 2 (NCHS) etc. - - -varchar(255) - -Yes - -No - -No - - -
-vocabulary_reference - -External reference to documentation or available download about the vocabulary. - - -varchar(255) - -Yes - -No - -No - - -
-vocabulary_version - -Version of the Vocabulary as indicated in the source. - - -varchar(255) - -No - -No - -No - - -
-vocabulary_concept_id - -A Concept that represents the Vocabulary the VOCABULARY record belongs to. - - -integer - -Yes - -No - -Yes - -CONCEPT - -

DOMAIN

Table Description

The DOMAIN table includes a list of OMOP-defined Domains the Concepts of the Standardized Vocabularies can belong to. A Domain defines the set of allowable Concepts for the standardized fields in the CDM tables. For example, the “Condition” Domain contains Concepts that describe a condition of a patient, and these Concepts can only be stored in the condition_concept_id field of the CONDITION_OCCURRENCE and CONDITION_ERA tables. This reference table is populated with a single record for each Domain and includes a descriptive name for the Domain.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-domain_id - -A unique key for each domain. - - -varchar(20) - -Yes - -Yes - -No - - -
-domain_name - -The name describing the Domain, e.g. Condition, Procedure, Measurement etc. - - -varchar(255) - -Yes - -No - -No - - -
-domain_concept_id - -A Concept representing the Domain Concept the DOMAIN record belongs to. - - -integer - -Yes - -No - -Yes - -CONCEPT - -

CONCEPT_CLASS

Table Description

The CONCEPT_CLASS table is a reference table, which includes a list of the classifications used to differentiate Concepts within a given Vocabulary. This reference table is populated with a single record for each Concept Class.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-concept_class_id - -A unique key for each class. - - -varchar(20) - -Yes - -Yes - -No - - -
-concept_class_name - -The name describing the Concept Class, e.g. Clinical Finding, Ingredient, etc. - - -varchar(255) - -Yes - -No - -No - - -
-concept_class_concept_id - -A Concept that represents the Concept Class. - - -integer - -Yes - -No - -Yes - -CONCEPT - -

CONCEPT_RELATIONSHIP

Table Description

The CONCEPT_RELATIONSHIP table contains records that define direct relationships between any two Concepts and the nature or type of the relationship. Each type of a relationship is defined in the RELATIONSHIP table.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-concept_id_1 - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-concept_id_2 - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-relationship_id - -The relationship between CONCEPT_ID_1 and CONCEPT_ID_2. Please see the Vocabulary Conventions for more information. - - -varchar(20) - -Yes - -No - -Yes - -RELATIONSHIP - -
-valid_start_date - -The date when the relationship is first recorded. - - -date - -Yes - -No - -No - - -
-valid_end_date - -The date when the relationship is invalidated. - - -date - -Yes - -No - -No - - -
-invalid_reason - -Reason the relationship was invalidated. Possible values are ‘D’ (deleted), ‘U’ (updated) or NULL. - - -varchar(1) - -No - -No - -No - - -

RELATIONSHIP

Table Description

The RELATIONSHIP table provides a reference list of all types of relationships that can be used to associate any two concepts in the CONCEPT_RELATIONSHIP table.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-relationship_id - - - -varchar(20) - -Yes - -Yes - -No - - -
-relationship_name - - - -varchar(255) - -Yes - -No - -No - - -
-is_hierarchical - - - -varchar(1) - -Yes - -No - -No - - -
-defines_ancestry - - - -varchar(1) - -Yes - -No - -No - - -
-reverse_relationship_id - - - -varchar(20) - -Yes - -No - -No - - -
-relationship_concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -

CONCEPT_SYNONYM

Table Description

The CONCEPT_SYNONYM table is used to store alternate names and descriptions for Concepts.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-concept_synonym_name - - - -varchar(1000) - -Yes - -No - -No - - -
-language_concept_id - - - -integer - -Yes - -No - -Yes - -CONCEPT - -

CONCEPT_ANCESTOR

Table Description

The CONCEPT_ANCESTOR table is designed to simplify observational analysis by providing the complete hierarchical relationships between Concepts. Only direct parent-child relationships between Concepts are stored in the CONCEPT_RELATIONSHIP table. To determine higher level ancestry connections, all individual direct relationships would have to be navigated at analysis time. The CONCEPT_ANCESTOR table includes records for all parent-child relationships, as well as grandparent-grandchild relationships and those of any other level of lineage. Using the CONCEPT_ANCESTOR table allows for querying for all descendants of a hierarchical concept. For example, drug ingredients and drug products are all descendants of a drug class ancestor.

This table is entirely derived from the CONCEPT, CONCEPT_RELATIONSHIP and RELATIONSHIP tables.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-ancestor_concept_id - -The Concept Id for the higher-level concept that forms the ancestor in the relationship. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-descendant_concept_id - -The Concept Id for the lower-level concept that forms the descendant in the relationship. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-min_levels_of_separation - -The minimum separation in number of levels of hierarchy between ancestor and descendant concepts. This is an attribute that is used to simplify hierarchic analysis. - - -integer - -Yes - -No - -No - - -
-max_levels_of_separation - -The maximum separation in number of levels of hierarchy between ancestor and descendant concepts. This is an attribute that is used to simplify hierarchic analysis. - - -integer - -Yes - -No - -No - - -

SOURCE_TO_CONCEPT_MAP

Table Description

The source to concept map table is a legacy data structure within the OMOP Common Data Model, recommended for use in ETL processes to maintain local source codes which are not available as Concepts in the Standardized Vocabularies, and to establish mappings for each source code into a Standard Concept as target_concept_ids that can be used to populate the Common Data Model tables. The SOURCE_TO_CONCEPT_MAP table is no longer populated with content within the Standardized Vocabularies published to the OMOP community.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-source_code - -The source code being translated into a Standard Concept. - - -varchar(50) - -Yes - -No - -No - - -
-source_concept_id - -A foreign key to the Source Concept that is being translated into a Standard Concept. - -This is either 0 or should be a number above 2 billion, which are the Concepts reserved for site-specific codes and mappings. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-source_vocabulary_id - -A foreign key to the VOCABULARY table defining the vocabulary of the source code that is being translated to a Standard Concept. - - -varchar(20) - -Yes - -No - -No - - -
-source_code_description - -An optional description for the source code. This is included as a convenience to compare the description of the source code to the name of the concept. - - -varchar(255) - -No - -No - -No - - -
-target_concept_id - -The target Concept to which the source code is being mapped. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-target_vocabulary_id - -The Vocabulary of the target Concept. - - -varchar(20) - -Yes - -No - -Yes - -VOCABULARY - -
-valid_start_date - -The date when the mapping instance was first recorded. - - -date - -Yes - -No - -No - - -
-valid_end_date - -The date when the mapping instance became invalid because it was deleted or superseded (updated) by a new relationship. Default value is 31-Dec-2099. - - -date - -Yes - -No - -No - - -
-invalid_reason - -Reason the mapping instance was invalidated. Possible values are D (deleted), U (replaced with an update) or NULL when valid_end_date has the default value. - - -varchar(1) - -No - -No - -No - - -

DRUG_STRENGTH

Table Description

The DRUG_STRENGTH table contains structured content about the amount or concentration and associated units of a specific ingredient contained within a particular drug product. This table is supplemental information to support standardized analysis of drug utilization.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-drug_concept_id - -The Concept representing the Branded Drug or Clinical Drug Product. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-ingredient_concept_id - -The Concept representing the active ingredient contained within the drug product. - -Combination Drugs will have more than one record in this table, one for each active Ingredient. - -integer - -Yes - -No - -Yes - -CONCEPT - -
-amount_value - -The numeric value or the amount of active ingredient contained within the drug product. - - -float - -No - -No - -No - - -
-amount_unit_concept_id - -The Concept representing the Unit of measure for the amount of active ingredient contained within the drug product. - - -integer - -No - -No - -Yes - -CONCEPT - -
-numerator_value - -The concentration of the active ingredient contained within the drug product. - - -float - -No - -No - -No - - -
-numerator_unit_concept_id - -The Concept representing the Unit of measure for the concentration of active ingredient. - - -integer - -No - -No - -Yes - -CONCEPT - -
-denominator_value - -The amount of total liquid (or other divisible product, such as ointment, gel, spray, etc.). - - -float - -No - -No - -No - - -
-denominator_unit_concept_id - -The Concept representing the denominator unit for the concentration of active ingredient. - - -integer - -No - -No - -Yes - -CONCEPT - -
-box_size - -The number of units of Clinical Branded Drug or Quantified Clinical or Branded Drug contained in a box as dispensed to the patient. - - -integer - -No - -No - -No - - -
-valid_start_date - -The date when the Concept was first recorded. The default value is 1-Jan-1970. - - -date - -Yes - -No - -No - - -
-valid_end_date - -The date when the Concept became invalid. - - -date - -Yes - -No - -No - - -
-invalid_reason - -Reason the concept was invalidated. Possible values are D (deleted), U (replaced with an update) or NULL when valid_end_date has the default value. - - -varchar(1) - -No - -No - -No - - -

COHORT

@@ -13168,366 +805,11 @@ No

The COHORT table contains records of subjects that satisfy a given set of criteria for a duration of time. The definition of the cohort is contained within the COHORT_DEFINITION table. It is listed as part of the RESULTS schema because it is a table that users of the database as well as tools such as ATLAS need to be able to write to. The CDM and Vocabulary tables are all read-only, so it is suggested that the COHORT and COHORT_DEFINITION tables are kept in a separate schema to alleviate confusion.

ETL Conventions

Cohorts typically include patients diagnosed with a specific condition, patients exposed to a particular drug, but can also be Providers who have performed a specific Procedure. Cohort records must have a Start Date and an End Date, but the End Date may be set to Start Date or could have an applied censor date using the Observation Period Start Date. Cohort records must contain a Subject Id, which can refer to the Person, Provider, Visit record or Care Site though they are most often Person Ids. The Cohort Definition will define the type of subject through the subject concept id. A subject can belong (or not belong) to a cohort at any moment in time. A subject can only have one record in the cohort table for any moment of time, i.e. it is not possible for a subject to have multiple records indicating cohort membership that are overlapping in time.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-cohort_definition_id - - - -integer - -Yes - -No - -No - - -
-subject_id - - - -integer - -Yes - -No - -No - - -
-cohort_start_date - - - -date - -Yes - -No - -No - - -
-cohort_end_date - - - -date - -Yes - -No - -No - - -

COHORT_DEFINITION

Table Description

The COHORT_DEFINITION table contains records defining a Cohort derived from the data through the associated description and syntax and upon instantiation (execution of the algorithm) placed into the COHORT table. Cohorts are a set of subjects that satisfy a given combination of inclusion criteria for a duration of time. The COHORT_DEFINITION table provides a standardized structure for maintaining the rules governing the inclusion of a subject into a cohort, and can store operational programming code to instantiate the cohort within the OMOP Common Data Model.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-CDM Field - -User Guide - -ETL Conventions - -Datatype - -Required - -Primary Key - -Foreign Key - -FK Table - -FK Domain -
-cohort_definition_id - -This is the identifier given to the cohort, usually by the ATLAS application - - -integer - -Yes - -No - -Yes - -COHORT - -
-cohort_definition_name - -A short description of the cohort - - -varchar(255) - -Yes - -No - -No - - -
-cohort_definition_description - -A complete description of the cohort. - - -varchar(MAX) - -No - -No - -No - - -
-definition_type_concept_id - -Type defining what kind of Cohort Definition the record represents and how the syntax may be executed. - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-cohort_definition_syntax - -Syntax or code to operationalize the Cohort Definition. - - -varchar(MAX) - -No - -No - -No - - -
-subject_concept_id - -This field contains a Concept that represents the domain of the subjects that are members of the cohort (e.g., Person, Provider, Visit). - - -integer - -Yes - -No - -Yes - -CONCEPT - -
-cohort_initiation_date - -A date to indicate when the Cohort was initiated in the COHORT table. - - -date - -No - -No - -No - - -
diff --git a/docs/cdmPrivacy.html b/docs/cdmPrivacy.html index 971df68..392f369 100644 --- a/docs/cdmPrivacy.html +++ b/docs/cdmPrivacy.html @@ -13,7 +13,7 @@ Preserving Privacy in an OMOP CDM Implementation - + @@ -63,6 +63,7 @@ if (window.hljs) { + @@ -88,6 +89,9 @@ button.code-folding-btn:focus { summary { display: list-item; } +details > summary > p:only-child { + display: inline; +} pre code { padding: 0; } @@ -312,7 +316,7 @@ div.tocify { + + + + +
+

Background

+

The Observational Medical Outcomes Partnership (OMOP) was a public-private partnership established to inform the appropriate use of observational healthcare databases for studying the effects of medical products. Over the course of the 5-year project and through its community of researchers from industry, government, and academia, OMOP successfully achieved its aims to:

+ +

The results of OMOP’s research have been widely published and presented at scientific conferences, including annual symposia.

+

The OMOP Legacy continues…

+

The community is actively using the OMOP Common Data Model for their various research purposes. Those tools will continue to be maintained and supported, and information about this work is available in the public domain.

+

The OMOP Common Data Model will continue to be an open-source community standard for observational healthcare data. The model specifications and associated work products will be placed in the public domain, and the entire research community is encouraged to use these tools to support everybody’s own research activities.

+
+
+

The Role of the Common Data Model

+

No single observational data source provides a comprehensive view of the clinical data a patient accumulates while receiving healthcare, and therefore none can be sufficient to meet all expected outcome analysis needs. This explains the need for assessing and analyzing multiple data sources concurrently using a common data standard. This standard is provided by the OMOP Common Data Model (CDM).

+

The CDM is designed to support the conduct of research to identify and evaluate associations between interventions (drug exposure, procedures, healthcare policy changes etc.) and outcomes caused by these interventions (condition occurrences, procedures, drug exposure etc.). Outcomes can be efficacious (benefit) or adverse (safety risk). Often times, specific patient cohorts (e.g., those taking a certain drug or suffering from a certain disease) may be defined for treatments or outcomes, using clinical events (diagnoses, observations, procedures, etc.) that occur in predefined temporal relationships to each other. The CDM, combined with its standardized content (via the Standardized Vocabularies), will ensure that research methods can be systematically applied to produce meaningfully comparable and reproducible results.

+
+
+

Design Principles

+

The CDM is designed to include all observational health data elements (experiences of the patient receiving health care) that are relevant for analytic use cases to support the generation of reliable scientific evidence about disease natural history, healthcare delivery, effects of medical interventions, the identification of demographic information, health care interventions and outcomes.

+

Therefore, the CDM is designed to store observational data to allow for research, under the following principles:

+ +
+ + + + + + + + + + + + + + + + + + + + + + + diff --git a/rmd/cdm30.html b/rmd/cdm30.html new file mode 100644 index 0000000..7c0a314 --- /dev/null +++ b/rmd/cdm30.html @@ -0,0 +1,568 @@ + + + + + + + + + + + + + +CDM v3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +

You can find the specification for CDM v3 at this link.

+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/cdm53.Rmd b/rmd/cdm53.Rmd index 8daccee..1ba23e4 100644 --- a/rmd/cdm53.Rmd +++ b/rmd/cdm53.Rmd @@ -91,11 +91,9 @@ for(tb in tables) { loopTable <- subset(loopTable, select = -c(1)) print(kable(x = loopTable, align = "l", row.names = FALSE, format = "html", escape = FALSE) %>% - column_spec(1, bold = T) %>% - column_spec(2, width = "3in", include_thead = T) %>% - column_spec(3, width = "4in", include_thead = T) %>% - column_spec(4:9, width = "1in", include_thead = T) %>% - kable_styling(c("condensed","hover"), position = "center", full_width = T, font_size = 13)) + kable_styling(c("condensed","hover"), position = "center", full_width = T, font_size = 13) %>% + column_spec(1, bold = T)) + } diff --git a/rmd/cdm531.html b/rmd/cdm531.html new file mode 100644 index 0000000..f36840a --- /dev/null +++ b/rmd/cdm531.html @@ -0,0 +1,568 @@ + + + + + + + + + + + + + +OMOP CDM v5.3.1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +

This documentation previously referenced v5.3.1. During the OHDSI/CommonDataModel Hack-A-Thon that occurred on August 18, 2021 the decision was made to align documentation with the minor releases. This is because only major and minor releases include changes to the model itself while bug fixes address issues with SQL incompatibility, problems in the code, etc. Hot fixes and minor.micro releases can be found by searching the tags. For the CDM v5.3 specification please see the 5.3 page.

+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/cdm54.Rmd b/rmd/cdm54.Rmd index 5b9cc0f..d51e0e0 100644 --- a/rmd/cdm54.Rmd +++ b/rmd/cdm54.Rmd @@ -107,13 +107,19 @@ for(tb in tables) { loopTable <- subset(loopTable, select = -c(1)) print(kable(x = loopTable, align = "l", row.names = FALSE, format = "html", escape = FALSE) %>% - column_spec(1, bold = T) %>% - column_spec(2, width = "3in", include_thead = T) %>% - column_spec(3, width = "4in", include_thead = T) %>% - column_spec(4:9, width = "1in", include_thead = T) %>% - kable_styling(c("condensed","hover"), position = "center", full_width = T, font_size = 13)) - + kable_styling(c("condensed","hover"), position = "center", full_width = T, font_size = 13) %>% + column_spec(1, bold = T)) + + # column_spec(2, width = "3in", include_thead = T) %>% + # column_spec(3, width = "4in", include_thead = T) %>% + # column_spec(4:9, width = "1in", include_thead = T) %>% + # kable_styling(c("condensed","hover"), position = "center", full_width = T, font_size = 13) ) + + + + + } diff --git a/rmd/cdm54.html b/rmd/cdm54.html new file mode 100644 index 0000000..9f35696 --- /dev/null +++ b/rmd/cdm54.html @@ -0,0 +1,958 @@ + + + + + + + + + + + + + +OMOP CDM v5.4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +

This is the specification document for the OMOP Common Data Model, v5.4. This is the latest version of the OMOP CDM. Each table is represented with a high-level description and ETL conventions that should be followed. This is continued with a discussion of each field in each table, any conventions related to the field, and constraints that should be followed (like primary key, foreign key, etc). Should you have questions please feel free to visit the forums or the github issue page.

+
+

Current Support for CDM v5.4

+

The table below details which OHDSI tools support CDM v5.4. There are two levels of support: legacy support means that the tool supports all tables and fields that were present in CDM v5.3 and feature support indicates that the tool supports any new tables and fields in CDM v5.4 that were not present in CDM v5.3. A green check ✅ indicates that the support level for the listed tool is in place, has been tested, and released. A warning sign ⚠️ indicates that the support level for the listed tool has been initiated but has not yet been tested and released.

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ToolDescriptionLegacy SupportFeature Support
CDM R packageThis package can be downloaded from https://github.com/OHDSI/CommonDataModel/. It functions to dynamically create the OMOP CDM documentation and DDL scripts to instantiate the CDM tables.
Data Quality DashboardThis package can be downloaded from https://github.com/OHDSI/DataQualityDashboard. It runs a set of > 3500 data quality checks against an OMOP CDM instance and is required to be run on all databases prior to participating in an OHDSI network research study.⚠️
AchillesThis package can be downloaded from https://github.com/OHDSI/Achilles, performing a set of broad database characterizations against an OMOP CDM instance.⚠️
ARESThis package can be downloaded from https://github.com/OHDSI/Ares and is designed to display the results from both the ACHILLES and DataQualityDashboard packages to support data quality and characterization research.⚠️
ATLASATLAS is an open source software tool for researchers to conduct scientific analyses on standardized observational data. Demo⚠️
Rabbit-In-A-HatThis package can be downloaded from https://github.com/OHDSI/WhiteRabbit and is an application for interactive design of an ETL to the OMOP Common Data Model with the help of the scan report generated by White Rabbit.
Feature ExtractionThis package can be downloaded from https://github.com/OHDSI/FeatureExtraction. It is designed to generate features (covariates) for a cohort generated using the OMOP CDM.✅*
Cohort DiagnosticsThis package can be downloaded from https://github.com/OHDSI/CohortDiagnostics and is used to critically evaluate cohort phenotypes.⚠️
+


* The Feature Extraction package supports all relevant new features in CDM v5.4. For example, it was decided that, from a methodological perspective, the EPISODE and EPISODE_EVENT tables should not be included to define cohort covariates because the events that make up episodes are already pulled in as potential covariates.

+


+

Looking to send us a pull request for a bug fix? Please see the readme on the main github page.

+
+
+

Clinical Data Tables

+
+

PERSON

+

Table Description

+

This table serves as the central identity management for all Persons in the database. It contains records that uniquely identify each person or patient, and some demographic information.

+

User Guide

+

All records in this table are independent Persons.

+

ETL Conventions

+

All Persons in a database need one record in this table, unless they fail data quality requirements specified in the ETL. Persons with no Events should have a record nonetheless. If more than one data source contributes Events to the database, Persons must be reconciled, if possible, across the sources to create one single record per Person. The content of the BIRTH_DATETIME must be equivalent to the content of BIRTH_DAY, BIRTH_MONTH and BIRTH_YEAR.

+
+
+

OBSERVATION_PERIOD

+

Table Description

+

This table contains records which define spans of time during which two conditions are expected to hold: (i) Clinical Events that happened to the Person are recorded in the Event tables, and (ii) absence of records indicates such Events did not occur during this span of time.

+

User Guide

+

For each Person, one or more OBSERVATION_PERIOD records may be present, but they will not overlap or be back to back to each other. Events may exist outside all of the time spans of the OBSERVATION_PERIOD records for a patient, however, absence of an Event outside these time spans cannot be construed as evidence of absence of an Event. Incidence or prevalence rates should only be calculated for the time of active OBSERVATION_PERIOD records. When constructing cohorts, outside Events can be used for inclusion criteria definition, but without any guarantee for the performance of these criteria. Also, OBSERVATION_PERIOD records can be as short as a single day, greatly disturbing the denominator of any rate calculation as part of cohort characterizations. To avoid that, apply minimal observation time as a requirement for any cohort definition.

+

ETL Conventions

+

Each Person needs to have at least one OBSERVATION_PERIOD record, which should represent time intervals with a high capture rate of Clinical Events. Some source data have very similar concepts, such as enrollment periods in insurance claims data. In other source data such as most EHR systems these time spans need to be inferred under a set of assumptions. It is at the discretion of the ETL developer to define these assumptions. In many ETL solutions the start date of the first occurrence or the first high quality occurrence of a Clinical Event (Condition, Drug, Procedure, Device, Measurement, Visit) is defined as the start of the OBSERVATION_PERIOD record, and the end date of the last occurrence or the last high quality occurrence of a Clinical Event, or the end of the database period, becomes the end of the OBSERVATION_PERIOD for each Person. If a Person only has a single Clinical Event the OBSERVATION_PERIOD record can be as short as one day. Depending on these definitions it is possible that Clinical Events fall outside the time spans defined by OBSERVATION_PERIOD records. Family history or history of Clinical Events generally are not used to generate OBSERVATION_PERIOD records around the time they are referring to. Any two overlapping or adjacent OBSERVATION_PERIOD records have to be merged into one.

+
+
+

VISIT_OCCURRENCE

+

Table Description

+

This table contains Events where Persons engage with the healthcare system for a duration of time. They are often also called “Encounters”. Visits are defined by a configuration of circumstances under which they occur, such as (i) whether the patient comes to a healthcare institution, the other way around, or the interaction is remote, (ii) whether and what kind of trained medical staff is delivering the service during the Visit, and (iii) whether the Visit is transient or for a longer period involving a stay in bed.

+

User Guide

+

The configuration defining the Visit is described by Concepts in the Visit Domain, which form a hierarchical structure rolling up to generally familiar Visits adopted in most healthcare systems worldwide:

+
    +
  • Inpatient Visit: Person visiting hospital, at a Care Site, in bed, for duration of more than one day, with physicians and other Providers permanently available to deliver service around the clock
  • +
  • Emergency Room Visit: Person visiting dedicated healthcare institution for treating emergencies, at a Care Site, within one day, with physicians and Providers permanently available to deliver service around the clock
  • +
  • Emergency Room and Inpatient Visit: Person visiting ER followed by a subsequent Inpatient Visit, where Emergency department is part of hospital, and transition from the ER to other hospital departments is undefined
  • +
  • Non-hospital institution Visit: Person visiting dedicated institution for reasons of poor health, at a Care Site, long-term or permanently, with no physician but possibly other Providers permanently available to deliver service around the clock
  • +
  • Outpatient Visit: Person visiting dedicated ambulatory healthcare institution, at a Care Site, within one day, without bed, with physicians or medical Providers delivering service during Visit
  • +
  • Home Visit: Provider visiting Person, without a Care Site, within one day, delivering service
  • +
  • Telehealth Visit: Patient engages with Provider through communication media
  • +
  • Pharmacy Visit: Person visiting pharmacy for dispensing of Drug, at a Care Site, within one day
  • +
  • Laboratory Visit: Patient visiting dedicated institution, at a Care Site, within one day, for the purpose of a Measurement.
  • +
  • Ambulance Visit: Person using transportation service for the purpose of initiating one of the other Visits, without a Care Site, within one day, potentially with Providers accompanying the Visit and delivering service
  • +
  • Case Management Visit: Person interacting with healthcare system, without a Care Site, within a day, with no Providers involved, for administrative purposes
  • +
+

The Visit duration, or ‘length of stay’, is defined as VISIT_END_DATE - VISIT_START_DATE. For all Visits this is <1 day, except Inpatient Visits and Non-hospital institution Visits. The CDM also contains the VISIT_DETAIL table where additional information about the Visit is stored, for example, transfers between units during an inpatient Visit.

+

ETL Conventions

+

Visits can be derived easily if the source data contain coding systems for Place of Service or Procedures, like CPT codes for well visits. In those cases, the codes can be looked up and mapped to a Standard Visit Concept. Otherwise, Visit Concepts have to be identified in the ETL process. This table will contain concepts in the Visit domain. These concepts are arranged in a hierarchical structure to facilitate cohort definitions by rolling up to generally familiar Visits adopted in most healthcare systems worldwide. Visits can be adjacent to each other, i.e. the end date of one can be identical with the start date of the other. As a consequence, more than one-day Visits or their descendants can be recorded for the same day. Multi-day visits must not overlap, i.e. share days other than start and end days. It is often the case that some logic should be written for how to define visits and how to assign Visit_Concept_Id. For example, in US claims outpatient visits that appear to occur within the time period of an inpatient visit can be rolled into one with the same Visit_Occurrence_Id. In EHR data inpatient visits that are within one day of each other may be strung together to create one visit. It will all depend on the source data and how encounter records should be translated to visit occurrences. Providers can be associated with a Visit through the PROVIDER_ID field, or indirectly through PROCEDURE_OCCURRENCE records linked both to the VISIT and PROVIDER tables.

+
+
+

VISIT_DETAIL

+

Table Description

+

The VISIT_DETAIL table is an optional table used to represent details of each record in the parent VISIT_OCCURRENCE table. A good example of this would be the movement between units in a hospital during an inpatient stay or claim lines associated with one insurance claim. For every record in the VISIT_OCCURRENCE table there may be 0 or more records in the VISIT_DETAIL table with a 1:n relationship where n may be 0. The VISIT_DETAIL table is structurally very similar to VISIT_OCCURRENCE table and belongs to the visit domain.

+

User Guide

+

The configuration defining the Visit Detail is described by Concepts in the Visit Domain, which form a hierarchical structure. The Visit Detail record will be associated with the Visit Occurrence record in two ways:
1. The Visit Detail record will have the VISIT_OCCURRENCE_ID it is associated with 2. The VISIT_DETAIL_CONCEPT_ID will be a descendant of the VISIT_CONCEPT_ID for the Visit.

+

ETL Conventions

+

It is not mandatory that the VISIT_DETAIL table be filled in, but if you find that the logic to create VISIT_OCCURRENCE records includes the roll-up of multiple smaller records to create one picture of a Visit then it is a good idea to use VISIT_DETAIL. In EHR data, for example, a Person may be in the hospital but instead of one over-arching Visit their encounters are recorded as times they interacted with a health care provider. A Person in the hospital interacts with multiple providers multiple times a day so the encounters must be strung together using some heuristic (defined by the ETL) to identify the entire Visit. In this case the encounters would be considered Visit Details and the entire Visit would be the Visit Occurrence. In this example it is also possible to use the Vocabulary to distinguish Visit Details from a Visit Occurrence by setting the VISIT_CONCEPT_ID to 9201 and the VISIT_DETAIL_CONCEPT_IDs either to 9201 or its children to indicate where the patient was in the hospital at the time of care.

+
+
+

CONDITION_OCCURRENCE

+

Table Description

+

This table contains records of Events of a Person suggesting the presence of a disease or medical condition stated as a diagnosis, a sign, or a symptom, which is either observed by a Provider or reported by the patient.

+

User Guide

+

Conditions are defined by Concepts from the Condition domain, which form a complex hierarchy. As a result, the same Person with the same disease may have multiple Condition records, which belong to the same hierarchical family. Most Condition records are mapped from diagnostic codes, but recorded signs, symptoms and summary descriptions also contribute to this table. Rule out diagnoses should not be recorded in this table, but in reality their negating nature is not always captured in the source data, and other precautions must be taken when identifying Persons who should suffer from the recorded Condition. Record all conditions as they exist in the source data. Any decisions about diagnosis/phenotype definitions would be done through cohort specifications. These cohorts can be housed in the COHORT table. Conditions span a time interval from start to end, but are typically recorded as single snapshot records with no end date. The reason is twofold: (i) At the time of the recording the duration is not known and later not recorded, and (ii) the Persons typically cease interacting with the healthcare system when they feel better, which leads to incomplete capture of resolved Conditions. The CONDITION_ERA table addresses this issue. Family history and past diagnoses (‘history of’) are not recorded in this table. Instead, they are listed in the OBSERVATION table. Codes written in the process of establishing the diagnosis, such as ‘question of’ and ‘rule out’, should not be represented here. Instead, they should be recorded in the OBSERVATION table, if they are used for analyses. However, this information is not always available.

+

ETL Conventions

+

Source codes and source text fields mapped to Standard Concepts of the Condition Domain have to be recorded here.

+
+
+

DRUG_EXPOSURE

+

Table Description

+

This table captures records about the exposure to a Drug ingested or otherwise introduced into the body. A Drug is a biochemical substance formulated in such a way that when administered to a Person it will exert a certain biochemical effect on the metabolism. Drugs include prescription and over-the-counter medicines, vaccines, and large-molecule biologic therapies. Radiological devices ingested or applied locally do not count as Drugs.

+

User Guide

+

The purpose of records in this table is to indicate an exposure to a certain drug as best as possible. In this context a drug is defined as an active ingredient. Drug Exposures are defined by Concepts from the Drug domain, which form a complex hierarchy. As a result, one DRUG_SOURCE_CONCEPT_ID may map to multiple standard concept ids if it is a combination product. Records in this table represent prescriptions written, prescriptions dispensed, and drugs administered by a provider to name a few. The DRUG_TYPE_CONCEPT_ID can be used to find and filter on these types. This table includes additional information about the drug products, the quantity given, and route of administration.

+

ETL Conventions

+

Information about quantity and dose is provided in a variety of different ways and it is important for the ETL to provide as much information as possible from the data. Depending on the provenance of the data, fields may be captured differently, i.e. quantity for drugs administered may have a separate meaning from quantity for prescriptions dispensed. If a patient has multiple records on the same day for the same drug or procedures the ETL should not de-dupe them unless there is probable reason to believe the item is a true data duplicate. Take note on how to handle refills for prescriptions written.

+
+
+

PROCEDURE_OCCURRENCE

+

Table Description

+

This table contains records of activities or processes ordered by, or carried out by, a healthcare provider on the patient with a diagnostic or therapeutic purpose.

+

User Guide

+

Lab tests are not a procedure; if something is observed with an expected resulting amount and unit then it should be a measurement. Phlebotomy is a procedure but so trivial that it tends to be rarely captured. It can be assumed that there is a phlebotomy procedure associated with many lab tests, therefore it is unnecessary to add them as separate procedures. If the user finds the same procedure over consecutive days, it is assumed those records are part of a procedure lasting more than a day. This logic is in lieu of the procedure_end_date, which will be added in a future version of the CDM.

+

ETL Conventions

+

If a procedure lasts more than a day, then it should be recorded as a separate record for each day the procedure occurred; this logic is in lieu of the PROCEDURE_END_DATE, which will be added in a future version of the CDM. When dealing with duplicate records, the ETL must determine whether to sum them up into one record or keep them separate. Things to consider are: - Same Procedure - Same PROCEDURE_DATETIME - Same Visit Occurrence or Visit Detail - Same Provider - Same Modifier for Procedures. Source codes and source text fields mapped to Standard Concepts of the Procedure Domain have to be recorded here.

+
+
+

DEVICE_EXPOSURE

+

Table Description

+

The Device domain captures information about a person’s exposure to a foreign physical object or instrument which is used for diagnostic or therapeutic purposes through a mechanism beyond chemical action. Devices include implantable objects (e.g. pacemakers, stents, artificial joints), medical equipment and supplies (e.g. bandages, crutches, syringes), other instruments used in medical procedures (e.g. sutures, defibrillators) and material used in clinical care (e.g. adhesives, body material, dental material, surgical material).

+

User Guide

+

The distinction between Devices or supplies and Procedures is sometimes blurry, but the former are physical objects while the latter are actions, often to apply a Device or supply.

+

ETL Conventions

+

Source codes and source text fields mapped to Standard Concepts of the Device Domain have to be recorded here.

+
+
+

MEASUREMENT

+

Table Description

+

The MEASUREMENT table contains records of Measurements, i.e. structured values (numerical or categorical) obtained through systematic and standardized examination or testing of a Person or Person’s sample. The MEASUREMENT table contains both orders and results of such Measurements as laboratory tests, vital signs, quantitative findings from pathology reports, etc. Measurements are stored as attribute value pairs, with the attribute as the Measurement Concept and the value representing the result. The value can be a Concept (stored in VALUE_AS_CONCEPT), or a numerical value (VALUE_AS_NUMBER) with a Unit (UNIT_CONCEPT_ID). The Procedure for obtaining the sample is housed in the PROCEDURE_OCCURRENCE table, though it is unnecessary to create a PROCEDURE_OCCURRENCE record for each measurement if one does not exist in the source data. Measurements differ from Observations in that they require a standardized test or some other activity to generate a quantitative or qualitative result. If there is no result, it is assumed that the lab test was conducted but the result was not captured.

+

User Guide

+

Measurements are predominately lab tests with a few exceptions, like blood pressure or function tests. Results are given in the form of a value and unit combination. When investigating measurements, look for operator_concept_ids (<, >, etc.).

+

ETL Conventions

+

Only records where the source value maps to a Concept in the measurement domain should be included in this table. Even though each Measurement always has a result, the fields VALUE_AS_NUMBER and VALUE_AS_CONCEPT_ID are not mandatory as often the result is not given in the source data. When the result is not known, the Measurement record represents just the fact that the corresponding Measurement was carried out, which in itself is already useful information for some use cases. For some Measurement Concepts, the result is included in the test. For example, ICD10 CONCEPT_ID 45548980 ‘Abnormal level of unspecified serum enzyme’ indicates a Measurement and the result (abnormal). In those situations, the CONCEPT_RELATIONSHIP table in addition to the ‘Maps to’ record contains a second record with the relationship_id set to ‘Maps to value’. In this example, the ‘Maps to’ relationship directs to 4046263 ‘Enzyme measurement’ as well as a ‘Maps to value’ record to 4135493 ‘Abnormal’.

+
+
+

OBSERVATION

+

Table Description

+

The OBSERVATION table captures clinical facts about a Person obtained in the context of examination, questioning or a procedure. Any data that cannot be represented by any other domains, such as social and lifestyle facts, medical history, family history, etc. are recorded here.

+

User Guide

+

Observations differ from Measurements in that they do not require a standardized test or some other activity to generate a clinical fact. Typical observations are medical history, family history, the stated need for certain treatment, social circumstances, lifestyle choices, healthcare utilization patterns, etc. If the generation of a clinical fact requires standardized testing, such as lab testing or imaging, and leads to a standardized result, the data item is recorded in the MEASUREMENT table. If the clinical fact observed determines a sign, symptom, diagnosis of a disease or other medical condition, it is recorded in the CONDITION_OCCURRENCE table. Valid Observation Concepts are not enforced to be from any domain, though they still should be Standard Concepts.

+

ETL Conventions

+

Records whose Source Values map to any domain besides Condition, Procedure, Drug, Measurement or Device should be stored in the Observation table. Observations can be stored as attribute value pairs, with the attribute as the Observation Concept and the value representing the clinical fact. This fact can be a Concept (stored in VALUE_AS_CONCEPT_ID), a numerical value (VALUE_AS_NUMBER), a verbatim string (VALUE_AS_STRING), or a datetime (VALUE_AS_DATETIME). Even though Observations do not have an explicit result, the clinical fact can be stated separately from the type of Observation in the VALUE_AS_* fields. It is recommended that Observations that are suggestive statements of positive assertion have a value of ‘Yes’ (concept_id=4188539) recorded, even though the null value is the equivalent.

+
+
+

DEATH

+

Table Description

+

The death domain contains the clinical event for how and when a Person dies. A person can have up to one record if the source system contains evidence about the Death, such as: Condition in an administrative claim, status of enrollment into a health plan, or explicit record in EHR data.

+
+
+

NOTE

+

Table Description

+

The NOTE table captures unstructured information that was recorded by a provider about a patient in free text (in ASCII, or preferably in UTF8 format) notes on a given date. The type of note_text is CLOB or varchar(MAX) depending on RDBMS.

+

ETL Conventions

+

HL7/LOINC CDO is a standard for consistent naming of documents to support a range of use cases: retrieval, organization, display, and exchange. It guides the creation of LOINC codes for clinical notes. CDO annotates each document with 5 dimensions:

+
    +
  • Kind of Document: Characterizes the general structure of the document at a macro level (e.g. Anesthesia Consent)
  • +
  • Type of Service: Characterizes the kind of service or activity (e.g. evaluations, consultations, and summaries). The notion of time sequence, e.g., at the beginning (admission) at the end (discharge) is subsumed in this axis. Example: Discharge Teaching.
  • +
  • Setting: Setting is an extension of CMS’s definitions (e.g. Inpatient, Outpatient)
  • +
  • Subject Matter Domain (SMD): Characterizes the subject matter domain of a note (e.g. Anesthesiology)
  • +
  • Role: Characterizes the training or professional level of the author of the document, but does not break down to specialty or subspecialty (e.g. Physician). Each combination of these 5 dimensions rolls up to a unique LOINC code.
  • +
+

According to CDO requirements, only 2 of the 5 dimensions are required to properly annotate a document: Kind of Document and any one of the other 4 dimensions. However, not all the permutations of the CDO dimensions will necessarily yield an existing LOINC code. Each of these dimensions is contained in the OMOP Vocabulary under the domain of ‘Meas Value’ with each dimension represented as a Concept Class.

+
+
+

NOTE_NLP

+

Table Description

+

The NOTE_NLP table encodes all output of NLP on clinical notes. Each row represents a single extracted term from a note.

+
+
+

SPECIMEN

+

Table Description

+

The specimen domain contains the records identifying biological samples from a person.

+

ETL Conventions

+

Anatomic site is coded at the most specific level of granularity possible, such that higher level classifications can be derived using the Standardized Vocabularies.

+
+
+

FACT_RELATIONSHIP

+

Table Description

+

The FACT_RELATIONSHIP table contains records about the relationships between facts stored as records in any table of the CDM. Relationships can be defined between facts from the same domain, or different domains. Examples of Fact Relationships include: Person relationships (parent-child), care site relationships (hierarchical organizational structure of facilities within a health system), indication relationship (between drug exposures and associated conditions), usage relationships (of devices during the course of an associated procedure), or facts derived from one another (measurements derived from an associated specimen).

+

ETL Conventions

+

All relationships are directional, and each relationship is represented twice symmetrically within the FACT_RELATIONSHIP table. For example, if person_id = 1 is the mother of person_id = 2, two records are in the FACT_RELATIONSHIP table (all strings are in fact concept_id records in the Concept table): - Person, 1, Person, 2, parent of - Person, 2, Person, 1, child of

+
+
+
+

Health System Data Tables

+
+

LOCATION

+

Table Description

+

The LOCATION table represents a generic way to capture physical location or address information of Persons and Care Sites.

+

User Guide

+

The current iteration of the LOCATION table is US centric. Until a major release to correct this, certain fields can be used to represent different international values.

- STATE can also be used for province or district
- ZIP is also the postal code or postcode
- COUNTY can also be used to represent region

+

ETL Conventions

+

Each address or Location is unique and is present only once in the table. Locations do not contain names, such as the name of a hospital. In order to construct a full address that can be used in the postal service, the address information from the Location needs to be combined with information from the Care Site.

+
+
+

CARE_SITE

+

Table Description

+

The CARE_SITE table contains a list of uniquely identified institutional (physical or organizational) units where healthcare delivery is practiced (offices, wards, hospitals, clinics, etc.).

+

ETL Conventions

+

Care site is a unique combination of location_id and place_of_service_source_value. Care site does not take into account the provider (human) information such as specialty. Many source data do not make a distinction between individual and institutional providers. The CARE_SITE table contains the institutional providers. If the source, instead of uniquely identifying individual Care Sites, only provides limited information such as Place of Service, generic or “pooled” Care Site records are listed in the CARE_SITE table. There can be hierarchical and business relationships between Care Sites. For example, wards can belong to clinics or departments, which can in turn belong to hospitals, which in turn can belong to hospital systems, which in turn can belong to HMOs. The relationships between Care Sites are defined in the FACT_RELATIONSHIP table.

+
+
+

PROVIDER

+

Table Description

+

The PROVIDER table contains a list of uniquely identified healthcare providers. These are individuals providing hands-on healthcare to patients, such as physicians, nurses, midwives, physical therapists etc.

+

User Guide

+

Many sources do not make a distinction between individual and institutional providers. The PROVIDER table contains the individual providers. If the source, instead of uniquely identifying individual providers, only provides limited information such as specialty, generic or ‘pooled’ Provider records are listed in the PROVIDER table.

+
+
+
+

Health Economics Data Tables

+
+

PAYER_PLAN_PERIOD

+

Table Description

+

The PAYER_PLAN_PERIOD table captures details of the period of time that a Person is continuously enrolled under a specific health Plan benefit structure from a given Payer. Each Person receiving healthcare is typically covered by a health benefit plan, which pays for (fully or partially), or directly provides, the care. These benefit plans are provided by payers, such as health insurances or state or government agencies. In each plan the details of the health benefits are defined for the Person or her family, and the health benefit Plan might change over time typically with increasing utilization (reaching certain cost thresholds such as deductibles), plan availability and purchasing choices of the Person. The unique combinations of Payer organizations, health benefit Plans and time periods in which they are valid for a Person are recorded in this table.

+

User Guide

+

A Person can have multiple, overlapping, Payer_Plan_Periods in this table. For example, medical and drug coverage in the US can be represented by two Payer_Plan_Periods. The details of the benefit structure of the Plan are rarely known; the idea is just to identify that the Plans are different.

+
+
+

COST

+

Table Description

+

The COST table captures records containing the cost of any medical event recorded in one of the OMOP clinical event tables such as DRUG_EXPOSURE, PROCEDURE_OCCURRENCE, VISIT_OCCURRENCE, VISIT_DETAIL, DEVICE_OCCURRENCE, OBSERVATION or MEASUREMENT.

+

Each record in the cost table accounts for the amount of money transacted for the clinical event. So, the COST table may be used to represent both receivables (charges) and payments (paid), each transaction type represented by its COST_CONCEPT_ID. The COST_TYPE_CONCEPT_ID field will use concepts in the Standardized Vocabularies to designate the source (provenance) of the cost data. A reference to the health plan information in the PAYER_PLAN_PERIOD table is stored in the record for information used for the adjudication system to determine the person’s benefit for the clinical event.

+

User Guide

+

When dealing with summary costs, the cost of the goods or services the provider provides is often not known directly, but derived from the hospital charges multiplied by an average cost-to-charge ratio.

+

ETL Conventions

+

One cost record is generated for each response by a payer. In claims databases, the payment and payment terms reported by the payer for the goods or services billed will generate one cost record. If the source data has payment information for more than one payer (i.e. primary insurance and secondary insurance payment for one entity), then a cost record is created for each reporting payer. Therefore, it is possible for one procedure to have multiple cost records for each payer, but typically it contains one or no record per entity. Payer reimbursement cost records will be identified by using the PAYER_PLAN_ID field. Drug costs are composed of ingredient cost (the amount charged by the wholesale distributor or manufacturer), the dispensing fee (the amount charged by the pharmacy) and the sales tax.

+
+
+
+

Standardized Derived Elements

+
+

DRUG_ERA

+

Table Description

+

A Drug Era is defined as a span of time when the Person is assumed to be exposed to a particular active ingredient. A Drug Era is not the same as a Drug Exposure: Exposures are individual records corresponding to the source when Drug was delivered to the Person, while successive periods of Drug Exposures are combined under certain rules to produce continuous Drug Eras.

+

ETL Conventions

+

The SQL script for generating DRUG_ERA records can be found here.

+
+
+

DOSE_ERA

+

Table Description

+

A Dose Era is defined as a span of time when the Person is assumed to be exposed to a constant dose of a specific active ingredient.

+

ETL Conventions

+

Dose Eras will be derived from records in the DRUG_EXPOSURE table and the Dose information from the DRUG_STRENGTH table using a standardized algorithm. Dose Form information is not taken into account. So, if the patient changes between different formulations, or different manufacturers with the same formulation, the Dose Era is still spanning the entire time of exposure to the Ingredient.

+
+
+

CONDITION_ERA

+

Table Description

+

A Condition Era is defined as a span of time when the Person is assumed to have a given condition. Similar to Drug Eras, Condition Eras are chronological periods of Condition Occurrence. Combining individual Condition Occurrences into a single Condition Era serves two purposes:

+
    +
  • It allows aggregation of chronic conditions that require frequent ongoing care, instead of treating each Condition Occurrence as an independent event.
  • +
  • It allows aggregation of multiple, closely timed doctor visits for the same Condition to avoid double-counting the Condition Occurrences. For example, consider a Person who visits her Primary Care Physician (PCP) and who is referred to a specialist. At a later time, the Person visits the specialist, who confirms the PCP’s original diagnosis and provides the appropriate treatment to resolve the condition. These two independent doctor visits should be aggregated into one Condition Era.
  • +
+

ETL Conventions

+

Each Condition Era corresponds to one or many Condition Occurrence records that form a continuous interval. The condition_concept_id field contains Concepts that are identical to those of the CONDITION_OCCURRENCE table records that make up the Condition Era. In contrast to Drug Eras, Condition Eras are not aggregated to contain Conditions of different hierarchical layers. The SQL script for generating CONDITION_ERA records can be found here. The Condition Era Start Date is the start date of the first Condition Occurrence. The Condition Era End Date is the end date of the last Condition Occurrence. Condition Eras are built with a Persistence Window of 30 days, meaning, if no occurrence of the same condition_concept_id happens within 30 days of any one occurrence, it will be considered the condition_era_end_date.

+
+
+

EPISODE

+

Table Description

+

The EPISODE table aggregates lower-level clinical events (VISIT_OCCURRENCE, DRUG_EXPOSURE, PROCEDURE_OCCURRENCE, DEVICE_EXPOSURE) into a higher-level abstraction representing clinically and analytically relevant disease phases, outcomes and treatments. The EPISODE_EVENT table connects qualifying clinical events (VISIT_OCCURRENCE, DRUG_EXPOSURE, PROCEDURE_OCCURRENCE, DEVICE_EXPOSURE) to the appropriate EPISODE entry. For example, cancers including their development over time, their treatment, and final resolution.

+

User Guide

+

Valid Episode Concepts belong to the ‘Episode’ domain. For cancer episodes please see [article], for non-cancer episodes please see [article]. If your source data does not have all episodes that are relevant to the therapeutic area, write only those you can easily derive from the data. It is understood that the table is not currently expected to be comprehensive.

+
+
+

EPISODE_EVENT

+

Table Description

+

The EPISODE_EVENT table connects qualifying clinical events (such as CONDITION_OCCURRENCE, DRUG_EXPOSURE, PROCEDURE_OCCURRENCE, MEASUREMENT) to the appropriate EPISODE entry. For example, linking the precise location of the metastasis (cancer modifier in MEASUREMENT) to the disease episode.

+

User Guide

+

This connecting table is used instead of the FACT_RELATIONSHIP table for linking low-level events to abstracted Episodes.

+

ETL Conventions

+

Some episodes may not have links to any underlying clinical events. For such episodes, the EPISODE_EVENT table is not populated.

+
+
+
+

Metadata Tables

+
+

METADATA

+

Table Description

+

The METADATA table contains metadata information about a dataset that has been transformed to the OMOP Common Data Model.

+
+
+

CDM_SOURCE

+

Table Description

+

The CDM_SOURCE table contains detail about the source database and the process used to transform the data into the OMOP Common Data Model.

+
+
+
+

Vocabulary Tables

+
+

CONCEPT

+

Table Description

+

The Standardized Vocabularies contains records, or Concepts, that uniquely identify each fundamental unit of meaning used to express clinical information in all domain tables of the CDM. Concepts are derived from vocabularies, which represent clinical information across a domain (e.g. conditions, drugs, procedures) through the use of codes and associated descriptions. Some Concepts are designated Standard Concepts, meaning these Concepts can be used as normative expressions of a clinical entity within the OMOP Common Data Model and within standardized analytics. Each Standard Concept belongs to one domain, which defines the location where the Concept would be expected to occur within data tables of the CDM.

+

Concepts can represent broad categories (like ‘Cardiovascular disease’), detailed clinical elements (‘Myocardial infarction of the anterolateral wall’) or modifying characteristics and attributes that define Concepts at various levels of detail (severity of a disease, associated morphology, etc.).

+

Records in the Standardized Vocabularies tables are derived from national or international vocabularies such as SNOMED-CT, RxNorm, and LOINC, or custom Concepts defined to cover various aspects of observational data analysis.

+
+
+

VOCABULARY

+

Table Description

+

The VOCABULARY table includes a list of the Vocabularies collected from various sources or created de novo by the OMOP community. This reference table is populated with a single record for each Vocabulary source and includes a descriptive name and other associated attributes for the Vocabulary.

+
+
+

DOMAIN

+

Table Description

+

The DOMAIN table includes a list of OMOP-defined Domains the Concepts of the Standardized Vocabularies can belong to. A Domain defines the set of allowable Concepts for the standardized fields in the CDM tables. For example, the “Condition” Domain contains Concepts that describe a condition of a patient, and these Concepts can only be stored in the condition_concept_id field of the CONDITION_OCCURRENCE and CONDITION_ERA tables. This reference table is populated with a single record for each Domain and includes a descriptive name for the Domain.

+
+
+

CONCEPT_CLASS

+

Table Description

+

The CONCEPT_CLASS table is a reference table, which includes a list of the classifications used to differentiate Concepts within a given Vocabulary. This reference table is populated with a single record for each Concept Class.

+
+
+

CONCEPT_RELATIONSHIP

+

Table Description

+

The CONCEPT_RELATIONSHIP table contains records that define direct relationships between any two Concepts and the nature or type of the relationship. Each type of a relationship is defined in the RELATIONSHIP table.

+
+
+

RELATIONSHIP

+

Table Description

+

The RELATIONSHIP table provides a reference list of all types of relationships that can be used to associate any two concepts in the CONCEPT_RELATIONSHIP table.

+
+
+

CONCEPT_SYNONYM

+

Table Description

+

The CONCEPT_SYNONYM table is used to store alternate names and descriptions for Concepts.

+
+
+

CONCEPT_ANCESTOR

+

Table Description

+

The CONCEPT_ANCESTOR table is designed to simplify observational analysis by providing the complete hierarchical relationships between Concepts. Only direct parent-child relationships between Concepts are stored in the CONCEPT_RELATIONSHIP table. To determine higher level ancestry connections, all individual direct relationships would have to be navigated at analysis time. The CONCEPT_ANCESTOR table includes records for all parent-child relationships, as well as grandparent-grandchild relationships and those of any other level of lineage. Using the CONCEPT_ANCESTOR table allows for querying for all descendants of a hierarchical concept. For example, drug ingredients and drug products are all descendants of a drug class ancestor.

+

This table is entirely derived from the CONCEPT, CONCEPT_RELATIONSHIP and RELATIONSHIP tables.

+
+
+

SOURCE_TO_CONCEPT_MAP

+

Table Description

+

The source to concept map table is a legacy data structure within the OMOP Common Data Model, recommended for use in ETL processes to maintain local source codes which are not available as Concepts in the Standardized Vocabularies, and to establish mappings for each source code into a Standard Concept as target_concept_ids that can be used to populate the Common Data Model tables. The SOURCE_TO_CONCEPT_MAP table is no longer populated with content within the Standardized Vocabularies published to the OMOP community.

+
+
+

DRUG_STRENGTH

+

Table Description

+

The DRUG_STRENGTH table contains structured content about the amount or concentration and associated units of a specific ingredient contained within a particular drug product. This table is supplemental information to support standardized analysis of drug utilization.

+
+
+

COHORT

+

Table Description

+

The COHORT table contains records of subjects that satisfy a given set of criteria for a duration of time. The definition of the cohort is contained within the COHORT_DEFINITION table. It is listed as part of the RESULTS schema because it is a table that users of the database as well as tools such as ATLAS need to be able to write to. The CDM and Vocabulary tables are all read-only so it is suggested that the COHORT and COHORT_DEFINITION tables are kept in a separate schema to alleviate confusion.

+

ETL Conventions

+

Cohorts typically include patients diagnosed with a specific condition, patients exposed to a particular drug, but can also be Providers who have performed a specific Procedure. Cohort records must have a Start Date and an End Date, but the End Date may be set to Start Date or could have an applied censor date using the Observation Period Start Date. Cohort records must contain a Subject Id, which can refer to the Person, Provider, Visit record or Care Site though they are most often Person Ids. The Cohort Definition will define the type of subject through the subject concept id. A subject can belong (or not belong) to a cohort at any moment in time. A subject can only have one record in the cohort table for any moment of time, i.e. it is not possible for a person to contain multiple records indicating cohort membership that are overlapping in time.

+
+
+

COHORT_DEFINITION

+

Table Description

+

The COHORT_DEFINITION table contains records defining a Cohort derived from the data through the associated description and syntax and upon instantiation (execution of the algorithm) placed into the COHORT table. Cohorts are a set of subjects that satisfy a given combination of inclusion criteria for a duration of time. The COHORT_DEFINITION table provides a standardized structure for maintaining the rules governing the inclusion of a subject into a cohort, and can store operational programming code to instantiate the cohort within the OMOP Common Data Model.

+
+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/cdm54Changes.html b/rmd/cdm54Changes.html new file mode 100644 index 0000000..a0aca28 --- /dev/null +++ b/rmd/cdm54Changes.html @@ -0,0 +1,905 @@ + + + + + + + + + + + + + +Changes by Table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +

from CDM v5.3 -> CDM v5.4

+

For a full description of each table and field listed here, please see the CDM specification.

+

Notation:

+
    +
  • a + indicates an addition to the model, either a table or field
  • +
  • a -> indicates an alteration either in naming or specification
  • +
  • a - indicates a subtraction from the model, either a table or field
  • +
+
+

PERSON

+
    +
  • No change
  • +
+
+
+

OBSERVATION_PERIOD

+
    +
  • No change
  • +
+
+
+

VISIT_OCCURRENCE

+
    +
  • Admitting_source_concept_id -> Admitted_from_concept_id
  • +
  • Admitting_source_value -> Admitted_from_source_value
  • +
  • Discharge_to_concept_id -> Discharged_to_concept_id
  • +
  • Discharge_to_source_value -> Discharged_to_source_value
  • +
+
+
+

VISIT_DETAIL

+
    +
  • Admitting_source_concept_id -> Admitted_from_concept_id
  • +
  • Admitting_source_value -> Admitted_from_source_value
  • +
  • Discharge_to_concept_id -> Discharged_to_concept_id
  • +
  • Discharge_to_source_value -> Discharged_to_source_value
  • +
  • Visit_detail_parent_id -> Parent_visit_detail_id
  • +
+
+
+

CONDITION_OCCURRENCE

+
    +
  • No change
  • +
+
+
+

DRUG_EXPOSURE

+
    +
  • No change
  • +
+
+
+

PROCEDURE_OCCURRENCE

+
    +
  • + Procedure_end_date
  • +
  • + Procedure_end_datetime
  • +
+
+
+

DEVICE_EXPOSURE

+
    +
  • Unique_device_id -> Changed to varchar(255)
  • +
  • + Production_id
  • +
  • + Unit_concept_id
  • +
  • + Unit_source_value
  • +
  • + Unit_source_concept_id
  • +
+
+
+

MEASUREMENT

+
    +
  • + Unit_source_concept_id
  • +
  • + Measurement_event_id
  • +
  • + Meas_event_field_concept_id
  • +
+
+
+

OBSERVATION

+
    +
  • + Value_source_value
  • +
  • + Observation_event_id
  • +
  • + Obs_event_field_concept_id
  • +
+
+
+

DEATH

+
    +
  • No change
  • +
+
+
+

NOTE

+
    +
  • + Note_event_id
  • +
  • + Note_event_field_concept_id
  • +
+
+
+

NOTE_NLP

+
    +
  • No change
  • +
+
+
+

SPECIMEN

+
    +
  • No change
  • +
+
+
+

FACT_RELATIONSHIP

+
    +
  • No change
  • +
+
+
+

LOCATION

+
    +
  • + Country_concept_id
  • +
  • + Country_source_value
  • +
  • + Latitude
  • +
  • + Longitude
  • +
+
+
+

CARE_SITE

+
    +
  • No change
  • +
+
+
+

PAYER_PLAN_PERIOD

+
    +
  • No change
  • +
+
+
+

COST

+
    +
  • No change
  • +
+
+
+

DRUG_ERA

+
    +
  • No change
  • +
+
+
+

DOSE_ERA

+
    +
  • No change
  • +
+
+
+

CONDITION_ERA

+
    +
  • No change
  • +
+
+
+

+ EPISODE

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EPISODE
episode_id
person_id
episode_concept_id
episode_start_date
episode_start_datetime
episode_end_date
episode_end_datetime
episode_parent_id
episode_number
episode_object_concept_id
episode_type_concept_id
episode_source_value
episode_source_concept_id
+
+
+

+ EPISODE_EVENT

+ + + + + + + + + + + + + + + + + +
EPISODE_EVENT
episode_id
event_id
episode_event_field_concept_id
+
+
+

METADATA

+
    +
  • + Metadata_id
  • +
  • + Value_as_number
  • +
+
+
+

CDM_SOURCE

+
    +
  • Cdm_source_name -> Mandatory field
  • +
  • Cdm_source_abbreviation -> Mandatory field
  • +
  • Cdm_holder -> Mandatory field
  • +
  • Source_release_date -> Mandatory field
  • +
  • Cdm_release_date -> Mandatory field
  • +
  • + Cdm_version_concept_id
  • +
+
+
+

CONCEPT

+
    +
  • No change
  • +
+
+
+

VOCABULARY

+
    +
  • Vocabulary_reference -> Non-mandatory field
  • +
  • Vocabulary_version -> Non-mandatory field
  • +
+
+
+

DOMAIN

+
    +
  • No change
  • +
+
+
+

CONCEPT_CLASS

+
    +
  • No change
  • +
+
+
+

CONCEPT_RELATIONSHIP

+
    +
  • No change
  • +
+
+
+

RELATIONSHIP

+
    +
  • No change
  • +
+
+
+

CONCEPT_SYNONYM

+
    +
  • No change
  • +
+
+
+

CONCEPT_ANCESTOR

+
    +
  • No change
  • +
+
+
+

SOURCE_TO_CONCEPT_MAP

+
    +
  • No change
  • +
+
+
+

DRUG_STRENGTH

+
    +
  • No change
  • +
+
+
+

- ATTRIBUTE_DEFINITION

+
+
+

+ COHORT

+ + + + + + + + + + + + + + + + + + + + +
COHORT
cohort_definition_id
subject_id
cohort_start_date
cohort_end_date
+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/cdm60.Rmd b/rmd/cdm60.Rmd index 7e8bebe..fd704e7 100644 --- a/rmd/cdm60.Rmd +++ b/rmd/cdm60.Rmd @@ -102,12 +102,10 @@ for(tb in tables) { loopTable <- subset(cdmSpecsClean, `CDM Table` == tb) loopTable <- subset(loopTable, select = -c(1)) - print(kable(x = loopTable, align = "l", row.names = FALSE, format = "html") %>% - column_spec(1, bold = T) %>% - column_spec(2, width = "3in", include_thead = T) %>% - column_spec(3, width = "4in", include_thead = T) %>% - column_spec(4:9, width = "1in", include_thead = T) %>% - kable_styling(c("condensed","hover"), position = "center", full_width = T, font_size = 13)) + print(kable(x = loopTable, align = "l", row.names = FALSE, format = "html", escape = FALSE) %>% + kable_styling(c("condensed","hover"), position = "center", full_width = T, font_size = 13) %>% + column_spec(1, bold = T)) + } diff --git a/rmd/cdm60.html b/rmd/cdm60.html new file mode 100644 index 0000000..a8802f0 --- /dev/null +++ b/rmd/cdm60.html @@ -0,0 +1,893 @@ + + + + + + + + + + + + + +OMOP CDM v6.0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +
+

NOTE ABOUT CDM v6.0

+

Please be aware that v6.0 of the OMOP CDM is not fully supported by the OHDSI suite of tools and methods. The major difference between CDM v5.3 and CDM v6.0 involves switching the *_datetime fields to mandatory rather than optional. This switch radically changes the assumptions related to exposure and outcome timing. Rather than move forward with v6.0, CDM v5.4 was designed with additions to the model that have been requested by the community while retaining the date structure of medical events in v5.3. Please see the specifications for CDM v5.4 and detailed changes from CDM v5.3. For new collaborators to OHDSI, please transform your data to CDM v5.4 until such time that the v6 series of the CDM is ready for mainstream use.

+

Below is the specification document for the OMOP Common Data Model, v6.0. Each table is represented with a high-level description and ETL conventions that should be followed. This is continued with a discussion of each field in each table, any conventions related to the field, and constraints that should be followed (like primary key, foreign key, etc). Should you have questions please feel free to visit the forums or the github issue page.

+

–after regeneration of DDLs link to csv of cdm link to pdf of cdm documentation link to forum on doc page

+
+
+

Changes in v6.0

+
    +
  • Latitude and Longitude added to LOCATION
  • +
  • Contract owner field added to PAYER_PLAN_PERIOD
  • +
  • All primary keys were changed to bigint
  • +
  • The name of ADMITTING_SOURCE_CONCEPT_ID was changed to ADMITTED_FROM_CONCEPT_ID in VISIT_OCCURRENCE and VISIT_DETAIL
  • +
  • All Concept_Ids are now mandatory except for UNIT_CONCEPT_ID, VALUE_AS_CONCEPT_ID, and OPERATOR_CONCEPT_ID. If there is no value available then a Concept_Id should be set to 0 instead of NULL.
  • +
  • DEATH table removed and DEATH_DATETIME field added to the PERSON table. Cause of death is stored in the CONDITION_OCCURRENCE
  • +
  • DATETIME fields were made mandatory and date fields were made optional.
  • +
+
+
+

Clinical Data Tables

+
+

PERSON

+

Table Description

+

This table serves as the central identity management for all Persons in the database. It contains records that uniquely identify each person or patient, and some demographic information.

+

User Guide

+

All records in this table are independent Persons.

+

ETL Conventions

+

All Persons in a database need one record in this table, unless they fail data quality requirements specified in the ETL. Persons with no Events should have a record nonetheless. If more than one data source contributes Events to the database, Persons must be reconciled, if possible, across the sources to create one single record per Person. The BIRTH_DATETIME must be equivalent to the content of BIRTH_DAY, BIRTH_MONTH and BIRTH_YEAR. There is a helpful rule listed in the table below for how to derive BIRTH_DATETIME if it is not available in the source. New to CDM v6.0: The person’s death date is now stored in this table instead of the separate DEATH table. In the case that multiple dates of death are given in the source data the ETL should make a choice as to which death date to put in the PERSON table. Any additional dates can be stored in the OBSERVATION table using the concept 4265167 which stands for ‘Date of death’. Similarly, the cause of death is stored in the CONDITION_OCCURRENCE table using the CONDITION_STATUS_CONCEPT_ID 32891 for ‘Cause of death’.

+
+
+

OBSERVATION_PERIOD

+

Table Description

+

This table contains records which define spans of time during which two conditions are expected to hold: (i) Clinical Events that happened to the Person are recorded in the Event tables, and (ii) absence of records indicate such Events did not occur during this span of time.

+

User Guide

+

For each Person, one or more OBSERVATION_PERIOD records may be present, but they will not overlap or be back to back to each other. Events may exist outside all of the time spans of the OBSERVATION_PERIOD records for a patient, however, absence of an Event outside these time spans cannot be construed as evidence of absence of an Event. Incidence or prevalence rates should only be calculated for the time of active OBSERVATION_PERIOD records. When constructing cohorts, outside Events can be used for inclusion criteria definition, but without any guarantee for the performance of these criteria. Also, OBSERVATION_PERIOD records can be as short as a single day, greatly disturbing the denominator of any rate calculation as part of cohort characterizations. To avoid that, apply minimal observation time as a requirement for any cohort definition.

+

ETL Conventions

+

Each Person needs to have at least one OBSERVATION_PERIOD record, which should represent time intervals with a high capture rate of Clinical Events. Some source data have very similar concepts, such as enrollment periods in insurance claims data. In other source data such as most EHR systems these time spans need to be inferred under a set of assumptions. It is at the discretion of the ETL developer to define these assumptions. In many ETL solutions the start date of the first occurrence or the first high quality occurrence of a Clinical Event (Condition, Drug, Procedure, Device, Measurement, Visit) is defined as the start of the OBSERVATION_PERIOD record, and the end date of the last occurrence or the last high quality occurrence of a Clinical Event, or the end of the database period becomes the end of the OBSERVATION_PERIOD for each Person. If a Person only has a single Clinical Event the OBSERVATION_PERIOD record can be as short as one day. Depending on these definitions it is possible that Clinical Events fall outside the time spans defined by OBSERVATION_PERIOD records. Family history or history of Clinical Events generally are not used to generate OBSERVATION_PERIOD records around the time they are referring to. Any two overlapping or adjacent OBSERVATION_PERIOD records have to be merged into one.

+
+
+

VISIT_OCCURRENCE

+

Table Description

+

This table contains Events where Persons engage with the healthcare system for a duration of time. They are often also called “Encounters”. Visits are defined by a configuration of circumstances under which they occur, such as (i) whether the patient comes to a healthcare institution, the other way around, or the interaction is remote, (ii) whether and what kind of trained medical staff is delivering the service during the Visit, and (iii) whether the Visit is transient or for a longer period involving a stay in bed.

+

User Guide

+

The configuration defining the Visit is described by Concepts in the Visit Domain, which form a hierarchical structure, but rolling up to generally familiar Visits adopted in most healthcare systems worldwide:

+
    +
  • Inpatient Visit: Person visiting hospital, at a Care Site, in bed, for duration of more than one day, with physicians and other Providers permanently available to deliver service around the clock
  • +
  • Emergency Room Visit: Person visiting dedicated healthcare institution for treating emergencies, at a Care Site, within one day, with physicians and Providers permanently available to deliver service around the clock
  • +
  • Emergency Room and Inpatient Visit: Person visiting ER followed by a subsequent Inpatient Visit, where Emergency department is part of hospital, and transition from the ER to other hospital departments is undefined
  • +
  • Non-hospital institution Visit: Person visiting dedicated institution for reasons of poor health, at a Care Site, long-term or permanently, with no physician but possibly other Providers permanently available to deliver service around the clock
  • +
  • Outpatient Visit: Person visiting dedicated ambulatory healthcare institution, at a Care Site, within one day, without bed, with physicians or medical Providers delivering service during Visit
  • +
  • Home Visit: Provider visiting Person, without a Care Site, within one day, delivering service
  • +
  • Telehealth Visit: Patient engages with Provider through communication media
  • +
  • Pharmacy Visit: Person visiting pharmacy for dispensing of Drug, at a Care Site, within one day
  • +
  • Laboratory Visit: Patient visiting dedicated institution, at a Care Site, within one day, for the purpose of a Measurement.
  • +
  • Ambulance Visit: Person using transportation service for the purpose of initiating one of the other Visits, without a Care Site, within one day, potentially with Providers accompanying the Visit and delivering service
  • +
  • Case Management Visit: Person interacting with healthcare system, without a Care Site, within a day, with no Providers involved, for administrative purposes
  • +
+

The Visit duration, or ‘length of stay’, is defined as VISIT_END_DATE - VISIT_START_DATE. For all Visits this is <1 day, except Inpatient Visits and Non-hospital institution Visits. The CDM also contains the VISIT_DETAIL table where additional information about the Visit is stored, for example, transfers between units during an inpatient Visit.

+

ETL Conventions

+

Visits can be derived easily if the source data contain coding systems for Place of Service or Procedures, like CPT codes for well visits. In those cases, the codes can be looked up and mapped to a Standard Visit Concept. Otherwise, Visit Concepts have to be identified in the ETL process. This table will contain concepts in the Visit domain. These concepts are arranged in a hierarchical structure to facilitate cohort definitions by rolling up to generally familiar Visits adopted in most healthcare systems worldwide. Visits can be adjacent to each other, i.e. the end date of one can be identical with the start date of the other. As a consequence, more than one-day Visits or their descendants can be recorded for the same day. Multi-day visits must not overlap, i.e. share days other than start and end days. It is often the case that some logic should be written for how to define visits and how to assign Visit_Concept_Id. For example, in US claims outpatient visits that appear to occur within the time period of an inpatient visit can be rolled into one with the same Visit_Occurrence_Id. In EHR data inpatient visits that are within one day of each other may be strung together to create one visit. It will all depend on the source data and how encounter records should be translated to visit occurrences. Providers can be associated with a Visit through the PROVIDER_ID field, or indirectly through PROCEDURE_OCCURRENCE records linked both to the VISIT and PROVIDER tables.

+
+
+

VISIT_DETAIL

+

Table Description

+

The VISIT_DETAIL table is an optional table used to represent details of each record in the parent VISIT_OCCURRENCE table. A good example of this would be the movement between units in a hospital during an inpatient stay or claim lines associated with one insurance claim. For every record in the VISIT_OCCURRENCE table there may be 0 or more records in the VISIT_DETAIL table with a 1:n relationship where n may be 0. The VISIT_DETAIL table is structurally very similar to VISIT_OCCURRENCE table and belongs to the visit domain.

+

User Guide

+

The configuration defining the Visit Detail is described by Concepts in the Visit Domain, which form a hierarchical structure. The Visit Detail record will be associated with the Visit Occurrence record in two ways:
1. The Visit Detail record will have the VISIT_OCCURRENCE_ID it is associated to 2. The VISIT_DETAIL_CONCEPT_ID will be a descendant of the VISIT_CONCEPT_ID for the Visit.

+

ETL Conventions

+

It is not mandatory that the VISIT_DETAIL table be filled in, but if you find that the logic to create VISIT_OCCURRENCE records includes the roll-up of multiple smaller records to create one picture of a Visit then it is a good idea to use VISIT_DETAIL. In EHR data, for example, a Person may be in the hospital but instead of one over-arching Visit their encounters are recorded as times they interacted with a health care provider. A Person in the hospital interacts with multiple providers multiple times a day so the encounters must be strung together using some heuristic (defined by the ETL) to identify the entire Visit. In this case the encounters would be considered Visit Details and the entire Visit would be the Visit Occurrence. In this example it is also possible to use the Vocabulary to distinguish Visit Details from a Visit Occurrence by setting the VISIT_CONCEPT_ID to 9201 and the VISIT_DETAIL_CONCEPT_IDs either to 9201 or its children to indicate where the patient was in the hospital at the time of care.

+
+
+

CONDITION_OCCURRENCE

+

Table Description

+

This table contains records of Events of a Person suggesting the presence of a disease or medical condition stated as a diagnosis, a sign, or a symptom, which is either observed by a Provider or reported by the patient.

+

User Guide

+

Conditions are defined by Concepts from the Condition domain, which form a complex hierarchy. As a result, the same Person with the same disease may have multiple Condition records, which belong to the same hierarchical family. Most Condition records are mapped from diagnostic codes, but recorded signs, symptoms and summary descriptions also contribute to this table. Rule out diagnoses should not be recorded in this table, but in reality their negating nature is not always captured in the source data, and other precautions must be taken when identifying Persons who should suffer from the recorded Condition. Record all conditions as they exist in the source data. Any decisions about diagnosis/phenotype definitions would be done through cohort specifications. These cohorts can be housed in the COHORT table. Conditions span a time interval from start to end, but are typically recorded as single snapshot records with no end date. The reason is twofold: (i) At the time of the recording the duration is not known and later not recorded, and (ii) the Persons typically cease interacting with the healthcare system when they feel better, which leads to incomplete capture of resolved Conditions. The CONDITION_ERA table addresses this issue. Family history and past diagnoses (‘history of’) are not recorded in this table. Instead, they are listed in the OBSERVATION table. Codes written in the process of establishing the diagnosis, such as ‘question of’ and ‘rule out’, should not be represented here. Instead, they should be recorded in the OBSERVATION table, if they are used for analyses. However, this information is not always available.

+

ETL Conventions

+

Source codes and source text fields mapped to Standard Concepts of the Condition Domain have to be recorded here.

+
+
+

DRUG_EXPOSURE

+

Table Description

+

This table captures records about the exposure to a Drug ingested or otherwise introduced into the body. A Drug is a biochemical substance formulated in such a way that when administered to a Person it will exert a certain biochemical effect on the metabolism. Drugs include prescription and over-the-counter medicines, vaccines, and large-molecule biologic therapies. Radiological devices ingested or applied locally do not count as Drugs.

+

User Guide

+

The purpose of records in this table is to indicate an exposure to a certain drug as best as possible. In this context a drug is defined as an active ingredient. Drug Exposures are defined by Concepts from the Drug domain, which form a complex hierarchy. As a result, one DRUG_SOURCE_CONCEPT_ID may map to multiple standard concept ids if it is a combination product. Records in this table represent prescriptions written, prescriptions dispensed, and drugs administered by a provider to name a few. The DRUG_TYPE_CONCEPT_ID can be used to find and filter on these types. This table includes additional information about the drug products, the quantity given, and route of administration.

+

ETL Conventions

+

Information about quantity and dose is provided in a variety of different ways and it is important for the ETL to provide as much information as possible from the data. Depending on the provenance of the data, fields may be captured differently, i.e. quantity for drugs administered may have a separate meaning from quantity for prescriptions dispensed. If a patient has multiple records on the same day for the same drug or procedures the ETL should not de-dupe them unless there is probable reason to believe the item is a true data duplicate. Take note on how to handle refills for prescriptions written.

+
+
+

PROCEDURE_OCCURRENCE

+

Table Description

+

This table contains records of activities or processes ordered by, or carried out by, a healthcare provider on the patient with a diagnostic or therapeutic purpose.

+

User Guide

+

Lab tests are not a procedure, if something is observed with an expected resulting amount and unit then it should be a measurement. Phlebotomy is a procedure but so trivial that it tends to be rarely captured. It can be assumed that there is a phlebotomy procedure associated with many lab tests, therefore it is unnecessary to add them as separate procedures. If the user finds the same procedure over concurrent days, it is assumed those records are part of a procedure lasting more than a day. This logic is in lieu of the procedure_end_date, which will be added in a future version of the CDM.

+

ETL Conventions

+

If a procedure lasts more than a day, then it should be recorded as a separate record for each day the procedure occurred, this logic is in lieu of the PROCEDURE_END_DATE, which will be added in a future version of the CDM. When dealing with duplicate records, the ETL must determine whether to sum them up into one record or keep them separate. Things to consider are: - Same Procedure - Same PROCEDURE_DATETIME - Same Visit Occurrence or Visit Detail - Same Provider - Same Modifier for Procedures. Source codes and source text fields mapped to Standard Concepts of the Procedure Domain have to be recorded here.

+
+
+

DEVICE_EXPOSURE

+

Table Description

+

The Device domain captures information about a person’s exposure to a foreign physical object or instrument which is used for diagnostic or therapeutic purposes through a mechanism beyond chemical action. Devices include implantable objects (e.g. pacemakers, stents, artificial joints), medical equipment and supplies (e.g. bandages, crutches, syringes), other instruments used in medical procedures (e.g. sutures, defibrillators) and material used in clinical care (e.g. adhesives, body material, dental material, surgical material).

+

User Guide

+

The distinction between Devices or supplies and Procedures is sometimes blurry, but the former are physical objects while the latter are actions, often to apply a Device or supply.

+

ETL Conventions

+

Source codes and source text fields mapped to Standard Concepts of the Device Domain have to be recorded here.

+
+
+

MEASUREMENT

+

Table Description

+

The MEASUREMENT table contains records of Measurements, i.e. structured values (numerical or categorical) obtained through systematic and standardized examination or testing of a Person or Person’s sample. The MEASUREMENT table contains both orders and results of such Measurements as laboratory tests, vital signs, quantitative findings from pathology reports, etc. Measurements are stored as attribute value pairs, with the attribute as the Measurement Concept and the value representing the result. The value can be a Concept (stored in VALUE_AS_CONCEPT), or a numerical value (VALUE_AS_NUMBER) with a Unit (UNIT_CONCEPT_ID). The Procedure for obtaining the sample is housed in the PROCEDURE_OCCURRENCE table, though it is unnecessary to create a PROCEDURE_OCCURRENCE record for each measurement if one does not exist in the source data. Measurements differ from Observations in that they require a standardized test or some other activity to generate a quantitative or qualitative result. If there is no result, it is assumed that the lab test was conducted but the result was not captured.

+

User Guide

+

Measurements are predominately lab tests with a few exceptions, like blood pressure or function tests. Results are given in the form of a value and unit combination. When investigating measurements, look for operator_concept_ids (<, >, etc.).

+

ETL Conventions

+

Only records where the source value maps to a Concept in the measurement domain should be included in this table. Even though each Measurement always has a result, the fields VALUE_AS_NUMBER and VALUE_AS_CONCEPT_ID are not mandatory as often the result is not given in the source data. When the result is not known, the Measurement record represents just the fact that the corresponding Measurement was carried out, which in itself is already useful information for some use cases. For some Measurement Concepts, the result is included in the test. For example, ICD10 CONCEPT_ID 45548980 ‘Abnormal level of unspecified serum enzyme’ indicates a Measurement and the result (abnormal). In those situations, the CONCEPT_RELATIONSHIP table in addition to the ‘Maps to’ record contains a second record with the relationship_id set to ‘Maps to value’. In this example, the ‘Maps to’ relationship directs to 4046263 ‘Enzyme measurement’ as well as a ‘Maps to value’ record to 4135493 ‘Abnormal’.

+
+
+

OBSERVATION

+

Table Description

+

The OBSERVATION table captures clinical facts about a Person obtained in the context of examination, questioning or a procedure. Any data that cannot be represented by any other domains, such as social and lifestyle facts, medical history, family history, etc. are recorded here. New to CDM v6.0: An Observation can now be linked to other records in the CDM instance using the fields OBSERVATION_EVENT_ID and OBS_EVENT_FIELD_CONCEPT_ID. To link another record to an Observation, the primary key goes in OBSERVATION_EVENT_ID (CONDITION_OCCURRENCE_ID, DRUG_EXPOSURE_ID, etc.) and the Concept representing the field where the OBSERVATION_EVENT_ID was taken from goes in the OBS_EVENT_FIELD_CONCEPT_ID. For example, a CONDITION_OCCURRENCE of Asthma might be linked to an Observation of a family history of Asthma. In this case the CONDITION_OCCURRENCE_ID of the Asthma record would go in OBSERVATION_EVENT_ID of the family history record and the CONCEPT_ID 1147127 would go in OBS_EVENT_FIELD_CONCEPT_ID to denote that the OBSERVATION_EVENT_ID represents a CONDITION_OCCURRENCE_ID.

+

User Guide

+

Observations differ from Measurements in that they do not require a standardized test or some other activity to generate a clinical fact. Typical observations are medical history, family history, the stated need for certain treatment, social circumstances, lifestyle choices, healthcare utilization patterns, etc. If the generation of clinical facts requires standardized testing such as lab testing or imaging and leads to a standardized result, the data item is recorded in the MEASUREMENT table. If the clinical fact observed determines a sign, symptom, diagnosis of a disease or other medical condition, it is recorded in the CONDITION_OCCURRENCE table. Valid Observation Concepts are not enforced to be from any domain though they still should be Standard Concepts.

+

ETL Conventions

+

Records whose Source Values map to any domain besides Condition, Procedure, Drug, Measurement or Device should be stored in the Observation table. Observations can be stored as attribute value pairs, with the attribute as the Observation Concept and the value representing the clinical fact. This fact can be a Concept (stored in VALUE_AS_CONCEPT), a numerical value (VALUE_AS_NUMBER), a verbatim string (VALUE_AS_STRING), or a datetime (VALUE_AS_DATETIME). Even though Observations do not have an explicit result, the clinical fact can be stated separately from the type of Observation in the VALUE_AS_* fields. It is recommended that Observations that are suggestive statements of positive assertion have a value of ‘Yes’ (concept_id=4188539) recorded, even though the null value is the equivalent.

+
+
+

NOTE

+

Table Description

+

The NOTE table captures unstructured information that was recorded by a provider about a patient in free text (in ASCII, or preferably in UTF8 format) notes on a given date. The type of note_text is CLOB or varchar(MAX) depending on RDBMS.

+

ETL Conventions

+

HL7/LOINC CDO is a standard for consistent naming of documents to support a range of use cases: retrieval, organization, display, and exchange. It guides the creation of LOINC codes for clinical notes. CDO annotates each document with 5 dimensions:

+
    +
  • Kind of Document: Characterizes the general structure of the document at a macro level (e.g. Anesthesia Consent)
  • +
  • Type of Service: Characterizes the kind of service or activity (e.g. evaluations, consultations, and summaries). The notion of time sequence, e.g., at the beginning (admission) at the end (discharge) is subsumed in this axis. Example: Discharge Teaching.
  • +
  • Setting: Setting is an extension of CMS’s definitions (e.g. Inpatient, Outpatient)
  • +
  • Subject Matter Domain (SMD): Characterizes the subject matter domain of a note (e.g. Anesthesiology)
  • +
  • Role: Characterizes the training or professional level of the author of the document, but does not break down to specialty or subspecialty (e.g. Physician). Each combination of these 5 dimensions rolls up to a unique LOINC code.
  • +
+

According to CDO requirements, only 2 of the 5 dimensions are required to properly annotate a document; Kind of Document and any one of the other 4 dimensions. However, not all the permutations of the CDO dimensions will necessarily yield an existing LOINC code. Each of these dimensions is contained in the OMOP Vocabulary under the domain of ‘Meas Value’ with each dimension represented as a Concept Class.

+
+
+

NOTE_NLP

+

Table Description

+

The NOTE_NLP table encodes all output of NLP on clinical notes. Each row represents a single extracted term from a note.

+
+
+

SPECIMEN

+

Table Description

+

The specimen domain contains the records identifying biological samples from a person.

+

ETL Conventions

+

Anatomic site is coded at the most specific level of granularity possible, such that higher level classifications can be derived using the Standardized Vocabularies.

+
+
+

FACT_RELATIONSHIP

+

Table Description

+

The FACT_RELATIONSHIP table contains records about the relationships between facts stored as records in any table of the CDM. Relationships can be defined between facts from the same domain, or different domains. Examples of Fact Relationships include: Person relationships (parent-child), care site relationships (hierarchical organizational structure of facilities within a health system), indication relationship (between drug exposures and associated conditions), usage relationships (of devices during the course of an associated procedure), or facts derived from one another (measurements derived from an associated specimen).

+

ETL Conventions

+

All relationships are directional, and each relationship is represented twice symmetrically within the FACT_RELATIONSHIP table. For example, if person_id = 1 is the mother of person_id = 2, two records are in the FACT_RELATIONSHIP table (all strings are in fact concept_id records in the Concept table): - Person, 1, Person, 2, parent of - Person, 2, Person, 1, child of

+
+
+

SURVEY_CONDUCT

+

Table Description

+

The SURVEY_CONDUCT table is used to store an instance of a completed survey or questionnaire.

+

User Guide

+

This table captures details of the individual questionnaire such as who completed it, when it was completed and to which patient treatment or visit it relates (if any).

+

ETL Conventions

+

Each SURVEY has a SURVEY_CONCEPT_ID, a concept in the CONCEPT table identifying the questionnaire e.g. EQ5D, VR12, SF12. Each questionnaire should exist in the CONCEPT table. Each SURVEY can be optionally related to a specific Visit in order to link it both to the Visit during which it was completed and any subsequent Visit where treatment was assigned based on the patient’s responses.

+
+
+
+

Health System Data Tables

+
+

LOCATION

+

Table Description

+

The LOCATION table represents a generic way to capture physical location or address information of Persons and Care Sites. New to CDM v6.0 The LOCATION table now includes latitude and longitude.

+

ETL Conventions

+

Each address or Location is unique and is present only once in the table. Locations do not contain names, such as the name of a hospital. In order to construct a full address that can be used in the postal service, the address information from the Location needs to be combined with information from the Care Site. For standardized geospatial visualization and analysis, addresses need to be, at the minimum, geocoded into latitude and longitude.

+
+
+

LOCATION_HISTORY

+

Table Description

+

The LOCATION_HISTORY table stores relationships between Persons or Care Sites and geographic locations over time. This table is new to CDM v6.0.

+
+
+

CARE_SITE

+

Table Description

+

The CARE_SITE table contains a list of uniquely identified institutional (physical or organizational) units where healthcare delivery is practiced (offices, wards, hospitals, clinics, etc.).

+

ETL Conventions

+

Care site is a unique combination of location_id and place_of_service_source_value. Care site does not take into account the provider (human) information such as specialty. Many source data do not make a distinction between individual and institutional providers. The CARE_SITE table contains the institutional providers. If the source, instead of uniquely identifying individual Care Sites, only provides limited information such as Place of Service, generic or “pooled” Care Site records are listed in the CARE_SITE table. There can be hierarchical and business relationships between Care Sites. For example, wards can belong to clinics or departments, which can in turn belong to hospitals, which in turn can belong to hospital systems, which in turn can belong to HMOs. The relationships between Care Sites are defined in the FACT_RELATIONSHIP table.

+
+
+

PROVIDER

+

Table Description

+

The PROVIDER table contains a list of uniquely identified healthcare providers. These are individuals providing hands-on healthcare to patients, such as physicians, nurses, midwives, physical therapists etc.

+

User Guide

+

Many sources do not make a distinction between individual and institutional providers. The PROVIDER table contains the individual providers. If the source, instead of uniquely identifying individual providers, only provides limited information such as specialty, generic or ‘pooled’ Provider records are listed in the PROVIDER table.

+
+
+
+

Health Economics Data Tables

+
+

PAYER_PLAN_PERIOD

+

Table Description

+

The PAYER_PLAN_PERIOD table captures details of the period of time that a Person is continuously enrolled under a specific health Plan benefit structure from a given Payer. Each Person receiving healthcare is typically covered by a health benefit plan, which pays for (fully or partially), or directly provides, the care. These benefit plans are provided by payers, such as health insurances or state or government agencies. In each plan the details of the health benefits are defined for the Person or her family, and the health benefit Plan might change over time typically with increasing utilization (reaching certain cost thresholds such as deductibles), plan availability and purchasing choices of the Person. The unique combinations of Payer organizations, health benefit Plans and time periods in which they are valid for a Person are recorded in this table.

+

User Guide

+

A Person can have multiple, overlapping, Payer_Plan_Periods in this table. For example, medical and drug coverage in the US can be represented by two Payer_Plan_Periods. The details of the benefit structure of the Plan are rarely known; the idea is just to identify that the Plans are different.

+
+
+

COST

+

Table Description

+

The COST table captures records containing the cost of any medical event recorded in one of the OMOP clinical event tables such as DRUG_EXPOSURE, PROCEDURE_OCCURRENCE, VISIT_OCCURRENCE, VISIT_DETAIL, DEVICE_EXPOSURE, OBSERVATION or MEASUREMENT.

+

Each record in the cost table accounts for the amount of money transacted for the clinical event. So, the COST table may be used to represent both receivables (charges) and payments (paid), each transaction type represented by its COST_CONCEPT_ID. The COST_TYPE_CONCEPT_ID field will use concepts in the Standardized Vocabularies to designate the source (provenance) of the cost data. A reference to the health plan information in the PAYER_PLAN_PERIOD table is stored in the record for information used for the adjudication system to determine the Person’s benefit for the clinical event.

+

User Guide

+

When dealing with summary costs, the cost of the goods or services the provider provides is often not known directly, but derived from the hospital charges multiplied by an average cost-to-charge ratio.

+

ETL Conventions

+

One cost record is generated for each response by a payer. In a claims database, the payment and payment terms reported by the payer for the goods or services billed will generate one cost record. If the source data has payment information for more than one payer (i.e. primary insurance and secondary insurance payment for one entity), then a cost record is created for each reporting payer. Therefore, it is possible for one procedure to have multiple cost records for each payer, but typically it contains one or no record per entity. Payer reimbursement cost records will be identified by using the PAYER_PLAN_ID field. Drug costs are composed of ingredient cost (the amount charged by the wholesale distributor or manufacturer), the dispensing fee (the amount charged by the pharmacy), and the sales tax.

+
+
+
+

Standardized Derived Elements

+
+

DRUG_ERA

+

Table Description

+

A Drug Era is defined as a span of time when the Person is assumed to be exposed to a particular active ingredient. A Drug Era is not the same as a Drug Exposure: Exposures are individual records corresponding to the source when Drug was delivered to the Person, while successive periods of Drug Exposures are combined under certain rules to produce continuous Drug Eras.

+

ETL Conventions

+

The SQL script for generating DRUG_ERA records can be found here.

+
+
+

DOSE_ERA

+

Table Description

+

A Dose Era is defined as a span of time when the Person is assumed to be exposed to a constant dose of a specific active ingredient.

+

ETL Conventions

+

Dose Eras will be derived from records in the DRUG_EXPOSURE table and the Dose information from the DRUG_STRENGTH table using a standardized algorithm. Dose Form information is not taken into account. So, if the patient changes between different formulations, or different manufacturers with the same formulation, the Dose Era is still spanning the entire time of exposure to the Ingredient.

+
+
+

CONDITION_ERA

+

Table Description

+

A Condition Era is defined as a span of time when the Person is assumed to have a given condition. Similar to Drug Eras, Condition Eras are chronological periods of Condition Occurrence. Combining individual Condition Occurrences into a single Condition Era serves two purposes:

+
    +
  • It allows aggregation of chronic conditions that require frequent ongoing care, instead of treating each Condition Occurrence as an independent event.
  • +
  • It allows aggregation of multiple, closely timed doctor visits for the same Condition to avoid double-counting the Condition Occurrences. For example, consider a Person who visits her Primary Care Physician (PCP) and who is referred to a specialist. At a later time, the Person visits the specialist, who confirms the PCP’s original diagnosis and provides the appropriate treatment to resolve the condition. These two independent doctor visits should be aggregated into one Condition Era.
  • +
+

ETL Conventions

+

Each Condition Era corresponds to one or many Condition Occurrence records that form a continuous interval. The condition_concept_id field contains Concepts that are identical to those of the CONDITION_OCCURRENCE table records that make up the Condition Era. In contrast to Drug Eras, Condition Eras are not aggregated to contain Conditions of different hierarchical layers. The SQL script for generating CONDITION_ERA records can be found here. The Condition Era Start Date is the start date of the first Condition Occurrence. The Condition Era End Date is the end date of the last Condition Occurrence. Condition Eras are built with a Persistence Window of 30 days, meaning, if no occurrence of the same condition_concept_id happens within 30 days of any one occurrence, it will be considered the condition_era_end_date.

+
+
+
+

Metadata Tables

+
+

METADATA

+

Table Description

+

The METADATA table contains metadata information about a dataset that has been transformed to the OMOP Common Data Model.

+
+
+

CDM_SOURCE

+

Table Description

+

The CDM_SOURCE table contains detail about the source database and the process used to transform the data into the OMOP Common Data Model.

+
+
+
+

Vocabulary Tables

+
+

CONCEPT

+

Table Description

+

The Standardized Vocabularies contains records, or Concepts, that uniquely identify each fundamental unit of meaning used to express clinical information in all domain tables of the CDM. Concepts are derived from vocabularies, which represent clinical information across a domain (e.g. conditions, drugs, procedures) through the use of codes and associated descriptions. Some Concepts are designated Standard Concepts, meaning these Concepts can be used as normative expressions of a clinical entity within the OMOP Common Data Model and within standardized analytics. Each Standard Concept belongs to one domain, which defines the location where the Concept would be expected to occur within data tables of the CDM.

+

Concepts can represent broad categories (like ‘Cardiovascular disease’), detailed clinical elements (‘Myocardial infarction of the anterolateral wall’) or modifying characteristics and attributes that define Concepts at various levels of detail (severity of a disease, associated morphology, etc.).

+

Records in the Standardized Vocabularies tables are derived from national or international vocabularies such as SNOMED-CT, RxNorm, and LOINC, or custom Concepts defined to cover various aspects of observational data analysis.

+
+
+

VOCABULARY

+

Table Description

+

The VOCABULARY table includes a list of the Vocabularies collected from various sources or created de novo by the OMOP community. This reference table is populated with a single record for each Vocabulary source and includes a descriptive name and other associated attributes for the Vocabulary.

+
+
+

DOMAIN

+

Table Description

+

The DOMAIN table includes a list of OMOP-defined Domains the Concepts of the Standardized Vocabularies can belong to. A Domain defines the set of allowable Concepts for the standardized fields in the CDM tables. For example, the “Condition” Domain contains Concepts that describe a condition of a patient, and these Concepts can only be stored in the condition_concept_id field of the CONDITION_OCCURRENCE and CONDITION_ERA tables. This reference table is populated with a single record for each Domain and includes a descriptive name for the Domain.

+
+
+

CONCEPT_CLASS

+

Table Description

+

The CONCEPT_CLASS table is a reference table, which includes a list of the classifications used to differentiate Concepts within a given Vocabulary. This reference table is populated with a single record for each Concept Class.

+
+
+

CONCEPT_RELATIONSHIP

+

Table Description

+

The CONCEPT_RELATIONSHIP table contains records that define direct relationships between any two Concepts and the nature or type of the relationship. Each type of a relationship is defined in the RELATIONSHIP table.

+
+
+

RELATIONSHIP

+

Table Description

+

The RELATIONSHIP table provides a reference list of all types of relationships that can be used to associate any two concepts in the CONCEPT_RELATIONSHIP table.

+
+
+

CONCEPT_SYNONYM

+

Table Description

+

The CONCEPT_SYNONYM table is used to store alternate names and descriptions for Concepts.

+
+
+

CONCEPT_ANCESTOR

+

Table Description

+

The CONCEPT_ANCESTOR table is designed to simplify observational analysis by providing the complete hierarchical relationships between Concepts. Only direct parent-child relationships between Concepts are stored in the CONCEPT_RELATIONSHIP table. To determine higher level ancestry connections, all individual direct relationships would have to be navigated at analysis time. The CONCEPT_ANCESTOR table includes records for all parent-child relationships, as well as grandparent-grandchild relationships and those of any other level of lineage. Using the CONCEPT_ANCESTOR table allows for querying for all descendants of a hierarchical concept. For example, drug ingredients and drug products are all descendants of a drug class ancestor.

+

This table is entirely derived from the CONCEPT, CONCEPT_RELATIONSHIP and RELATIONSHIP tables.

+
+
+

SOURCE_TO_CONCEPT_MAP

+

Table Description

+

The source to concept map table is a legacy data structure within the OMOP Common Data Model, recommended for use in ETL processes to maintain local source codes which are not available as Concepts in the Standardized Vocabularies, and to establish mappings for each source code into a Standard Concept as target_concept_ids that can be used to populate the Common Data Model tables. The SOURCE_TO_CONCEPT_MAP table is no longer populated with content within the Standardized Vocabularies published to the OMOP community.

+
+
+

DRUG_STRENGTH

+

Table Description

+

The DRUG_STRENGTH table contains structured content about the amount or concentration and associated units of a specific ingredient contained within a particular drug product. This table is supplemental information to support standardized analysis of drug utilization.

+
+
+

COHORT

+

Table Description

+

The COHORT table contains records of subjects that satisfy a given set of criteria for a duration of time. The definition of the cohort is contained within the COHORT_DEFINITION table. It is listed as part of the RESULTS schema because it is a table that users of the database as well as tools such as ATLAS need to be able to write to. The CDM and Vocabulary tables are all read-only so it is suggested that the COHORT and COHORT_DEFINITION tables are kept in a separate schema to alleviate confusion.

+

ETL Conventions

+

Cohorts typically include patients diagnosed with a specific condition or patients exposed to a particular drug, but can also be Providers who have performed a specific Procedure. Cohort records must have a Start Date and an End Date, but the End Date may be set to Start Date or could have an applied censor date using the Observation Period Start Date. Cohort records must contain a Subject Id, which can refer to the Person, Provider, Visit record or Care Site though they are most often Person Ids. The Cohort Definition will define the type of subject through the subject concept id. A subject can belong (or not belong) to a cohort at any moment in time. A subject can only have one record in the cohort table for any moment of time, i.e. it is not possible for a person to contain multiple records indicating cohort membership that are overlapping in time.

+
+
+

COHORT_DEFINITION

+

Table Description

+

The COHORT_DEFINITION table contains records defining a Cohort derived from the data through the associated description and syntax and upon instantiation (execution of the algorithm) placed into the COHORT table. Cohorts are a set of subjects that satisfy a given combination of inclusion criteria for a duration of time. The COHORT_DEFINITION table provides a standardized structure for maintaining the rules governing the inclusion of a subject into a cohort, and can store operational programming code to instantiate the cohort within the OMOP Common Data Model.

+
+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/cdmPrivacy.html b/rmd/cdmPrivacy.html new file mode 100644 index 0000000..392f369 --- /dev/null +++ b/rmd/cdmPrivacy.html @@ -0,0 +1,681 @@ + + + + + + + + + + + + + +Preserving Privacy in an OMOP CDM Implementation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +

By Kristin Kostka

+
+

Background

+

The OMOP CDM is a person-centric model. Being person-centric means the model can retain attributes that may be considered personally identifiable information (PII) or protected health information (PHI). There are many different ways a site may treat their OMOP CDM to uphold their privacy protocols. In this article we provide guidance on overall process and the potential fields that should be monitored to adhere to these various privacy preserving protocols.

+
+

Defining PII and PHI

+
    +
  • PII is defined as any representation of information that permits the identity of an individual to whom the information applies to be reasonably inferred by either direct or indirect means [1].
  • +
  • The United States Department of Health & Human Services’ Office for Civil Rights has defined PHI as any Personal Identifying Information (PII) that – individually or combined – could potentially identify a specific individual, their past, present or future healthcare, or the method of payment. There are eighteen unique identifiers considered to be PHI: 1) names, 2) geographic data, 3) all elements of dates, 4) telephone numbers, 5) FAX numbers, 6) email addresses, 7) Social Security numbers (SSN), 8) medical record numbers (MRN), 9) health plan beneficiary numbers, 10) account numbers, 11) certificate/license numbers, 12) vehicle identifiers and serial numbers including license plates, 13) device identifiers and serial numbers, 14) web URLs, 15) internet protocol addresses, 16) biometric identifiers (e.g. retinal scan, fingerprints), 17) full face photos and comparable images, and 18) any unique identifying number, characteristic or code. PHI is no longer considered PHI when it is de-identified of these unique attributes. PHI is commonly referred to in relation to the Health Insurance Portability and Accountability Act (HIPAA) and associated legislation such as the Health Information Technology for Economic and Clinical Health Act (HITECH) [2].
  • +
+
+
+
+

The Data Holder’s Responsibility

+

In OHDSI, it is the responsibility of each data holder to know, understand and follow local data governance processes related to use of the OMOP CDM. In the United States, these processes will follow your organization’s local interpretation for maintaining compliance to PII and PHI protection. In OMOP CDM implementations containing European Union citizen data, local governance processes will include measures to comply with General Data Protection Regulation (GDPR) [3]. As a community, the OHDSI data network covers more than 330 databases from 34 countries. There is extensive community knowledge on the interpretation of rule sets and exemplar IRB and local governance workflows that can be made available to institutions navigating these processes for the first time. If your organization does not have an established data governance process, please reach out on the OHDSI Forums under “Implementers” and the community can respond with shared guidance from their own deployments. As a community, we aim to conduct research that keeps patient-level data local and share only aggregate results.

+
+
+

Complying with Privacy Preservation

+

Complying with local governance processes depends on the rule set being used. There may be allowable times when data use agreements and data transfer agreements exist between collaborating institutions to facilitate sharing of PII and PHI. In this section we will discuss common rule sets that organizations adhere to.

+
+

Limited Data Sets

+

A limited data set (LDS) is defined as protected health information that excludes certain direct identifiers of an individual or of relatives, employers or household members of the individual — but may include city, state, ZIP code and elements of dates. A LDS can be disclosed only for purposes of research, public health or health care operations. LDS requirements are dictated by the HIPAA Privacy Rule.

+
+
+

De-identified Data Sets

+

De-identified data, as defined by Section 164.514(a) of the HIPAA Privacy Rule, is health information that does not identify an individual and with respect to which there is no reasonable basis to believe that the information can be used to identify an individual; such information is not individually identifiable health information. There are two methods for achieving de-identification in accordance with HIPAA [4].

+
    +
  1. Expert Determination (§164.514(a))- Implementation specifications: requirements for de-identification of protected health information. A covered entity may determine that health information is not individually identifiable health information only if:
  2. +
+
    +
  1. A person with appropriate knowledge of and experience with generally accepted statistical and scientific principles and methods for rendering information not individually identifiable:
  2. +
+
    +
  1. Applying such principles and methods, determines that the risk is very small that the information could be used, alone or in combination with other reasonably available information, by an anticipated recipient to identify an individual who is a subject of the information; and
  2. +
  3. Documents the methods and results of the analysis that justify such determination.
  4. +
+
    +
  1. Safe Harbor (§164.514(b)) The eighteen unique identifiers are obfuscated. This includes processes such as:
  2. +
+
    +
  1. Dates of service are algorithmically shifted to protect patient privacy.
  2. +
  3. Patient ZIP codes are truncated to the first three digits or removed entirely if the ZIP code represents fewer than 20,000 individuals.
  4. +
  5. Removing and, when necessary, replacing unique identifiers AND The entity does not have actual knowledge that the information could be used alone or in combination with other information to identify an individual who is a subject of the information.
  6. +
+
+
+

Field-level Implications of De-identification Processes

+
+

PERSON Table Attributes

+

In the OMOP CDM, the PERSON table serves as the central identity management for all Persons in the database. It contains records that uniquely identify each person or patient, and some demographic information. It is a table that has a number of field-level implications for privacy preserving protocols.

+

Considerations include: - PERSON.person_id should never contain Medical Record Number, Social Security Number or similar uniquely identifiable number. This should be a number that is essentially meaningless but has the ability to be a primary key across tables. - PERSON.year_of_birth, PERSON.month_of_birth, PERSON.day_of_birth and PERSON.birth_datetime may require some redaction or modification depending on interpretation of rule set. Consult local guidance on the need to modify these fields when creating compliant views of de-identified data. - PERSON.person_source_value may contain sensitive information used to generate the person_id field. It is advised to practice caution when creating views of these data. It would be wise to obfuscate or redact this field if you are not sure what is contained in the raw information being extracted, transformed and loaded into the CDM.

+
+
+
+

Date Fields Across Domains

+

Date fields are used across many OMOP domains including: OBSERVATION_PERIOD, VISIT_OCCURRENCE, VISIT_DETAIL, CONDITION_OCCURRENCE, DRUG_EXPOSURE, PROCEDURE_OCCURRENCE, DEVICE_EXPOSURE, MEASUREMENT, OBSERVATION, DEATH, NOTE, NOTE_NLP, SPECIMEN, PAYER_PLAN_PERIOD, DRUG_ERA, DOSE_ERA, and CONDITION_ERA.

+

As discussed previously, some rule sets may require algorithmically shifting dates. It is advised that when date shifting is applied, it is done holistically. This means that when shifting dates, you should not treat each record independently. Instead, a robust date shifting algorithm will link off the *.person_id (where * is the domain name such as CONDITION_OCCURRENCE, etc) and apply the same offset to all events. This allows researchers to have the ability to understand the sequence of events while preserving patient privacy.

+

The implication of not holistically shifting all events together by the same offset is that information may be out of sequence or illogical. An example would be a death record that happens prior to other event records (conditions, drugs, procedures, etc). When applying an algorithmic shift of dates, it is important to inform your OMOP CDM user group of the known offset. This is especially important in temporal studies which may be looking to make statements about disease history relative to the time when an event is observed.

+

Some rule sets do not require algorithmic shifting of dates, such as Limited Data Sets. In these situations, a user of a LDS OMOP CDM would not be expecting dates to be shifted. If a shift is applied, it should be disclosed and the offset amount (e.g. +/- 7 days, +/- 30 days, etc) should be made available to those who have received permission to use a LDS dataset. Otherwise, these data are not upholding the assumptions of the rule set applied.

+
+
+

LOCATION Table Attributes

+

The LOCATION table represents a generic way to capture physical location or address information of Persons and Care Sites. When applying privacy preserving procedures, this table should be reviewed and scrubbed relative to the rule set. The LOCATION.zip field should be redacted relative to the type of process applied (e.g. 3-digit zip for de-identified data). The LOCATION.location_source_value should be reviewed for potential PII/PHI. It would be wise to obfuscate or redact this field if you are not sure what is contained in the raw information being extracted, transformed and loaded into the CDM.

+
+
+

PROVIDER Table Attributes

+

The PROVIDER table contains a list of uniquely identified healthcare providers. These are individuals providing hands-on healthcare to patients, such as physicians, nurses, midwives, physical therapists etc. In some privacy preserving processes, the PROVIDER.npi and PROVIDER.dea fields may be redacted. Please review these fields and confirm that you are adhering to privacy rule sets. The PROVIDER.year_of_birth field is an optional field that may also require treatment in certain rule sets.

+
+
+

OBSERVATION Table Attributes

+

The OBSERVATION table captures clinical facts about a Person obtained in the context of examination, questioning or a procedure. Any data that cannot be represented by any other domains, such as social and lifestyle facts, medical history, family history, etc. are recorded here.

+

We strongly caution ETL teams to review the OBSERVATION table for potential PII/PHI. In some source systems, there can be information coming in these vocabularies that are not laboratory or clinical observations but instead are patient identifiers. If you search in ATHENA, you will find there are a number of standard terms in the SNOMED and LOINC vocabularies that can represent phone numbers, emails, and other PII information.

+

It is difficult to create an exhaustive list of terms because these ontologies do not maintain or publish lists of terms that may contain patient identifiers. It is, therefore, up to data holders to perform a review of this domain with an eye for these potential privacy issues. The National COVID Cohort Collaborative, a NIH consortium which uses the OMOP CDM, has published a resource for sites needing assistance with identifying these potentially problematic records. A “live” version of this table that will track updates over time is hosted at https://github.com/data2health/next-gen-data-sharing/blob/master/CodesWithPPIPotential.csv. We welcome additions to this list from the community.

+
+
+

Scrutinizing *_source_value

+

The *_source_value (where * is the domain name such as CONDITION_OCCURRENCE, etc) fields present an opportunity for sites to carry forward potential PII/PHI in ETL processes. Because the convention of the OMOP CDM has minimal boundaries on what are retained in these fields, it is important to treat all source_value fields as potentially containing PII/PHI. We highly advise all data holders to scrutinize these fields when applying privacy preserving processes. It is not uncommon for fields to be overloaded and contain potential patient identifiers. Please use caution when transmitting or making views of these fields available to users.

+
+
+

Scrutinizing String Fields

+

Across OMOP Domains, there are many fields which permit the use of strings (e.g. DRUG_EXPOSURE.sig, MEASUREMENT.value_as_string, OBSERVATION.value_as_string). We’ve discussed some of these in prior sections. Be advised that fields with strings often have the potential to contain unintentional PII/PHI. Targeted regular expressions can be built into ETL processes in order to “sniff” out any additional PII (or potential PII)–such as any data in the format of a phone number, or a person or place name. (E.g., the regular expression “Mr.|Mrs.|Dr.|, M.?D.?” will find any string with an English name prefix.) Depending on risk tolerance, the expressions could err toward sensitivity or specificity, and could be tweaked over time to meet different rule sets.

+

Extensive regular expression matching during ETL may add significant processing time and should therefore not be relied upon as a sole solution, but rather an extra protection against edge cases. Other algorithmic rules may also prove useful, such as automatically quarantining records with lengthy string values (which could signal the presence of free text). If these approaches are implemented, records that match the regular expressions or rules can be quarantined in a separate table or staging area to be manually reviewed by a data broker. Thus, in addition to adding another layer of PII protection, another advantage of these approaches is the potential to uncover ways that underlying vocabularies may be contributing to unintentional sharing of PII and create awareness for future privacy preserving processes shared across the community.

+
+
+

NOTE and NOTE_NLP Table Attributes

+

The NOTE table captures unstructured information that was recorded by a provider about a patient in free text (in ASCII, or preferably in UTF8 format) notes on a given date. The NOTE_NLP table encodes all output of NLP on clinical notes. Each row represents a single extracted term from a note. There is a high potential these tables may retain information that is considered PII/PHI. In addition to overall string searching, these tables are likely to be dropped altogether to adhere to the most stringent rule sets.

+

It is highly advised that if you are conducting a study with NOTE and NOTE_NLP table information, please consult with your local governance and privacy officers to ensure compliance with local rule sets.

+
+
+

Conclusion

+

Privacy preserving processes are not one-size-fits-all. There are many different rule sets that can be applied to datasets. Data holders are recommended to consult with their local privacy officer(s) to ensure all processes applied to a database are compliant with local interpretation of the selected rule set.

+
+
+
+

References

+
    +
  1. BibTeX version: @MISC{noauthor_undated-mg, title = “Guidance on the Protection of Personal Identifiable Information”, abstract = “Personal Identifiable Information (PII) is defined as:”, howpublished = “https://www.dol.gov/general/ppii”, note = “Accessed: 2021-8-18” }
  2. +
+

Regular citation: Guidance on the Protection of Personal Identifiable Information. [cited 18 Aug 2021]. Available: https://www.dol.gov/general/ppii

+
    +
  1. BibTeX version: @MISC{HIPAA_Journal2017-yo, title = “What Does {PHI} Stand For?”, author = “{HIPAA Journal}”, abstract = “PHI is a term used in connection with health data, but what does PHI stand for? What information is included in the definition of PHI.”, month = dec, year = 2017, howpublished = “https://www.hipaajournal.com/what-does-phi-stand-for/”, note = “Accessed: 2021-8-18”, language = “en” }
  2. +
+

Regular citation: HIPAA Journal. What Does PHI Stand For? 23 Dec 2017 [cited 18 Aug 2021]. Available: https://www.hipaajournal.com/what-does-phi-stand-for/

+
    +
  1. BibTeX version:
  2. +
+

@MISC{noauthor_2018-mt, title = “General Data Protection Regulation ({GDPR}) Compliance Guidelines”, abstract = “The EU General Data Protection Regulation went into effect on May 25, 2018, replacing the Data Protection Directive 95/46/EC. Designed to increase data privacy for EU citizens, the regulation levies steep fines on organizations that don’t follow the law.”, month = jun, year = 2018, howpublished = “https://gdpr.eu/”, note = “Accessed: 2021-8-18”, language = “en” }

+

Regular citation: General Data Protection Regulation (GDPR) Compliance Guidelines. 18 Jun 2018 [cited 18 Aug 2021]. Available: https://gdpr.eu/

+
    +
  1. BibTeX version:
  2. +
+

@MISC{Office_for_Civil_Rights_OCR_undated-zy, title = “Methods for De-identification of {PHI}”, author = “{Office for Civil Rights (OCR)}”, abstract = “Guidance about methods and approaches to achieve de-identification in accordance with the Health Insurance Portability and Accountability Act of 1996.”, howpublished = “https://www.hhs.gov/hipaa/for-professionals/privacy/special-topics/de-identification/index.html”, note = “Accessed: 2021-8-19” }

+

Regular citation: Office for Civil Rights (OCR). Methods for De-identification of PHI. [cited 19 Aug 2021]. Available: https://www.hhs.gov/hipaa/for-professionals/privacy/special-topics/de-identification/index.html

+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/cdmRPackage.html b/rmd/cdmRPackage.html new file mode 100644 index 0000000..c28a7cc --- /dev/null +++ b/rmd/cdmRPackage.html @@ -0,0 +1,497 @@ + + + + + + + + + + + + + +CDM R Package Demo + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + +

Below is an example showing two different ways the CDM R package can be used to create the tables in your environment. First, it uses the buildRelease function to create the DDL files on your machine. Once you have these downloaded you can then run them in your SQL client to set up the empty CDM tables. Second, the script shows the executeDdl function that will connect up to your SQL client directly (assuming your dbms is one of the supported dialects) and instantiate the tables through R.

+
## First, install the package from GitHub
+install.packages("devtools")
+devtools::install_github("OHDSI/CommonDataModel")
+
+## List the currently supported SQL dialects
+CommonDataModel::listSupportedDialects()
+
+## List the currently supported CDM versions
+CommonDataModel::listSupportedVersions()
+
+## There are multiple ways to generate the DDLs
+
+## 1. Use the buildRelease function to generate the text files in the dialect you choose.
+##    This function will put the output files in the folder you specify
+##    ("/pathToOutput" is a placeholder; replace it with a real directory).
+
+CommonDataModel::buildRelease(cdmVersions = "5.4",
+                              targetDialects = "postgresql",
+                              outputfolder = "/pathToOutput")
+
+## 2. If you have an empty schema ready to go, the package will connect and instantiate
+##    the tables for you.
+
+### 2a. To start, you need to install DatabaseConnector in order to connect to your database.
+###     install_github expects the "owner/repo" form, so the OHDSI organization is required.
+devtools::install_github("OHDSI/DatabaseConnector")
+
+### 2b. Describe the connection. The server, user, password and pathToDriver values
+###     below are placeholders; replace them with the details of your own database.
+cd <- DatabaseConnector::createConnectionDetails(dbms = "postgresql",
+                                                 server = "localhost/ohdsi",
+                                                 user = "postgres",
+                                                 password = "postgres",
+                                                 pathToDriver = "/pathToDriver"
+                                                 )
+
+### 2c. Create the CDM tables for the chosen version in the target schema.
+CommonDataModel::executeDdl(connectionDetails = cd,
+                            cdmVersion = "5.4",
+                            cdmDatabaseSchema = "ohdsi_demo"
+                            )
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/rmd/cdmRefreshProcess.html b/rmd/cdmRefreshProcess.html new file mode 100644 index 0000000..5dc366f --- /dev/null +++ b/rmd/cdmRefreshProcess.html @@ -0,0 +1,610 @@ + + + + + + + + + + + + + +cdmRefreshProcess.knit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +

The OMOP Common Data Model is managed by the OHDSI CDM Working Group. The formal remit of the CDM Working Group (WG) is to hear proposals for change, ratifying only those with valid use cases and data to support them. Then, once ratified, these proposals are incorporated into the next version of the CDM. In the past, this was done by the WG alone. The group would invite others from around the community to present use cases for change and suggestions for improvement. The WG would then vote on the proposals and a new CDM version would be released after a certain period of time or if enough proposals were voted in. This approach worked when the community was smaller but as it is growing rapidly the CDM WG needed to adapt the refresh cycle such that everyone has an opportunity to weigh in on the proposed changes.

+
+

CDM Refresh Cycle

+

+
+

1. Define New Version [Completed for v5.4]

+

The image above describes the new CDM refresh cycle. It begins with defining a new version. This has been completed for the current cycle. Issues and proposals on the github were considered during a 4-hour workshop where it was decided the next CDM version will be CDM v5.4, building off of CDM v5.3. The group then participated in a rapid-fire voting activity to identify which changes should be incorporated into CDM v5.4. Any items that were not unanimously agreed upon were then discussed in small groups to hone the proposal and suggestions were presented back to the group. The final roadmap for CDM v5.4 can be found here.

+

Looking to open a proposal to change or augment the CDM? Please open an issue and use the proposal template.

+
+
+

2. Sign off from Work Groups [Completed for v5.4]

+

Each member of the CDM WG is a liaison for another workgroup in the community. They are responsible for presenting the proposed changes to the CDM and collecting the feedback. This has resulted in very helpful suggestions from the EHR, Data Quality, Device, HADES, and ACHILLES groups. This outreach has proven to be very effective and should result in a very stable version.

+
+
+

3. Release DDLs

+

After all changes and suggestions are agreed upon by the community and work groups the next step is to generate the DDLs. The CDM WG hosted a hackathon on August 18-19, 2021. During this time the group created an R package to automatically generate the DDLs and the code to instantiate an empty CDM instance. Changes were made to v5.3 to generate v5.4 and the repository was refactored.

+
+
+

4. Software Update

+

There will be a period of time once the DDLs are ready to allow the software and methods developers to prepare for the official release of the CDM. This is meant to serve as a buffer so that once the community starts adopting the new model, the tools and methods will be ready to support it.

+
+
+

5. Community Support

+

This is the final stage of the CDM refresh cycle. Once the DDLs are ready and the software and tools support the new version, the CDM WG will work to help the community convert their data to the new model.

+
+
+
+

CDM WG Meeting Information

+

The CDM working group meets the first and third Tuesday of the month. See below for links to the meetings.

+

Every first Tuesday of the month at 1pm est Teams Meeting

+

Every third Tuesday of the month at 1pm est Teams Meeting

+

Note These were recently changed from a Skype meeting to a Microsoft Teams meeting. If you do not have access to the OHDSI Teams Tenant, please contact Clair Blacketer at .

+ +
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/contribute.html b/rmd/contribute.html new file mode 100644 index 0000000..eba0c36 --- /dev/null +++ b/rmd/contribute.html @@ -0,0 +1,579 @@ + + + + + + + + + + + + + +Ask A Question + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +
+

Check the Forum

+

Chances are someone else has already asked your question on the forum and it has been answered.

+
+
+

Check the Issue Log

+

If you think you’ve found a bug you can check to see if someone also had that same problem on our issue page of the Common Data Model github. If you don’t see your issue feel free to open one. Try and be as specific as possible. What error message occurs? Can you provide a simple example as to why you are requesting a change to the model?

+
+
+

Join meetings

+

Please come to meetings, ask your question and contribute to the discussion! All details can be found on our home page.

+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/dataModelConventions.html b/rmd/dataModelConventions.html new file mode 100644 index 0000000..ecccbbd --- /dev/null +++ b/rmd/dataModelConventions.html @@ -0,0 +1,900 @@ + + + + + + + + + + + + + +dataModelConventions.knit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +
+

Data Model Conventions

+

There are a number of implicit and explicit conventions that have been adopted in the CDM. Developers of methods that run against the CDM need to understand these conventions.

+
+

General

+

The OMOP CDM is platform-independent. Data types are defined generically using ANSI SQL data types (VARCHAR, INTEGER, FLOAT, DATE, DATETIME, CLOB). Precision is provided only for VARCHAR. It reflects the minimal required string length and can be expanded within a CDM instantiation. The CDM does not prescribe the date and datetime format. Standard queries against CDM may vary for local instantiations and date/datetime configurations.

+
+

Tables

+

For the tables of the main domains of the CDM it is imperative that concepts used are strictly limited to the domain. For example, the CONDITION_OCCURRENCE table contains only information about conditions (diagnoses, signs, symptoms), but no information about procedures. Not all source coding schemes adhere to such rules. For example, ICD-9-CM codes, which contain mostly diagnoses of human disease, also contain information about the status of patients having received a procedure. The ICD-9-CM code V20.3 ‘Newborn health supervision’ defines a continuous procedure and is therefore stored in the PROCEDURE_OCCURRENCE table.

+
+
+

Fields

+

Variable names across all tables follow one convention:

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NotationDescription
_SOURCE_VALUEVerbatim information from the source data, typically used in ETL to map to CONCEPT_ID, and not to be used by any standard analytics. For example, CONDITION_SOURCE_VALUE = ‘787.02’ was the ICD-9 code captured as a diagnosis from the administrative claim.
_IDUnique identifiers for key entities, which can serve as foreign keys to establish relationships across entities. For example, PERSON_ID uniquely identifies each individual. VISIT_OCCURRENCE_ID uniquely identifies a PERSON encounter at a point of care.
_CONCEPT_IDForeign key into the Standardized Vocabularies (i.e. the standard_concept attribute for the corresponding term is true), which serves as the primary basis for all standardized analytics. For example, CONDITION_CONCEPT_ID = 31967 contains the reference value for the SNOMED concept of ‘Nausea’
_SOURCE_CONCEPT_IDForeign key into the Standardized Vocabularies representing the concept and terminology used in the source data, when applicable. For example, CONDITION_SOURCE_CONCEPT_ID = 45431665 denotes the concept of ‘Nausea’ in the Read terminology; the analogous CONDITION_CONCEPT_ID might be 31967, since SNOMED-CT is the Standardized Vocabulary for most clinical diagnoses and findings.
_TYPE_CONCEPT_IDDelineates the origin of the source information, standardized within the Standardized Vocabularies. For example, DRUG_TYPE_CONCEPT_ID can allow analysts to discriminate between ‘Pharmacy dispensing’ and ‘Prescription written’
+
+
+
+

Vocabulary

+
+

Concepts

+

Concepts in the Common Data Model are derived from a number of public or proprietary terminologies such as SNOMED-CT and RxNorm, or custom generated to standardize aspects of observational data. Both types of Concepts are integrated based on the following rules:

+
    +
  • All Concepts are maintained centrally by the CDM and Vocabularies Working Group. Additional concepts can be added, as needed, upon request by creating a Github issue.
  • +
  • For all Concepts, whether they are custom generated or adopted from published terminologies, a unique numeric identifier concept_id is assigned and used as the key to link all observational data to the corresponding Concept reference data.
  • +
  • The concept_id of a Concept is persistent, i.e. stays the same for the same Concept between releases of the Standardized Vocabularies.
  • +
  • A descriptive name for each Concept is stored as the Concept Name as part of the CONCEPT table. Additional names and descriptions for the Concept are stored as Synonyms in the CONCEPT_SYNONYM table.
  • +
  • Each Concept is assigned to a Domain. For Standard Concepts, there is always a single Domain. Source Concepts can be composite or coordinated entities, and therefore can belong to more than one Domain. The domain_id field of the record contains the abbreviation of the Domain, or Domain combination. Please refer to the Standardized Vocabularies specification for details of the Domain Assignment.
  • +
  • Concept Class designations are attributes of Concepts. Each Vocabulary has its own set of permissible Concept Classes, although the same Concept Class can be used by more than one Vocabulary. Depending on the Vocabulary, the Concept Class may categorize Concepts vertically (parallel) or horizontally (hierarchically). See the specification of each vocabulary for details.
  • +
  • Concept Class attributes should not be confused with Classification Concepts. These are separate Concepts that have a hierarchical relationship to Standard Concepts or each other, while Concept Classes are unique Vocabulary-specific attributes for each Concept.
  • +
  • For Concepts inherited from published terminologies, the source code is retained in the concept_code field and can be used to reference the source vocabulary.
  • +
  • Standard Concepts (designated as ‘S’ in the standard_concept field) may appear in CDM tables in all *_concept_id fields, whereas Classification Concepts (‘C’) should not appear in the CDM data, but participate in the construction of the CONCEPT_ANCESTOR table and can be used to identify Descendants that may appear in the data. See CONCEPT_ANCESTOR table. Non-standard Concepts can only appear in *_source_concept_id fields and are not used in CONCEPT_ANCESTOR table. Please refer to the Standardized Vocabularies specifications for details of the Standard Concept designation.
  • +
  • The lifespan of a Concept is recorded through its valid_start_date, valid_end_date and the invalid_reason fields. This allows Concepts to correctly reflect at which point in time they were defined. Usually, Concepts get deprecated if their meaning was deemed ambiguous, a duplication of another Concept, or needed revision for scientific reasons. For example, drug ingredients get updated when different salt or isomer variants enter the market. Usually, drugs taken off the market do not cause a deprecation by the terminology vendor. Since observational data are valid with respect to the time they are recorded, it is key for the Standardized Vocabularies to provide even obsolete codes and maintain their relationships to other current Concepts.
  • +
  • Concepts without a known instantiated date are assigned valid_start_date of ‘1-Jan-1970’.
  • +
  • Concepts that are not invalid are assigned valid_end_date of ‘31-Dec-2099’.
  • +
  • Deprecated Concepts (with a valid_end_date before the release date of the Standardized Vocabularies) will have an invalid_reason value of ‘D’ (deprecated without successor) or ‘U’ (updated). The updated Concepts have a record in the CONCEPT_RELATIONSHIP table indicating their active replacement Concept.
  • +
  • Values for concept_ids generated as part of Standardized Vocabularies will be reserved from 0 to 2,000,000,000. Above this range, concept_ids are available for local use and are guaranteed not to clash with future releases of the Standardized Vocabularies.
  • +
+
+
+

Vocabularies

+
    +
  • There is one record for each Vocabulary. One Vocabulary source or vendor can issue several Vocabularies, each of them creating their own record in the VOCABULARY table. However, the choice of whether a Vocabulary contains Concepts of different Concept Classes, or when these different classes constitute separate Vocabularies cannot precisely be decided based on the definition of what constitutes a Vocabulary. For example, the ICD-9 Volume 1 and 2 codes (ICD9CM, containing predominantly conditions and some procedures and observations) and the ICD-9 Volume 3 codes (ICD9Proc, containing predominantly procedures) are realized as two different Vocabularies. On the other hand, SNOMED-CT codes of the class Condition and those of the class Procedure are part of one and the same Vocabulary. Please refer to the Standardized Vocabularies specifications for details of each Vocabulary.
  • +
  • The vocabulary_id field contains an alphanumerical identifier, that can also be used as the abbreviation of the Vocabulary name.
  • +
  • The record with vocabulary_id = ‘None’ is reserved to contain information regarding the current version of the Entire Standardized Vocabularies.
  • +
  • The vocabulary_name field contains the full official name of the Vocabulary, as well as the source or vendor in parentheses.
  • +
  • Each Vocabulary has an entry in the CONCEPT table, which is recorded in the vocabulary_concept_id field. This is for purposes of creating a closed Information Model, where all entities in the OMOP CDM are covered by a unique Concept.
  • +
+
+
+

Domains

+
    +
  • There is one record for each Domain. The domains are defined by the tables and fields in the OMOP CDM that can contain Concepts describing all the various aspects of the healthcare experience of a patient.
  • +
  • The domain_id field contains an alphanumerical identifier, that can also be used as the abbreviation of the Domain.
  • +
  • The domain_name field contains the unabbreviated names of the Domain.
  • +
  • Each Domain also has an entry in the Concept table, which is recorded in the domain_concept_id field. This is for purposes of creating a closed Information Model, where all entities in the OMOP CDM are covered by a unique Concept.
  • +
  • Versions prior to v5.0.0 of the OMOP CDM did not support the notion of a Domain.
  • +
+
+
+

Concept Classes

+
    +
  • There is one record for each Concept Class. Concept Classes are used to create additional structure to the Concepts within each Vocabulary. Some Concept Classes are unique to a Vocabulary (for example “Clinical Finding” in SNOMED), but others can be used across different Vocabularies. The separation of Concepts through Concept Classes can be semantically horizontal (each Class subsumes Concepts of the same hierarchical level, akin to sub-Vocabularies within a Vocabulary) or vertical (each Class subsumes Concepts of a certain kind, going across hierarchical levels). For example, Concept Classes in SNOMED are vertical: The classes “Procedure” and “Clinical Finding” define very granular to very generic Concepts. On the other hand, “Clinical Drug” and “Ingredient” Concept Classes define horizontal layers or strata in the RxNorm vocabulary, which all belong to the same concept of a Drug.
  • +
  • The concept_class_id field contains an alphanumerical identifier, that can also be used as the abbreviation of the Concept Class.
  • +
  • The concept_class_name field contains the unabbreviated names of the Concept Class.
  • +
  • Each Concept Class also has an entry in the Concept table, which is recorded in the concept_class_concept_id field. This is for purposes of creating a closed Information Model, where all entities in the OMOP CDM are covered by unique Concepts.
  • +
+
+
+

Concept Relationships

+
    +
  • Relationships can generally be classified as hierarchical (parent-child) or non-hierarchical (lateral).
  • +
  • All Relationships are directional, and each Concept Relationship is represented twice symmetrically within the CONCEPT_RELATIONSHIP table. For example, the two SNOMED concepts of ‘Acute myocardial infarction of the anterior wall’ and ‘Acute myocardial infarction’ have two Concept Relationships: +
      +
    • ‘Acute myocardial infarction of the anterior wall’ ‘Is a’ ‘Acute myocardial infarction’, and
    • +
    • ‘Acute myocardial infarction’ ‘Subsumes’ ‘Acute myocardial infarction of the anterior wall’.
    • +
  • +
  • There is one record for each Concept Relationship connecting the same Concepts with the same relationship_id.
  • +
  • Since all Concept Relationships exist with their mirror image (concept_id_1 and concept_id_2 swapped, and the relationship_id replaced by the reverse_relationship_id from the RELATIONSHIP table), it is not necessary to query for the existence of a relationship both in the concept_id_1 and concept_id_2 fields.
  • +
  • Concept Relationships define direct relationships between Concepts. Indirect relationships through 3rd Concepts are not captured in this table. However, the CONCEPT_ANCESTOR table does this for hierarchical relationships over several “generations” of direct relationships.
  • +
  • In previous versions of the CDM, the relationship_id used to be a numerical identifier. See the RELATIONSHIP table.
  • +
+
+
+

Relationship Table

+
    +
  • There is one record for each Relationship.
  • +
  • Relationships are classified as hierarchical (parent-child) or non-hierarchical (lateral)
  • +
  • They are used to determine which concept relationship records should be included in the computation of the CONCEPT_ANCESTOR table.
  • +
  • The relationship_id field contains an alphanumerical identifier, that can also be used as the abbreviation of the Relationship.
  • +
  • The relationship_name field contains the unabbreviated names of the Relationship.
  • +
  • Relationships all exist symmetrically, i.e. in both directions. The relationship_id of the opposite Relationship is provided in the reverse_relationship_id field.
  • +
  • Each Relationship also has an equivalent entry in the Concept table, which is recorded in the relationship_concept_id field. This is for purposes of creating a closed Information Model, where all entities in the OMOP CDM are covered by unique Concepts.
  • +
  • Hierarchical Relationships are used to build a hierarchical tree out of the Concepts, which is recorded in the CONCEPT_ANCESTOR table. For example, “has_ingredient” is a Relationship between Concepts of the Concept Class ‘Clinical Drug’ and those of ‘Ingredient’, and all Ingredients can be classified as the “parental” hierarchical Concepts for the drug products they are part of. All ‘Is a’ Relationships are hierarchical.
  • +
  • Relationships, also hierarchical, can be between Concepts within the same Vocabulary or those adopted from different Vocabulary sources.
  • +
+
+
+

Concept Synonyms

+
    +
  • The concept_synonym_name field contains a valid Synonym of a concept, including the description in the concept_name itself. I.e. each Concept has at least one Synonym in the CONCEPT_SYNONYM table. As an example, for a SNOMED-CT Concept, if the fully specified name is stored as the concept_name of the CONCEPT table, then the Preferred Term and Synonyms associated with the Concept are stored in the CONCEPT_SYNONYM table.
  • +
  • Only Synonyms that are active and current are stored in the CONCEPT_SYNONYM table. Tracking synonym/description history and mapping of obsolete synonyms to current Concepts/Synonyms is out of scope for the Standard Vocabularies.
  • +
  • Currently, only English Synonyms are included.
  • +
+
+
+

Concept Ancestor

+
    +
  • Each concept is also recorded as an ancestor of itself.
  • +
  • Only valid and Standard Concepts participate in the CONCEPT_ANCESTOR table. It is not possible to find ancestors or descendants of deprecated or Source Concepts.
  • +
  • Usually, only Concepts of the same Domain are connected through records of the CONCEPT_ANCESTOR table, but there might be exceptions.
  • +
+
+
+

Source to Concept Map

+
    +
  • This table is no longer used to distribute mapping information between source codes and Standard Concepts for the Standard Vocabularies. Instead, the CONCEPT_RELATIONSHIP table is used for this purpose, using the relationship_id=‘Maps to’.
  • +
  • However, this table can still be used for the translation of local source codes into Standard Concepts.
  • +
  • Note: This table should not be used to translate source codes to Source Concepts. The source code of a Source Concept is captured in its concept_code field. If the source codes used in a given database do not follow correct formatting the ETL will have to perform this translation. For example, if ICD-9-CM codes are recorded without a dot the ETL will have to perform a lookup function that allows identifying the correct ICD-9-CM Source Concept (with the dot in the concept_code field).
  • +
  • The source_concept_id, or the combination of the fields source_code and the source_vocabulary_id uniquely identifies the source information. It is the equivalent to the concept_id_1 field in the CONCEPT_RELATIONSHIP table.
  • +
  • If there is no source_concept_id available because the source codes are local and not supported by the Standard Vocabulary, the content of the field is 0 (zero, not null) encoding an undefined concept. However, local Source Concepts are established (concept_id values above 2,000,000,000).
  • +
  • The source_code_description contains an optional description of the source code.
  • +
  • The target_concept_id contains the Concept the source code is mapped to. It is equivalent to the concept_id_2 in the CONCEPT_RELATIONSHIP table
  • +
  • The target_vocabulary_id field contains the vocabulary_id of the target concept. It is a duplication of the same information in the CONCEPT record of the Target Concept.
  • +
  • The fields valid_start_date, valid_end_date and invalid_reason are used to define the life cycle of the mapping information. Invalid mapping records should not be used for mapping information.
  • +
+
+
+

Drug Strength

+
    +
  • The DRUG_STRENGTH table contains information for each active (non-deprecated) Standard Drug Concept.
  • +
  • A drug which contains multiple active Ingredients will result in multiple DRUG_STRENGTH records, one for each active ingredient.
  • +
  • Ingredient strength information is provided either as absolute amount (usually for solid formulations) or as concentration (usually for liquid formulations).
  • +
  • If the absolute amount is provided (for example, ‘Acetaminophen 5 MG Tablet’) the amount_value and amount_unit_concept_id are used to define this content (in this case 5 and ‘MG’).
  • +
  • If the concentration is provided (for example ‘Acetaminophen 48 MG/ML Oral Solution’) the numerator_value in combination with the numerator_unit_concept_id and denominator_unit_concept_id are used to define this content (in this case 48, ‘MG’ and ‘ML’).
  • +
  • In case of Quantified Clinical or Branded Drugs the denominator_value contains the total amount of the solution (not the amount of the ingredient). In all other drug concept classes the denominator amount is NULL because the concentration is always normalized to the unit of the denominator. So, a product containing 960 mg in 20 mL is provided as 48 mg/mL in the Clinical Drug and Clinical Drug Component, while as a Quantified Clinical Drug it is written as 960 mg/20 mL.
  • +
  • If the strength is provided in % (volume or mass-percent are not distinguished) it is stored in the numerator_value/numerator_unit_concept_id field combination, with both the denominator_value and denominator_unit_concept_id set to NULL. If it is a Quantified Drug the total amount of drug is provided in the denominator_value/denominator_unit_concept_id pair. E.g., the 30 G Isoconazole 2% Topical Cream is provided as 2% / in Clinical Drug and Clinical Drug Component, and as 2% /30 G.
  • +
  • Sometimes, one Ingredient is listed with different units within the same drug. This is very rare, and usually this happens if there is more than one Precise Ingredient. For example, ‘Penicillin G, 30 Benzathine 150000 UNT/ML / Penicillin G, Procaine 150000 MEQ/ML Injectable Suspension’ contains Penicillin G in two different forms.
  • +
  • Sometimes, different ingredients in liquid drugs are listed with different units in the denominator_unit_concept_id. This is usually the case if the ingredients are liquids themselves (concentration provided as mL/mL) or solid substances (mg/mg). In these cases, the general assumption is made that the density of the drug is that of water, and one can assume 1 g = 1 mL.
  • +
  • All Drug vocabularies containing Standard Concepts have entries in the DRUG_STRENGTH table.
  • +
  • There is now a Concept Class for supplier information whose relationships can be found in CONCEPT_RELATIONSHIP with a relationship_id of ‘Has supplier’ and ‘Supplier of’.
  • +
+
+
+
+

Mapping

+
+

Representing content as Concepts

+

In CDM data tables the meaning of the content of each record is represented using Concepts. Concepts are stored with their CONCEPT_ID as foreign keys to the CONCEPT table in the Standardized Vocabularies, which contains Concepts necessary to describe the healthcare experience of a patient. If a Standard Concept does not exist or cannot be identified, the Concept with the CONCEPT_ID 0 is used, representing a non-existing or un-mappable concept.

+

Records in the CONCEPT table contain all the detailed information about each Concept (name, domain, class etc.). Concepts, Concept Relationships and other information relating to Concepts are contained in the tables of the Standardized Vocabularies.

+
+
+

Concept IDs and Source Values

+

Many tables contain equivalent information multiple times: As a Source Value, a Source Concept and as a Standard Concept.

+
    +
  • Source Values contain the codes from public code systems such as ICD-9-CM, NDC, CPT-4 etc. or locally controlled vocabularies (such as F for female and M for male) copied from the source data. Source Values are stored in the *_SOURCE_VALUE fields in the data tables.
  • +
  • Concepts are CDM-specific entities that represent the meaning of a clinical fact. Most concepts are based on code systems used in healthcare (called Source Concepts), while others were created de-novo (CONCEPT_CODE = ‘OMOP generated’). Concepts have unique IDs across all domains.
  • +
  • Source Concepts are the concepts that represent the code used in the source. Source Concepts are only used for common healthcare code systems, not for OMOP-generated Concepts. Source Concepts are stored in the *_SOURCE_CONCEPT_ID field in the data tables.
  • +
  • Standard Concepts are those concepts that are used to define the unique meaning of a clinical entity. For each entity there is one Standard Concept. Standard Concepts are typically drawn from existing public vocabulary sources. Concepts that have the equivalent meaning to a Standard Concept are mapped to the Standard Concept. Standard Concepts are referred to in the _CONCEPT_ID field of the data tables.
  • +
+

Source Values are only provided for convenience and quality assurance (QA) purposes. Source Values and Source Concepts are optional, while Standard Concepts are mandatory. Source Values may contain information that is only meaningful in the context of a specific data source.

+
+
+

Type Concepts

+

By Mik Kallfelz and Dmitry Dymshyts

+

Type Concepts (ending in _TYPE_CONCEPT_ID) are present in many tables. They are special Concepts with the purpose of indicating from where the data are derived in the source. For example, the Type Concept field can be used to distinguish a DRUG_EXPOSURE record that is derived from a pharmacy-dispensing claim from one indicative of a prescription written in an electronic health record (EHR).

+
    +
  • Type concepts help determine the provenance of a record in the OMOP CDM. Many tables hold a specific _type_concept_id field for which valid concepts can be used to indicate a particular source of that record. For a condition it can be helpful to know if it was derived from an EHR system or insurance claims. For a drug exposure it should be very useful to distinguish between prescriptions and actual administrations.
  • +
  • In respect to the target table, matching type concepts should be chosen during the ETL step while processing sources. There are various representations in the list of type concepts of which some are quite specific for one table and others can be applied for many tables / domains, as they are quite generic. There is however no plausibility check or dependency between type concepts and tables which means they have to be chosen correctly.
  • +
  • There is now one specific vocabulary for type concepts which replaced a number of previously existing tables. For example, where previously there was a dedicated vocabulary for Drug Type Concepts, now we would choose the respective ones from the overall vocabulary (and ignore some of the old ones):
  • +
+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Drug TypeType Concept
Inpatient administrationEHR administration record
Physician administered drug (identified from EHR problem list)EHR administration record
Physician administered drug (identified from referral record)EHR administration record
Physician administered drug (identified from EHR observation)EHR administration record
Physician administered drug (identified from EHR order)EHR administration record
Prescription dispensed in pharmacyEHR dispensing record
Dispensed in Outpatient officeEHR dispensing record
Medication list entryEHR medication list
Prescription writtenEHR prescription
EHR prescription issue record
Prescription dispensed through mail orderMail order record
NLP derivedNLP
Patient Self-Reported MedicationPatient self-report
Physician administered drug (identified as procedure)Pharmacy claim
Drug era - 0 days persistence window
Drug era - 30 days persistence window
Randomized Drug
+
+
+

Time span of available data

+

Data tables for clinical data contain a datetime stamp (ending in _DATETIME, _START_DATETIME or _END_DATETIME), indicating when that clinical event occurred. As a rule, no record can be outside of a valid OBSERVATION_PERIOD time period. Clinical information that relates to events that happened prior to the first OBSERVATION_PERIOD will be captured as a record in the OBSERVATION table as ‘Medical history’ (CONCEPT_ID = 43054928), with the OBSERVATION_DATETIME set to the first OBSERVATION_PERIOD_START_DATE of that patient, and the VALUE_AS_CONCEPT_ID set to the corresponding CONCEPT_ID for the condition/drug/procedure that occurred in the past. No data occurring after the last OBSERVATION_PERIOD_END_DATE can be valid records in the CDM. When mapping source data to the CDM, if time is unknown the default time of 00:00:00 should be chosen.

+
+
+

Source Values, Source Concept Ids, and Standard Concept Ids

+

Each table contains fields for Source Values, Source Concept Ids, and Standard Concept Ids.

+
    +
  • Source Values are fields to maintain the verbatim information from the source database, stored as unstructured text, and are generally not to be used by any standardized analytics. There is no standardization for these fields and these columns can be used as the local CDM builders see fit. A typical example would be an ICD-9 code without the decimal from an administrative claim as condition_source_value = ‘78702’ which is how it appeared in the source.
  • +
  • Source Concept Ids provide a repeatable representation of the source concept, when the source data are drawn from a commonly-used, internationally-recognized vocabulary that has been distributed with the OMOP Common Data Model. Specific use cases where source vocabulary-specific analytics are required can be accommodated by the use of the *_SOURCE_CONCEPT_ID fields, but these are generally not applicable across disparate data sources. The standard *_CONCEPT_ID fields are strongly suggested to be used in all standardized analytics, as specific vocabularies have been established within each data domain to facilitate standardization of both structure and content within the OMOP Common Data Model.
  • +
+

The following provide conventions for processing source data using these three fields in each domain:

+

When processing data where the source value is either free text or a reference to a coding scheme that is not contained within the Standardized Vocabularies:

+
    +
  • Map all Source Values to the corresponding Standard CONCEPT_IDs. Store the CONCEPT_IDs in the TARGET_CONCEPT_ID field of the SOURCE_TO_CONCEPT_MAP table. +
      +
    • If a CONCEPT_ID is not available for the source code, the TARGET_CONCEPT_ID field is set to 0.
    • +
  • +
+

When processing your data where Source Value is a reference to a coding scheme contained within the Standardized Vocabularies:

+
    +
  • Find all CONCEPT_IDs in the Source Vocabulary that correspond to your Source Values. Store the result in the SOURCE_CONCEPT_ID field. +
      +
    • If the source code follows the same formatting as the distributed vocabulary, the mapping can be directly obtained from the CONCEPT table using the CONCEPT_CODE field.
    • +
    • If the source code uses alternative formatting (ex. format has removed decimal point from ICD-9 codes), you will need to perform the formatting transformation within the ETL. In this case, you may wish to store the mappings from original codes to SOURCE_CONCEPT_IDs in the SOURCE_TO_CONCEPT_MAP table.
    • +
    • If the source code is not found in a vocabulary, the SOURCE_CONCEPT_ID field is set to 0
    • +
  • +
  • Use the CONCEPT_RELATIONSHIP table to identify the Standard CONCEPT_ID that corresponds to the SOURCE_CONCEPT_ID in the domain. +
      +
    • Each SOURCE_CONCEPT_ID can have 1 or more Standard CONCEPT_IDs mapped to it. Each Standard CONCEPT_ID belongs to only one primary domain but when a source CONCEPT_ID maps to multiple Standard CONCEPT_IDs, it is possible for that SOURCE_CONCEPT_ID to result in records being produced across multiple domains. For example, ICD-10-CM code Z34.00 ‘Encounter for supervision of normal first pregnancy, unspecified trimester’ will be mapped to the SNOMED concept ‘Routine antenatal care’ in the procedure domain and the concept in the condition domain ‘Primigravida’. It is also possible for one SOURCE_CONCEPT_ID to map to multiple Standard CONCEPT_IDs within the same domain. For example, ICD-9-CM code 070.43 ‘Hepatitis E with hepatic coma’ maps to the SNOMED concept for ‘Acute hepatitis E’ and a second SNOMED concept for ‘Hepatic coma’, in which case multiple CONDITION_OCCURRENCE records will be generated for the one source value record.
    • +
    • If the SOURCE_CONCEPT_ID is not mappable to any Standard CONCEPT_ID, the _CONCEPT_ID field is set to 0.
    • +
  • +
  • Write the data record into the table(s) corresponding to the domain of the Standard CONCEPT_ID(s). +
      +
    • If the Source Value has a SOURCE_CONCEPT_ID but the SOURCE_CONCEPT_ID is not mapped to a Standard CONCEPT_ID, then the domain for the data record, and hence its table location, is determined by the DOMAIN_ID field of the CONCEPT record the SOURCE_CONCEPT_ID refers to. The Standard _CONCEPT_ID is set to 0.
    • +
    • If the Source Value cannot be mapped to a SOURCE_CONCEPT_ID or Standard CONCEPT_ID, then direct the data record to the most appropriate CDM domain based on your local knowledge of the intent of the source data and associated value. For example, if the un-mappable Source Value came from a ‘diagnosis’ table then, in the absence of other information, you may choose to record that fact in the CONDITION_OCCURRENCE table.
    • +
  • +
+

Each Standard CONCEPT_ID field has a set of allowable CONCEPT_ID values. The allowable values are defined by the domain of the concepts. For example, there is a domain concept of ‘Gender’, for which there are only two allowable standard concepts of practical use (8507 - ‘Male’, 8532- ‘Female’) and one allowable generic concept to represent a standard notion of ‘no information’ (concept_id = 0). This ‘no information’ concept should be used when there is no mapping to a standard concept available or if there is no information available for that field. The exceptions are MEASUREMENT.VALUE_AS_CONCEPT_ID, OBSERVATION.VALUE_AS_CONCEPT_ID, MEASUREMENT.UNIT_CONCEPT_ID, OBSERVATION.UNIT_CONCEPT_ID, MEASUREMENT.OPERATOR_CONCEPT_ID, and OBSERVATION.MODIFIER_CONCEPT_ID, which can be NULL if the data do not contain the information (THEMIS issue #11).

+

There is no constraint on allowed CONCEPT_IDs within the SOURCE_CONCEPT_ID fields.

+
+
+

Custom SOURCE_TO_CONCEPT_MAP

+

When the source data uses coding systems that are not currently in the Standardized Vocabularies (e.g. ICPC codes for diagnoses), the convention is to store the mapping of such source codes to Standard Concepts in the SOURCE_TO_CONCEPT_MAP table. The codes used in the data source can be recorded in the SOURCE_VALUE fields, but no SOURCE_CONCEPT_ID will be available.

+

Custom source codes are not allowed to map to Standard Concepts that are marked as invalid.

+
+
+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/download.html b/rmd/download.html new file mode 100644 index 0000000..02482fe --- /dev/null +++ b/rmd/download.html @@ -0,0 +1,569 @@ + + + + + + + + + + + + + +Download + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +

All code for the ddls, constraints and indices is available on our github. Each version of the Common Data Model is denoted by a release. The OHDSI community supports the database management systems (dbms) Sql Server, Postgresql, Oracle, Redshift, Parallel Data Warehouse, BigQuery, Impala, and Netezza. Included in each release is a folder for each dbms. After downloading, choose the folder for your specific system and you will see all necessary files. For example the Sql Server folder has the ddl, primary key constraints, indices, and foreign key constraints. In contrast, the redshift folder only has a ddl.

+

These sql scripts have been fully tested on Oracle, Sql Server, and Postgresql and the rest are generated using the SqlRender package. If you run into any problems, please let us know by creating an issue on our github.

+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/drug_dose.html b/rmd/drug_dose.html new file mode 100644 index 0000000..af7e277 --- /dev/null +++ b/rmd/drug_dose.html @@ -0,0 +1,467 @@ + + + + + + + + + + + + + + + +drug_dose.knit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + +
+

How to Calculate Drug Dose

+

These conventions have been curated to show how to consider calculating cumulative and daily drug dose for a given exposure record. These are high-level examples meant to demonstrate how one might use the DRUG_STRENGTH table and they do not take into account switching or stockpiling.

+

1. Tablets and other fixed amount formulations

Example: Acetaminophen (Paracetamol) 500 mg, 20 tablets.
DRUG_STRENGTH The denominator_unit is empty
DRUG_EXPOSURE The quantity refers to number of pieces, e.g. tablets.
In the example: 20
Ingredient dose= quantity x amount_value [amount_unit_concept_id]
Acetaminophen dose = 20 x 500mg = 10,000mg
Daily dose = Acetaminophen dose/drug duration

+

2. Puffs of an inhaler

Note: There is no difference to use case 1 above besides that the DRUG_STRENGTH table may put {actuat} in the denominator unit. In this case the strength is provided in the numerator.
DRUG_STRENGTH The denominator_unit is {actuat}
DRUG_EXPOSURE The quantity refers to the number of pieces, e.g. puffs
Ingredient dose= quantity x numerator_value [numerator_unit_concept_id]
Daily dose = Ingredient dose/drug duration

3. Quantified Drugs which are formulated as a concentration

Example: The Clinical Drug is Acetaminophen 250 mg/mL in a 5mL oral suspension. The Quantified Clinical Drug would have 1250 mg / 5 ml in the DRUG_STRENGTH table. Two suspensions are dispensed.
DRUG_STRENGTH The denominator_unit is either mg or mL. The denominator_value might be different from 1.
DRUG_EXPOSURE The quantity refers to a fraction or multiple of the pack.
In the example: 2
Ingredient dose= quantity x numerator_value [numerator_unit_concept_id]
Acetaminophen dose = 2 x 1250mg = 2500mg
Daily dose = Acetaminophen dose/drug duration

4. Drugs with the total amount provided in quantity, e.g. chemotherapeutics

Example: 42799258 “Benzyl Alcohol 0.1 ML/ML / Pramoxine hydrochloride 0.01 MG/MG Topical Gel” dispensed in a 1.25oz pack.
DRUG_STRENGTH The denominator_unit is either mg or mL.
Example: Benzyl Alcohol in mL and Pramoxine hydrochloride in mg
DRUG_EXPOSURE The quantity refers to mL or g.
Example: 1.25 x 30 (conversion factor oz -> mL) = 37
Ingredient dose= quantity x numerator_value [numerator_unit_concept_id]
Benzyl Alcohol dose = 37 x 0.1mL = 3.7mL
Daily dose = Benzyl Alcohol dose/drug duration
Pramoxine hydrochloride dose = 37 x 0.01mg x 1000 = 370mg
Daily dose = Pramoxine hydrochloride dose/drug duration

Note: The analytical side should check the denominator in the DRUG_STRENGTH table. As mg is used for the second ingredient the factor 1000 will be applied to convert between g and mg.

5. Compounded drugs

Example: Ibuprofen 20%/Piroxicam 1% Cream, 30ml in 5ml tubes.
DRUG_STRENGTH We need entries for the ingredients of Ibuprofen and Piroxicam, probably with an amount_value of 1 and a unit of mg.
DRUG_EXPOSURE The quantity refers to the total amount of the compound. Use one record in the DRUG_EXPOSURE table for each compound.
Example: 20% Ibuprofen of 30ml = 6mL, 1% Piroxicam of 30ml = 0.3mL
Ingredient dose= Depends on the drugs involved: One of the use cases above. Ibuprofen dose = 6 x 1mg x 1000 = 6000mg
Daily dose = Ibuprofen dose/drug duration
Piroxicam dose = 0.3 x 1mg x 1000 = 300mg
Daily dose = Piroxicam dose/drug duration
Note: The analytical side determines that the denominator for both ingredients in the DRUG_STRENGTH table is mg and applies the factor 1000 to convert between mL/g and mg.

6. Drugs with the active ingredient released over time, e.g. patches

Example: Ethinyl Estradiol 0.000833 MG/HR / norelgestromin 0.00625 MG/HR Weekly Transdermal Patch
DRUG_STRENGTH The denominator units refer to hour.
Example: Ethinyl Estradiol 0.000833 mg/h / norelgestromin 0.00625 mg/h
DRUG_EXPOSURE The quantity refers to the number of pieces.
Example: 1 patch
Ingredient rate= numerator_value [numerator_unit_concept_id]
Ethinyl Estradiol rate = 0.000833 mg/h
norelgestromin rate = 0.00625 mg/h
Note: This can be converted to a daily dosage by multiplying it with 24. (Assuming 1 patch at a time for at least 24 hours)

+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/rmd/ehrObsPeriods.html b/rmd/ehrObsPeriods.html new file mode 100644 index 0000000..39293da --- /dev/null +++ b/rmd/ehrObsPeriods.html @@ -0,0 +1,522 @@ + + + + + + + + + + + + + +Observation Period Considerations for EHR Data + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + +

By Melanie Philofsky and the EHR Working Group

+

The EHR WG convened on July 24, August 7, and August 21, 2020 to discuss the creation of an Observation Period from EHR data. The current and future conventions are not prescriptive enough and leave room for various ways of interpretation. The goals of our discussions were to increase the standardization for the implementation of the OBSERVATION_PERIOD table by providing some general guidelines for determining the start, end, and gaps in Observation Periods. The suggestions we came up with are only “suggestions” at this point. More research should be done to understand how these choices might impact evidence generated using these data. All of these decisions should be tempered by local understanding of patients in the EHR you are ETLing.

+ +
+

Observation Period Start Date

+ +
+
+

Observation Period end date

+

Set the observation_period_end_date as the first date from the following:

+ +
+
+

Observation Period Gaps and Persistence Windows

+
+

Observation Period Gaps

+

Periods of time when a Person does not receive care from your institution and therefore is not observed and should not have an Observation Period. These gaps are usually hard to determine because most Persons don’t announce their departure from an EHR/healthcare institution. Therefore, a heuristic will need to be instituted to determine Observation Period Gaps where the information is not explicit.

+
+
+

Observation Period Persistence Window

+

Defined as the maximum time allowed between two clinical events under the assumption a Person would have a clinical event recorded, if they are not healthy and seek care.

+

Example: Person 1 has a series of clinical events recorded from Jan. 1, 2010 to June 15, 2012, where the time between clinical events does not exceed 60 days. The next clinical event for Person 1 is on Oct. 1, 2018. Starting Oct. 1, 2018 Person 1 has clinical events occurring at least every 90 days up to the present date.

+

There is a 6+ year gap between groups of clinical events recorded in the CDM. After discussion in the EHR WG, we believe this 6+ year gap is indicative of a Person not being seen within our EHR/healthcare institution. Per convention #4 for Observation Period table, “As a general assumption, during an Observation Period any clinical event that happens to the patient is expected to be recorded. Conversely, the absence of data indicates that no clinical events occurred to the patient.” Person 1 has two Observation Periods.

+

1st Observation Period

+
    +
  • observation_period_start_date = 01/01/2010
  • +
  • observation_period_end_date = 08/15/2012 (Per the end_date guideline above)
  • +
+

2nd Observation Period

+
    +
  • observation_period_start_date = 10/01/2018
  • +
  • observation_period_end_date = 09/01/2020 (Date of the data pull, per the end_date guideline above)
  • +
+

Now, there are cases where a Person only receives care within your EHR system when absolutely necessary. If your EHR doesn’t offer primary care services, if the majority of Persons lack healthcare insurance, or if there is any other reason why Persons are only seen in urgent or emergent situations, the above heuristic might be too restrictive. This is a guideline. A question the EHR WG debated was how long between clinical events should we assume any clinical event that happens to the Person is expected to be recorded? When should we end one Observation Period and begin another? What should be the time between events for an Observation Period Persistence Window? Wellness checkups/Visits happen approximately every 12-18 months depending on a multitude of factors.

+

Our recommendation: If Observation Period Gaps are 548 days or more, then the previous Observation Period should end and another Observation Period should begin on the date of the next clinical event as per the Person 1 example above.

+
+
+
+

Additional ETL considerations

+ +

The Observation Period can be created by only one clinical event. However, the clinical event must NOT be from the Death table. If a Death date does not have any other clinical records 18 months before AND 18 months after the death date, then an Observation Period will not be created. We believe this logic is needed because if a Person only has a death_date without other clinical event records, a Person is most likely not being “observed” when the death occurred. If a Person was being observed at their time of death, then other records (visit, condition, measurement, etc.) would be created. This rule is most relevant for those with death registry data since a Person who dies in the hospital has many clinical event records.

+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/rmd/faq.html b/rmd/faq.html new file mode 100644 index 0000000..a2d3240 --- /dev/null +++ b/rmd/faq.html @@ -0,0 +1,558 @@ + + + + + + + + + + + + + +OMOP CDM Frequently Asked Questions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + +

1. I understand that the common data model (CDM) is a way of organizing disparate data sources into the same relational database design, but how can it be effective since many databases use different coding schemes?

+

During the extract, transform, load (ETL) process of converting a data source into the OMOP common data model, we standardize the structure (e.g. tables, fields, data types), conventions (e.g. rules that govern how source data should be represented), and content (e.g. what common vocabularies are used to speak the same language across clinical domains). The common data model preserves all source data, including the original source vocabulary codes, but adds the standardized vocabularies to allow for network research across the entire OHDSI research community.

+

2. How does my data get transformed into the common data model?

+

You or someone in your organization will need to create a process to build your CDM. Don’t worry though, you are not alone! The open nature of the community means that much of the code that other participants have written to transform their own data is available for you to use. If you have a data license for a large administrative claims database like IBM MarketScan® or Optum’s Clinformatics® Extended Data Mart, chances are that someone has already done the legwork. Here is one example of a full builder freely available on github that has been written for a variety of data sources.

+

The community forums are also a great place to ask questions if you are stuck or need guidance on how to represent your data in the common data model. Members are usually very responsive!

+

3. Are any tables or fields optional?

+

It is expected that all tables will be present in a CDM though it is not a requirement that they are all populated. The two mandatory tables are:

+ +

It is then up to you which tables to populate, though the core event tables are generally agreed upon to be Condition_occurrence, Procedure_occurrence, Drug_exposure, Measurement, and Observation. Each table has certain required fields, a full list of which can be found on the Common Data Model wiki page.

+

4. Does the data model include any derived information? Which tables or values are derived?

+

The common data model stores verbatim data from the source across various clinical domains, such as records for conditions, drugs, procedures, and measurements. In addition, to assist the analyst, the common data model also provides some derived tables, based on commonly used analytic procedures. For example, the Condition_era table is derived from the Condition_occurrence table and both the Drug_era and Dose_era tables are derived from the Drug_exposure table. An era is defined as a span of time when a patient is assumed to have a given condition or exposure to a particular active ingredient. Members of the community have written code to create these tables and it is out on the github if you choose to use it in your CDM build. It is important to reinforce, the analyst has the opportunity, but not the obligation, to use any of the derived tables and all of the source data is still available for direct use if the analysis calls for different assumptions.

+

5. How is age captured in the model?

+

Year_of_birth, month_of_birth, day_of_birth and birth_datetime are all fields in the Person table designed to capture some form of date of birth. While only year_of_birth is required, these fields allow for maximum flexibility over a wide range of data sources.

+

6. How are gender, race, and ethnicity captured in the model? Are they coded using values a human reader can understand?

+

Standard Concepts are used to denote all clinical entities throughout the OMOP common data model, including gender, race, and ethnicity. Source values are mapped to Standard Concepts during the extract, transform, load (ETL) process of converting a database to the OMOP Common Data Model. These are then stored in the Gender_concept_id, Race_concept_id and Ethnicity_concept_id fields in the Person table. Because the standard concepts span across all clinical domains, and in keeping with Cimino’s ‘Desiderata for Controlled Medical Vocabularies in the Twenty-First Century’, the identifiers are unique, persistent non-semantic identifiers. Gender, for example, is stored as either 8532 (female) or 8507 (male) in gender_concept_id while the original value from the source is stored in gender_source_value (M, male, F, etc).

+

7. Are there conditions/procedures/drugs or other domains that should be masked or hidden in the CDM?

+

The masking of information related to a person is dependent on the organization’s privacy policies and may vary by data asset (THEMIS issue #21).

+

8. How is time-varying patient information such as location of residence addressed in the model?

+

The OMOP common data model has been pragmatically defined based on the desired analytic use cases of the community, as well as the available types of data that community members have access to. Prior to CDM v6.0, each person record had associated demographic attributes which are assumed to be constant for the patient throughout the course of their periods of observation, like location and primary care provider. With the release of CDM v6.0, the Location_History table is now available to track the movements of people, care sites, and providers over time. Only the most recent location_id should be stored in the Person table to eliminate duplication, while the person’s movements are stored in Location_History.

+

Something like marital status is a little different as it is considered to be an observation rather than a demographic attribute. This means that it is housed in the Observation table rather than the Person table, giving the opportunity to store each change in status as a unique record.

+

If someone in the community had a use case for time-varying location of residence and also had source data that contains this information, we’d welcome participation in the CDM workgroup to evolve the model further.

+

9. How does the model denote the time period during which a Person’s information is valid?

+

The OMOP Common Data Model uses something called observation periods (stored in the Observation_period table) as a way to define the time span during which a patient is at-risk to have a clinical event recorded. In administrative claims databases, for example, these observation periods are often analogous to the notion of ‘enrollment’.

+

10. How does the model capture start and stop dates for insurance coverage? What if a person’s coverage changes?

+

The Payer_plan_period table captures details of the period of time that a Person is continuously enrolled under a specific health Plan benefit structure from a given Payer. Payer plan periods, as opposed to observation periods, can overlap so as to denote the time when a Person is enrolled in multiple plans at the same time such as Medicare Part A and Medicare Part D.

+

11. What if I have EHR data? How would I create observation periods?

+

An observation period is considered as the time at which a patient is at-risk to have a clinical event recorded in the source system. Determining the appropriate observation period for each source data can vary, depending on what information the source contains. If a source does not provide information about a patient’s entry or exit from a system, then reasonable heuristics need to be developed and applied within the ETL.

+
+

Vocabulary Mapping

+

12. Do I have to map my source codes to Standard Concepts myself? Are there vocabulary mappings that already exist for me to leverage?

+

If your data use any of the 55 source vocabularies that are currently supported, the mappings have been done for you. The full list is available from the open-source ATHENA tool under the download tab (see below). You can choose to download the ten vocabulary tables from there as well – you will need a copy in your environment if you plan on building a CDM.

+

+

The ATHENA tool also allows you to explore the vocabulary before downloading it if you are curious about the mappings or if you have a specific code in mind and would like to know which standard concept it is associated with; just click on the search tab and type in a keyword to begin searching.

+

13. If I want to apply the mappings myself, can I do so? Are they transparent to all users?

+

Yes, all mappings are available in the Concept_relationship table (which can be downloaded from ATHENA). Each value in a supported source terminology is assigned a Concept_id (which is considered non-standard). Each Source_concept_id will have a mapping to a Standard_concept_id. For example:

+

+

In this case the standard SNOMED concept 201826 for type 2 diabetes mellitus would be stored in the Condition_occurrence table as the Condition_concept_id and the ICD10CM concept 1567956 for type 2 diabetes mellitus would be stored as the Condition_source_concept_id.

+

14. Can RXNorm codes be stored in the model? Can I store multiple levels if I so choose? What if one collaborator uses a different level of RXNorm than I use when transforming their database?

+

In the OMOP Common Data Model RXNorm is considered the standard vocabulary for representing drug exposures. One of the great things about the Standardized Vocabulary is that the hierarchical nature of RXNorm is preserved to enable efficient querying. It is agreed upon best practice to store the lowest level RXNorm available and then use the Vocabulary to explore any pertinent relationships. Drug ingredients are the highest-level ancestors so a query for the descendants of an ingredient should turn up all drug products (Clinical Drug or Branded Drug) containing that ingredient. A query designed in this way will find drugs of interest in any CDM regardless of the level of RXNorm used.

+

15. What if the vocabulary has a mapping I don’t agree with? Can it be changed?

+

Yes, that is the beauty of the community! If you find a mapping in the vocabulary that doesn’t seem to belong or that you think could be better, feel free to write a note on the forums or on the vocabulary github. If the community agrees with your assessment it will be addressed in the next vocabulary version.

+

16. What if I have source codes that are specific to my site? How would these be mapped?

+

In the OMOP Vocabulary there is an empty table called the Source_to_concept_map. It is a simple table structure that allows you to establish mapping(s) for each source code with a standard concept in the OMOP Vocabulary (TARGET_CONCEPT_ID). This work can be facilitated by the OHDSI tool Usagi (pictured below) which searches for text similarity between your source code descriptions and the OMOP Vocabulary and exports mappings in a SOURCE_TO_CONCEPT_MAP table structure. Example Source_to_concept_map files can be found here. These generated Source_to_concept_map files are then loaded into the OMOP Vocabulary’s empty Source_to_concept_map prior to processing the native data into the CDM so that the CDM builder can use them in a build.

+

+

If a source code is not supported by the OMOP Vocabulary, one can create new records in the CONCEPT table; however, the CONCEPT_IDs should start >2000000000 so that it is easy to tell between the OMOP Vocabulary concepts and the site specific concepts. Once those concepts exist, CONCEPT_RELATIONSHIPS can be generated to assign them to standard terminologies; USAGI can facilitate this process as well (THEMIS issue #22).

+

17. How are one-to-many mappings applied?

+

If one source code maps to two Standard Concepts then two rows are stored in the corresponding clinical event table.

+

18. What if I want to keep my original data as well as the mapped values? Is there a way for me to do that?

+

Yes! Source values and Source Concepts are fully maintained within the OMOP Common Data Model. A Source Concept represents the code in the source data. Each Source Concept is mapped to one or more Standard Concepts during the ETL process and both are stored in the corresponding clinical event table. If no mapping is available, the Standard Concept with the concept_id = 0 is written into the *_concept_id field (Condition_concept_id, Procedure_concept_id, etc.) so as to preserve the record from the native data.

+
+
+

Common Data Model Versioning

+

19. Who decides when and how to change the data model?

+

The community! There is a working group designed around updating the model and everything is done by consensus. Members submit proposed changes to the github in the form of issues and the group meets once a month to discuss and vote on the changes. Any ratified proposals are then added to the queue for a future version of the Common Data Model.

+

20. Are changes to the model backwards compatible?

+

Generally point version changes (5.1 -> 5.2) are backwards compatible and major version changes (4.0 -> 5.0) may not be. All updates to the model are listed in the release notes for each version and anything that could potentially affect backwards compatibility is clearly labeled.

+

21. How frequently does the model change?

+

The current schedule is for major versions to be released every year and point versions to be released every quarter though that is subject to the needs of the community.

+

22. What is the dissemination plan for changes?

+

Changes are first listed in the release notes on the github and in the common data model wiki. New versions are also announced on the weekly community calls and on the community forums.

+
+
+

OHDSI Tools

+

23. What are the currently available analytic tools?

+

While there are a variety of tools freely available from the community, these are the most widely used:

+ +

24. Who is responsible for updating the tools to account for data model changes, bugs, and errors?

+

The community! All the tools are open source meaning that anyone can submit an issue they have found, offer suggestions, and write code to fix the problem.

+

25. Do the current tools allow a user to define a treatment gap (persistence window) of any value when creating treatment episodes?

+

Yes – the ATLAS tool allows you to specify a persistence window between drug exposures when defining a cohort (see image below).

+

+

26. Can the current tools identify medication use during pregnancy?

+

Yes, you can identify pregnancy markers from various clinical domains, including conditions and procedures, for example ‘live birth’, and then define temporal logic to look for drug exposure records in some interval prior to the pregnancy end. In addition, members of the community have built an advanced logic to define pregnancy episodes with all pregnancy outcomes represented, which can be useful for this type of research.

+

27. Do the current tools execute against the mapped values or source values?

+

The tools can execute against both source and mapped values, though mapped values are strongly encouraged. Since one of the aims of OHDSI is to create a distributed data network across the world on which to run research studies, the use of source values fails to take advantage of the benefits of the Common Data Model.

+
+
+

Network Research Studies

+

28. Who can generate requests?

+

Anyone in the community! Any question that gains enough interest and participation can be a network research study.

+

29. Who will develop the queries to distribute to the network?

+

Typically a principal investigator leads the development of a protocol. The PI may also lead the development of the analysis procedure corresponding to the protocol. If the PI does not have the technical skills required to write the analysis procedure that implements the protocol, someone in the community can help them put it together.

+

30. What language are the queries written in?

+

Queries are written in R and SQL. The SqlRender package can translate any query written in a templated SQL Server-like dialect to any of the supported RDBMS environments, including Postgresql, Oracle, Redshift, Parallel Data Warehouse, Hadoop Impala, Google BigQuery, and Netezza.

+

31. How do the queries get to the data partners and how are they run once there?

+

OHDSI runs as a distributed data network. All analyses are publicly available and can be downloaded to run at each site. The packages can be run locally and, at the data partner’s discretion, aggregate results can be shared with the study coordinator.

+

Data partners can also make use of one of OHDSI’s open-source tools called ARACHNE, a tool to facilitate distributed network analytics against the OMOP CDM.

+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/rmd/index.html b/rmd/index.html new file mode 100644 index 0000000..8efa750 --- /dev/null +++ b/rmd/index.html @@ -0,0 +1,565 @@ + + + + + + + + + + + + + +index.knit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + +

The Observational Medical Outcomes Partnership (OMOP) Common Data Model (CDM) is an open community data standard, designed to standardize the structure and content of observational data and to enable efficient analyses that can produce reliable evidence. A central component of the OMOP CDM is the OHDSI standardized vocabularies. The OHDSI vocabularies allow organization and standardization of medical terms to be used across the various clinical domains of the OMOP common data model and enable standardized analytics that leverage the knowledge base when constructing exposure and outcome phenotypes and other features within characterization, population-level effect estimation, and patient-level prediction studies.

+

This website is meant to serve as a resource describing the specification of the available versions of the Common Data Model. This includes the structure of the model itself and the agreed upon conventions for each table and field as decided by the OHDSI Community. The vocabulary tables are part of the model and, as such, are detailed here. To download the vocabulary itself, please visit https://athena.ohdsi.org. For more information about the OHDSI suite of tools designed to implement best practices in characterization, population-level effect estimation and patient-level prediction, please visit https://ohdsi.github.io/Hades/.

+
+

Current CDM Version

+

The current CDM version is CDM v5.4, depicted below. This CDM version was developed over the course of a year by considering requests that were sent via our issues page. The list of proposed changes was then shared with the community in multiple ways: through discussions at the weekly OHDSI Community calls, discussions with the OHDSI Steering Committee, and discussions with all potentially affected workgroups. The final changes were then delivered to the Community through a new R package designed to dynamically generate the DDLs and documentation for all supported SQL dialects.

+ +


+
+

Current Support for CDM v5.4

+

The table below details which OHDSI tools support CDM v5.4. There are two levels of support: legacy support means that the tool supports all tables and fields that were present in CDM v5.3 and feature support indicates that the tool supports any new tables and fields in CDM v5.4 that were not present in CDM v5.3. A green check ✅ indicates that the support level for the listed tool is in place, has been tested, and released. A warning sign ⚠️ indicates that the support level for the listed tool has been initiated but has not yet been tested and released.

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ToolDescriptionLegacy SupportFeature Support
CDM R packageThis package can be downloaded from https://github.com/OHDSI/CommonDataModel/. It functions to dynamically create the OMOP CDM documentation and DDL scripts to instantiate the CDM tables.
Data Quality DashboardThis package can be downloaded from https://github.com/OHDSI/DataQualityDashboard. It runs a set of > 3500 data quality checks against an OMOP CDM instance and is required to be run on all databases prior to participating in an OHDSI network research study.⚠️
AchillesThis package can be downloaded from https://github.com/OHDSI/Achilles, performing a set of broad database characterizations against an OMOP CDM instance.⚠️
ARESThis package can be downloaded from https://github.com/OHDSI/Ares and is designed to display the results from both the ACHILLES and DataQualityDashboard packages to support data quality and characterization research.⚠️
ATLASATLAS is an open source software tool for researchers to conduct scientific analyses on standardized observational data. Demo⚠️
Rabbit-In-A-HatThis package can be downloaded from https://github.com/OHDSI/WhiteRabbit and is an application for interactive design of an ETL to the OMOP Common Data Model with the help of the scan report generated by White Rabbit.
Feature ExtractionThis package can be downloaded from https://github.com/OHDSI/FeatureExtraction. It is designed to generate features (covariates) for a cohort generated using the OMOP CDM.✅*
Cohort DiagnosticsThis package can be downloaded from https://github.com/OHDSI/CohortDiagnostics and is used to critically evaluate cohort phenotypes.⚠️
+


* The Feature Extraction package supports all relevant new features in CDM v5.4. For example, it was decided that, from a methodological perspective, the EPISODE and EPISODE_EVENT tables should not be included to define cohort covariates because the events that make up episodes are already pulled in as potential covariates.

+


+
+
+
+

The CDM Working Group

+

The CDM is managed by the OHDSI CDM Working Group. If you would like to join our group please fill out this form and check “Common Data Model” to be added to our Microsoft Teams environment. This working group endeavors to maintain the OMOP CDM as a living model by soliciting and responding to requests from the community based on use cases and research purposes. For more information on the CDM refresh process please see the description here. You will find information on our meetings and links to join at the end of this page.

+
+

Meeting Information

+

The CDM working group meets the first and third Tuesday of the month. See below for links to the meetings.

+

Every first Tuesday of the month at 1pm est Teams Meeting

+

Every third Tuesday of the month at 1pm est Teams Meeting

+

Note: If you do not have access to the OHDSI Teams Tenant, either contact Clair Blacketer at or fill out [this form](https://forms.office.com/Pages/ResponsePage.aspx?id=lAAPoyCRq0q6TOVQkCOy1ZyG6Ud_r2tKuS0HcGnqiQZUOVJFUzBFWE1aSVlLN0ozR01MUVQ4T0RGNyQlQCN0PWcu) and check “Common Data Model”.

+
+ +
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/rmd/indices-primary-foreign.html b/rmd/indices-primary-foreign.html new file mode 100644 index 0000000..76891ad --- /dev/null +++ b/rmd/indices-primary-foreign.html @@ -0,0 +1,622 @@ + + + + + + + + + + + + + +Indices, Primary Keys and Foreign Key Constraints + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + + + +
+

Overview

+

Database indices improve the performance of queries against a database by organizing the data in a way that speeds up query execution.

+

This article was written to provide guidance on the setting of indices, primary and foreign keys for data that has been transformed into the Observational Medical Outcomes Partnership (OMOP) Common Data Model (CDM). The community that supports the design and development of the OHDSI/CommonDataModel Github repository is a diverse collaborative of healthcare and technical professionals who have limited database administration (DBA) experience. As a result, the comments below should be interpreted as suggestions and recommendations to help increase performance. Your team's needs may call for a modified configuration.

+
+
+

General Recommendations

+

Should your database of choice support indexing, the OMOP CDM Working Group recommends

+
    +
  • Indexing on all columns containing an "_id" (e.g. condition_occurrence_id, drug_exposure_id, measurement_id, procedure_occurrence_id, etc.)
  • +
  • Indexing on primary and foreign keys
  • +
+

For all databases, regardless of custom index support, primary and foreign keys should be set. This is a step towards ensuring data integrity. Information on which table-level attributes should be set as primary and foreign keys can be found within the *_Field_Level.csv file(s) located in the INST/CSV directory.

+
+
+

Database support

+

The OHDSI/CommonDataModel package leverages OHDSI/SQLRender and as a result is only capable of supporting sources that are supported by OHDSI/SQLRender. The following databases are currently supported.

+
+

Microsoft SQL Server

+
+
+

Oracle

+
+
+

PostgreSQL

+
+
+

Amazon Redshift

+

On AWS Redshift it is important to ensure that your data is properly distributed and sorted across nodes. Compression on certain columns may also help. The designed DDL does set DISTKEYS in an effort to optimize performance. This configuration can be seen within the Redshift-specific DDL.

+
+
+

Impala

+
+
+

IBM Netezza

+
+
+

Google BigQuery

+

Google BigQuery does not require manual optimization and/or sizing. Google BigQuery does massive parallel full table scans and intensive caching, all under the hood. Reference

+
+
+

Microsoft Parallel Data Warehouse (PDW)

+
+
+

SQLite

+
+
+

Databricks

+

This database type is not yet supported but is actively being worked on by a number of collaborators. For more information, please contact Ajit Londhe of Amgen.

+
+
+ + + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/rmd/sqlScripts.Rmd b/rmd/sqlScripts.Rmd index 929c86c..68c8201 100644 --- a/rmd/sqlScripts.Rmd +++ b/rmd/sqlScripts.Rmd @@ -61,173 +61,164 @@ SELECT c.concept_code AS SOURCE_CODE, c.concept_id AS SOURCE_CONCEPT_ID, c.CONCE This script will insert values into the CONDITION_ERA table given that the CONDITION_OCCURRENCE table is populated. It will string together condition records that have <= 30 days between them into eras during which the Person is assumed to have the given condition. **NOTE** This query only works with 5.3 and below. ```{sql eval= FALSE, echo=TRUE} -/**************************************************** -OHDSI-SQL File Instructions ------------------------------ - 1. Set parameter name of schema that contains CDMv4 instance - (@SOURCE_CDMV4, @SOURCE_CDMV4_SCHEMA) - 2. Set parameter name of schema that contains CDMv5 instance - (@TARGET_CDMV5, @TARGET_CDMV5_SCHEMA) - 3. Run this script through SqlRender to produce a script that will work in your - source dialect. SqlRender can be found here: https://github.com/OHDSI/SqlRender - 4. Run the script produced by SQL Render on your target RDBDMS. - File Instructions -------------------------- - 1. This script will hold a number of placeholders for your CDM V4 and CDMV5 - database/schema. In order to make this file work in your environment, you - should plan to do a global "FIND AND REPLACE" on this file to fill in the - file with values that pertain to your environment. 
The following are the - tokens you should use when doing your "FIND AND REPLACE" operation: - - [CDM] - [CDM].[CDMSCHEMA] - -*********************************************************************************/ -/* SCRIPT PARAMETERS */ +if object_id('tempdb..#tmp_de', 'U') is not null drop table #tmp_de; - - {DEFAULT @TARGET_CDMV5 = '[CDM]' } -- The target CDMv5 database name - {DEFAULT @TARGET_CDMV5_SCHEMA = '[CDM].[CDMSCHEMA]' } -- the target CDMv5 database plus schema +WITH +ctePreDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, drug_exposure_start_date, days_supply, drug_exposure_end_date) AS +(-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date + SELECT + d.drug_exposure_id + , d.person_id + , c.concept_id AS ingredient_concept_id + , d.drug_exposure_start_date AS drug_exposure_start_date + , d.days_supply AS days_supply + , COALESCE( + ---NULLIF returns NULL if both values are the same, otherwise it returns the first parameter + NULLIF(drug_exposure_end_date, NULL), + ---If drug_exposure_end_date != NULL, return drug_exposure_end_date, otherwise go to next case + NULLIF(dateadd(day,days_supply,drug_exposure_start_date), drug_exposure_start_date), + ---If days_supply != NULL or 0, return drug_exposure_start_date + days_supply, otherwise go to next case + dateadd(day,1,drug_exposure_start_date) + ---Add 1 day to the drug_exposure_start_date since there is no end_date or INTERVAL for the days_supply + ) AS drug_exposure_end_date + FROM @cdm_schema.drug_exposure d + JOIN @cdm_schema.concept_ancestor ca ON ca.descendant_concept_id = d.drug_concept_id + JOIN @cdm_schema.concept c ON ca.ancestor_concept_id = c.concept_id + WHERE c.vocabulary_id = 'RxNorm' ---8 selects RxNorm from the vocabulary_id + AND c.concept_class_id = 'Ingredient' + AND d.drug_concept_id != 0 ---Our unmapped drug_concept_id's are set to 0, so we don't want different drugs wrapped up in the same era + 
AND coalesce(d.days_supply,0) >= 0 ---We have cases where days_supply is negative, and this can set the end_date before the start_date, which we don't want. So we're just looking over those rows. This is a data-quality issue. +) -USE @TARGET_CDMV5; - - - -/**** -CONDITION ERA -Note: Eras derived from CONDITION_OCCURRENCE table, using 30d gap - ****/ -IF OBJECT_ID('tempdb..#condition_era_phase_1', 'U') IS NOT NULL - DROP TABLE #condition_era_phase_1; - -/* / */ - -IF OBJECT_ID('tempdb..#cteConditionTarget', 'U') IS NOT NULL - DROP TABLE #cteConditionTarget; - -/* / */ - --- create base eras from the concepts found in condition_occurrence -SELECT co.PERSON_ID - ,co.condition_concept_id - ,co.CONDITION_START_DATE - ,COALESCE(co.CONDITION_END_DATE, DATEADD(day, 1, CONDITION_START_DATE)) AS CONDITION_END_DATE -INTO #cteConditionTarget -FROM @TARGET_CDMV5_SCHEMA.CONDITION_OCCURRENCE co; - -/* / */ - -IF OBJECT_ID('tempdb..#cteCondEndDates', 'U') IS NOT NULL - DROP TABLE #cteCondEndDates; - -/* / */ - -SELECT PERSON_ID - ,CONDITION_CONCEPT_ID - ,DATEADD(day, - 30, EVENT_DATE) AS END_DATE -- unpad the end date -INTO #cteCondEndDates -FROM ( - SELECT E1.PERSON_ID - ,E1.CONDITION_CONCEPT_ID - ,E1.EVENT_DATE - ,COALESCE(E1.START_ORDINAL, MAX(E2.START_ORDINAL)) START_ORDINAL - ,E1.OVERALL_ORD - FROM ( - SELECT PERSON_ID - ,CONDITION_CONCEPT_ID - ,EVENT_DATE - ,EVENT_TYPE - ,START_ORDINAL - ,ROW_NUMBER() OVER ( - PARTITION BY PERSON_ID - ,CONDITION_CONCEPT_ID ORDER BY EVENT_DATE - ,EVENT_TYPE - ) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date +, cteSubExposureEndDates (person_id, ingredient_concept_id, end_date) AS --- A preliminary sorting that groups all of the overlapping exposures into one exposure so that we don't double-count non-gap-days +( + SELECT person_id, ingredient_concept_id, event_date AS end_date + FROM + ( + SELECT person_id, ingredient_concept_id, event_date, event_type, + MAX(start_ordinal) OVER (PARTITION 
BY person_id, ingredient_concept_id + ORDER BY event_date, event_type ROWS unbounded preceding) AS start_ordinal, + -- this pulls the current START down from the prior rows so that the NULLs + -- from the END DATES will contain a value we can compare with + ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY event_date, event_type) AS overall_ord + -- this re-numbers the inner UNION so all rows are numbered ordered by the event date FROM ( -- select the start dates, assigning a row number to each - SELECT PERSON_ID - ,CONDITION_CONCEPT_ID - ,CONDITION_START_DATE AS EVENT_DATE - ,- 1 AS EVENT_TYPE - ,ROW_NUMBER() OVER ( - PARTITION BY PERSON_ID - ,CONDITION_CONCEPT_ID ORDER BY CONDITION_START_DATE - ) AS START_ORDINAL - FROM #cteConditionTarget + SELECT person_id, ingredient_concept_id, drug_exposure_start_date AS event_date, + -1 AS event_type, + ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY drug_exposure_start_date) AS start_ordinal + FROM ctePreDrugTarget + + UNION ALL + + SELECT person_id, ingredient_concept_id, drug_exposure_end_date, 1 AS event_type, NULL + FROM ctePreDrugTarget + ) RAWDATA + ) e + WHERE (2 * e.start_ordinal) - e.overall_ord = 0 +) + +, cteDrugExposureEnds (person_id, drug_concept_id, drug_exposure_start_date, drug_sub_exposure_end_date) AS +( +SELECT + dt.person_id + , dt.ingredient_concept_id + , dt.drug_exposure_start_date + , MIN(e.end_date) AS drug_sub_exposure_end_date +FROM ctePreDrugTarget dt +JOIN cteSubExposureEndDates e ON dt.person_id = e.person_id AND dt.ingredient_concept_id = e.ingredient_concept_id AND e.end_date >= dt.drug_exposure_start_date +GROUP BY + dt.drug_exposure_id + , dt.person_id + , dt.ingredient_concept_id + , dt.drug_exposure_start_date +) +-------------------------------------------------------------------------------------------------------------- +, cteSubExposures(row_number, person_id, drug_concept_id, drug_sub_exposure_start_date, 
drug_sub_exposure_end_date, drug_exposure_count) AS +( + SELECT ROW_NUMBER() OVER (PARTITION BY person_id, drug_concept_id, drug_sub_exposure_end_date ORDER BY person_id) + , person_id, drug_concept_id, MIN(drug_exposure_start_date) AS drug_sub_exposure_start_date, drug_sub_exposure_end_date, COUNT(*) AS drug_exposure_count + FROM cteDrugExposureEnds + GROUP BY person_id, drug_concept_id, drug_sub_exposure_end_date + --ORDER BY person_id, drug_concept_id +) +-------------------------------------------------------------------------------------------------------------- +/*Everything above grouped exposures into sub_exposures if there was overlap between exposures. + *So there was no persistence window. Now we can add the persistence window to calculate eras. + */ +-------------------------------------------------------------------------------------------------------------- +, cteFinalTarget(row_number, person_id, ingredient_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count, days_exposed) AS +( + SELECT row_number, person_id, drug_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count + , datediff(day,drug_sub_exposure_start_date,drug_sub_exposure_end_date) AS days_exposed + FROM cteSubExposures +) +-------------------------------------------------------------------------------------------------------------- +, cteEndDates (person_id, ingredient_concept_id, end_date) AS -- the magic +( + SELECT person_id, ingredient_concept_id, dateadd(day,-30,event_date) AS end_date -- unpad the end date + FROM + ( + SELECT person_id, ingredient_concept_id, event_date, event_type, + MAX(start_ordinal) OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY event_date, event_type ROWS UNBOUNDED PRECEDING) AS start_ordinal, + -- this pulls the current START down from the prior rows so that the NULLs + -- from the END DATES will contain a value we can compare with + ROW_NUMBER() OVER (PARTITION BY 
person_id, ingredient_concept_id + ORDER BY event_date, event_type) AS overall_ord + -- this re-numbers the inner UNION so all rows are numbered ordered by the event date + FROM ( + -- select the start dates, assigning a row number to each + SELECT person_id, ingredient_concept_id, drug_sub_exposure_start_date AS event_date, + -1 AS event_type, + ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY drug_sub_exposure_start_date) AS start_ordinal + FROM cteFinalTarget UNION ALL -- pad the end dates by 30 to allow a grace period for overlapping ranges. - SELECT PERSON_ID - ,CONDITION_CONCEPT_ID - ,DATEADD(day, 30, CONDITION_END_DATE) - ,1 AS EVENT_TYPE - ,NULL - FROM #cteConditionTarget - ) RAWDATA - ) E1 - INNER JOIN ( - SELECT PERSON_ID - ,CONDITION_CONCEPT_ID - ,CONDITION_START_DATE AS EVENT_DATE - ,ROW_NUMBER() OVER ( - PARTITION BY PERSON_ID - ,CONDITION_CONCEPT_ID ORDER BY CONDITION_START_DATE - ) AS START_ORDINAL - FROM #cteConditionTarget - ) E2 ON E1.PERSON_ID = E2.PERSON_ID - AND E1.CONDITION_CONCEPT_ID = E2.CONDITION_CONCEPT_ID - AND E2.EVENT_DATE <= E1.EVENT_DATE - GROUP BY E1.PERSON_ID - ,E1.CONDITION_CONCEPT_ID - ,E1.EVENT_DATE - ,E1.START_ORDINAL - ,E1.OVERALL_ORD - ) E -WHERE (2 * E.START_ORDINAL) - E.OVERALL_ORD = 0; + SELECT person_id, ingredient_concept_id, dateadd(day,30,drug_sub_exposure_end_date), 1 AS event_type, NULL + FROM cteFinalTarget + ) RAWDATA + ) e + WHERE (2 * e.start_ordinal) - e.overall_ord = 0 -/* / */ +) +, cteDrugEraEnds (person_id, drug_concept_id, drug_sub_exposure_start_date, drug_era_end_date, drug_exposure_count, days_exposed) AS +( +SELECT + ft.person_id + , ft.ingredient_concept_id + , ft.drug_sub_exposure_start_date + , MIN(e.end_date) AS era_end_date + , drug_exposure_count + , days_exposed +FROM cteFinalTarget ft +JOIN cteEndDates e ON ft.person_id = e.person_id AND ft.ingredient_concept_id = e.ingredient_concept_id AND e.end_date >= ft.drug_sub_exposure_start_date +GROUP BY + ft.person_id + , 
ft.ingredient_concept_id + , ft.drug_sub_exposure_start_date + , drug_exposure_count + , days_exposed +) +SELECT + row_number()over(order by person_id) drug_era_id + , person_id + , drug_concept_id + , MIN(drug_sub_exposure_start_date) AS drug_era_start_date + , drug_era_end_date + , SUM(drug_exposure_count) AS drug_exposure_count + , datediff(day,MIN(drug_sub_exposure_start_date),drug_era_end_date)-SUM(days_exposed) as gap_days +INTO #tmp_de +FROM cteDrugEraEnds dee +GROUP BY person_id, drug_concept_id, drug_era_end_date; -IF OBJECT_ID('tempdb..#cteConditionEnds', 'U') IS NOT NULL - DROP TABLE #cteConditionEnds; - -/* / */ - -SELECT c.PERSON_ID - ,c.CONDITION_CONCEPT_ID - ,c.CONDITION_START_DATE - ,MIN(e.END_DATE) AS ERA_END_DATE -INTO #cteConditionEnds -FROM #cteConditionTarget c -INNER JOIN #cteCondEndDates e ON c.PERSON_ID = e.PERSON_ID - AND c.CONDITION_CONCEPT_ID = e.CONDITION_CONCEPT_ID - AND e.END_DATE >= c.CONDITION_START_DATE -GROUP BY c.PERSON_ID - ,c.CONDITION_CONCEPT_ID - ,c.CONDITION_START_DATE; - -/* / */ - -INSERT INTO @TARGET_CDMV5_SCHEMA.condition_era ( - condition_era_id - ,person_id - ,condition_concept_id - ,condition_era_start_date - ,condition_era_end_date - ,condition_occurrence_count - ) -SELECT row_number() OVER ( - ORDER BY person_id - ) AS condition_era_id - ,person_id - ,CONDITION_CONCEPT_ID - ,min(CONDITION_START_DATE) AS CONDITION_ERA_START_DATE - ,ERA_END_DATE AS CONDITION_ERA_END_DATE - ,COUNT(*) AS CONDITION_OCCURRENCE_COUNT -FROM #cteConditionEnds -GROUP BY person_id - ,CONDITION_CONCEPT_ID - ,ERA_END_DATE; +INSERT INTO @cdm_schema.drug_era(drug_era_id,person_id, drug_concept_id, drug_era_start_date, drug_era_end_date, drug_exposure_count, gap_days) +SELECT * FROM #tmp_de; ``` ### Drug Eras @@ -236,174 +227,164 @@ If the DRUG_EXPOSURE table is populated this script will string together periods ```{sql eval = FALSE, echo = TRUE} -/**************************************************** -OHDSI-SQL File Instructions 
------------------------------ - 1. Set parameter name of schema that contains CDMv4 instance - (@SOURCE_CDMV4, @SOURCE_CDMV4_SCHEMA) - 2. Set parameter name of schema that contains CDMv5 instance - (@TARGET_CDMV5, @TARGET_CDMV5_SCHEMA) - 3. Run this script through SqlRender to produce a script that will work in your - source dialect. SqlRender can be found here: https://github.com/OHDSI/SqlRender - 4. Run the script produced by SQL Render on your target RDBDMS. - File Instructions -------------------------- - 1. This script will hold a number of placeholders for your CDM V4 and CDMV5 - database/schema. In order to make this file work in your environment, you - should plan to do a global "FIND AND REPLACE" on this file to fill in the - file with values that pertain to your environment. The following are the - tokens you should use when doing your "FIND AND REPLACE" operation: - - [CDM] - [CDM].[CDMSCHEMA] - -*********************************************************************************/ -/* SCRIPT PARAMETERS */ +if object_id('tempdb..#tmp_de', 'U') is not null drop table #tmp_de; - - {DEFAULT @TARGET_CDMV5 = '[CDM]' } -- The target CDMv5 database name - {DEFAULT @TARGET_CDMV5_SCHEMA = '[CDM].[CDMSCHEMA]' } -- the target CDMv5 database plus schema +WITH +ctePreDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, drug_exposure_start_date, days_supply, drug_exposure_end_date) AS +(-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date + SELECT + d.drug_exposure_id + , d.person_id + , c.concept_id AS ingredient_concept_id + , d.drug_exposure_start_date AS drug_exposure_start_date + , d.days_supply AS days_supply + , COALESCE( + ---NULLIF returns NULL if both values are the same, otherwise it returns the first parameter + NULLIF(drug_exposure_end_date, NULL), + ---If drug_exposure_end_date != NULL, return drug_exposure_end_date, otherwise go to next case + 
NULLIF(dateadd(day,days_supply,drug_exposure_start_date), drug_exposure_start_date), + ---If days_supply != NULL or 0, return drug_exposure_start_date + days_supply, otherwise go to next case + dateadd(day,1,drug_exposure_start_date) + ---Add 1 day to the drug_exposure_start_date since there is no end_date or INTERVAL for the days_supply + ) AS drug_exposure_end_date + FROM @cdm_schema.drug_exposure d + JOIN @cdm_schema.concept_ancestor ca ON ca.descendant_concept_id = d.drug_concept_id + JOIN @cdm_schema.concept c ON ca.ancestor_concept_id = c.concept_id + WHERE c.vocabulary_id = 'RxNorm' ---8 selects RxNorm from the vocabulary_id + AND c.concept_class_id = 'Ingredient' + AND d.drug_concept_id != 0 ---Our unmapped drug_concept_id's are set to 0, so we don't want different drugs wrapped up in the same era + AND coalesce(d.days_supply,0) >= 0 ---We have cases where days_supply is negative, and this can set the end_date before the start_date, which we don't want. So we're just looking over those rows. This is a data-quality issue. 
+) -USE @TARGET_CDMV5; - - - -/**** -DRUG ERA -Note: Eras derived from DRUG_EXPOSURE table, using 30d gap - ****/ -IF OBJECT_ID('tempdb..#cteDrugTarget', 'U') IS NOT NULL - DROP TABLE #cteDrugTarget; - -/* / */ - --- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date -SELECT d.DRUG_EXPOSURE_ID - ,d.PERSON_ID - ,c.CONCEPT_ID - ,d.DRUG_TYPE_CONCEPT_ID - ,DRUG_EXPOSURE_START_DATE - ,COALESCE(DRUG_EXPOSURE_END_DATE, DATEADD(day, DAYS_SUPPLY, DRUG_EXPOSURE_START_DATE), DATEADD(day, 1, DRUG_EXPOSURE_START_DATE)) AS DRUG_EXPOSURE_END_DATE - ,c.CONCEPT_ID AS INGREDIENT_CONCEPT_ID -INTO #cteDrugTarget -FROM @TARGET_CDMV5_SCHEMA.DRUG_EXPOSURE d -INNER JOIN @TARGET_CDMV5_SCHEMA.CONCEPT_ANCESTOR ca ON ca.DESCENDANT_CONCEPT_ID = d.DRUG_CONCEPT_ID -INNER JOIN @TARGET_CDMV5_SCHEMA.CONCEPT c ON ca.ANCESTOR_CONCEPT_ID = c.CONCEPT_ID -WHERE c.DOMAIN_ID = 'Drug' - AND c.CONCEPT_CLASS_ID = 'Ingredient' - AND c.STANDARD_CONCEPT = 'S'; - -/* / */ - -IF OBJECT_ID('tempdb..#cteEndDates', 'U') IS NOT NULL - DROP TABLE #cteEndDates; - -/* / */ - -SELECT PERSON_ID - ,INGREDIENT_CONCEPT_ID - ,DATEADD(day, - 30, EVENT_DATE) AS END_DATE -- unpad the end date -INTO #cteEndDates -FROM ( - SELECT E1.PERSON_ID - ,E1.INGREDIENT_CONCEPT_ID - ,E1.EVENT_DATE - ,COALESCE(E1.START_ORDINAL, MAX(E2.START_ORDINAL)) START_ORDINAL - ,E1.OVERALL_ORD - FROM ( - SELECT PERSON_ID - ,INGREDIENT_CONCEPT_ID - ,EVENT_DATE - ,EVENT_TYPE - ,START_ORDINAL - ,ROW_NUMBER() OVER ( - PARTITION BY PERSON_ID - ,INGREDIENT_CONCEPT_ID ORDER BY EVENT_DATE - ,EVENT_TYPE - ) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date +, cteSubExposureEndDates (person_id, ingredient_concept_id, end_date) AS --- A preliminary sorting that groups all of the overlapping exposures into one exposure so that we don't double-count non-gap-days +( + SELECT person_id, ingredient_concept_id, event_date AS end_date + FROM + 
( + SELECT person_id, ingredient_concept_id, event_date, event_type, + MAX(start_ordinal) OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY event_date, event_type ROWS unbounded preceding) AS start_ordinal, + -- this pulls the current START down from the prior rows so that the NULLs + -- from the END DATES will contain a value we can compare with + ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY event_date, event_type) AS overall_ord + -- this re-numbers the inner UNION so all rows are numbered ordered by the event date FROM ( -- select the start dates, assigning a row number to each - SELECT PERSON_ID - ,INGREDIENT_CONCEPT_ID - ,DRUG_EXPOSURE_START_DATE AS EVENT_DATE - ,0 AS EVENT_TYPE - ,ROW_NUMBER() OVER ( - PARTITION BY PERSON_ID - ,INGREDIENT_CONCEPT_ID ORDER BY DRUG_EXPOSURE_START_DATE - ) AS START_ORDINAL - FROM #cteDrugTarget + SELECT person_id, ingredient_concept_id, drug_exposure_start_date AS event_date, + -1 AS event_type, + ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY drug_exposure_start_date) AS start_ordinal + FROM ctePreDrugTarget UNION ALL - -- add the end dates with NULL as the row number, padding the end dates by 30 to allow a grace period for overlapping ranges. 
- SELECT PERSON_ID - ,INGREDIENT_CONCEPT_ID - ,DATEADD(day, 30, DRUG_EXPOSURE_END_DATE) - ,1 AS EVENT_TYPE - ,NULL - FROM #cteDrugTarget - ) RAWDATA - ) E1 - INNER JOIN ( - SELECT PERSON_ID - ,INGREDIENT_CONCEPT_ID - ,DRUG_EXPOSURE_START_DATE AS EVENT_DATE - ,ROW_NUMBER() OVER ( - PARTITION BY PERSON_ID - ,INGREDIENT_CONCEPT_ID ORDER BY DRUG_EXPOSURE_START_DATE - ) AS START_ORDINAL - FROM #cteDrugTarget - ) E2 ON E1.PERSON_ID = E2.PERSON_ID - AND E1.INGREDIENT_CONCEPT_ID = E2.INGREDIENT_CONCEPT_ID - AND E2.EVENT_DATE <= E1.EVENT_DATE - GROUP BY E1.PERSON_ID - ,E1.INGREDIENT_CONCEPT_ID - ,E1.EVENT_DATE - ,E1.START_ORDINAL - ,E1.OVERALL_ORD - ) E -WHERE 2 * E.START_ORDINAL - E.OVERALL_ORD = 0; + SELECT person_id, ingredient_concept_id, drug_exposure_end_date, 1 AS event_type, NULL + FROM ctePreDrugTarget + ) RAWDATA + ) e + WHERE (2 * e.start_ordinal) - e.overall_ord = 0 +) -/* / */ +, cteDrugExposureEnds (person_id, drug_concept_id, drug_exposure_start_date, drug_sub_exposure_end_date) AS +( +SELECT + dt.person_id + , dt.ingredient_concept_id + , dt.drug_exposure_start_date + , MIN(e.end_date) AS drug_sub_exposure_end_date +FROM ctePreDrugTarget dt +JOIN cteSubExposureEndDates e ON dt.person_id = e.person_id AND dt.ingredient_concept_id = e.ingredient_concept_id AND e.end_date >= dt.drug_exposure_start_date +GROUP BY + dt.drug_exposure_id + , dt.person_id + , dt.ingredient_concept_id + , dt.drug_exposure_start_date +) +-------------------------------------------------------------------------------------------------------------- +, cteSubExposures(row_number, person_id, drug_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count) AS +( + SELECT ROW_NUMBER() OVER (PARTITION BY person_id, drug_concept_id, drug_sub_exposure_end_date ORDER BY person_id) + , person_id, drug_concept_id, MIN(drug_exposure_start_date) AS drug_sub_exposure_start_date, drug_sub_exposure_end_date, COUNT(*) AS drug_exposure_count + FROM cteDrugExposureEnds + 
GROUP BY person_id, drug_concept_id, drug_sub_exposure_end_date + --ORDER BY person_id, drug_concept_id +) +-------------------------------------------------------------------------------------------------------------- +/*Everything above grouped exposures into sub_exposures if there was overlap between exposures. + *So there was no persistence window. Now we can add the persistence window to calculate eras. + */ +-------------------------------------------------------------------------------------------------------------- +, cteFinalTarget(row_number, person_id, ingredient_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count, days_exposed) AS +( + SELECT row_number, person_id, drug_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count + , datediff(day,drug_sub_exposure_start_date,drug_sub_exposure_end_date) AS days_exposed + FROM cteSubExposures +) +-------------------------------------------------------------------------------------------------------------- +, cteEndDates (person_id, ingredient_concept_id, end_date) AS -- the magic +( + SELECT person_id, ingredient_concept_id, dateadd(day,-30,event_date) AS end_date -- unpad the end date + FROM + ( + SELECT person_id, ingredient_concept_id, event_date, event_type, + MAX(start_ordinal) OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY event_date, event_type ROWS UNBOUNDED PRECEDING) AS start_ordinal, + -- this pulls the current START down from the prior rows so that the NULLs + -- from the END DATES will contain a value we can compare with + ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY event_date, event_type) AS overall_ord + -- this re-numbers the inner UNION so all rows are numbered ordered by the event date + FROM ( + -- select the start dates, assigning a row number to each + SELECT person_id, ingredient_concept_id, drug_sub_exposure_start_date AS event_date, + -1 AS event_type, + ROW_NUMBER() 
OVER (PARTITION BY person_id, ingredient_concept_id + ORDER BY drug_sub_exposure_start_date) AS start_ordinal + FROM cteFinalTarget -IF OBJECT_ID('tempdb..#cteDrugExpEnds', 'U') IS NOT NULL - DROP TABLE #cteDrugExpEnds; + UNION ALL -/* / */ + -- pad the end dates by 30 to allow a grace period for overlapping ranges. + SELECT person_id, ingredient_concept_id, dateadd(day,30,drug_sub_exposure_end_date), 1 AS event_type, NULL + FROM cteFinalTarget + ) RAWDATA + ) e + WHERE (2 * e.start_ordinal) - e.overall_ord = 0 -SELECT d.PERSON_ID - ,d.INGREDIENT_CONCEPT_ID - ,d.DRUG_TYPE_CONCEPT_ID - ,d.DRUG_EXPOSURE_START_DATE - ,MIN(e.END_DATE) AS ERA_END_DATE -INTO #cteDrugExpEnds -FROM #cteDrugTarget d -INNER JOIN #cteEndDates e ON d.PERSON_ID = e.PERSON_ID - AND d.INGREDIENT_CONCEPT_ID = e.INGREDIENT_CONCEPT_ID - AND e.END_DATE >= d.DRUG_EXPOSURE_START_DATE -GROUP BY d.PERSON_ID - ,d.INGREDIENT_CONCEPT_ID - ,d.DRUG_TYPE_CONCEPT_ID - ,d.DRUG_EXPOSURE_START_DATE; - -/* / */ - -INSERT INTO @TARGET_CDMV5_SCHEMA.drug_era -SELECT row_number() OVER ( - ORDER BY person_id - ) AS drug_era_id - ,person_id - ,INGREDIENT_CONCEPT_ID - ,min(DRUG_EXPOSURE_START_DATE) AS drug_era_start_date - ,ERA_END_DATE - ,COUNT(*) AS DRUG_EXPOSURE_COUNT - ,30 AS gap_days -FROM #cteDrugExpEnds -GROUP BY person_id - ,INGREDIENT_CONCEPT_ID - ,drug_type_concept_id - ,ERA_END_DATE; +) +, cteDrugEraEnds (person_id, drug_concept_id, drug_sub_exposure_start_date, drug_era_end_date, drug_exposure_count, days_exposed) AS +( +SELECT + ft.person_id + , ft.ingredient_concept_id + , ft.drug_sub_exposure_start_date + , MIN(e.end_date) AS era_end_date + , drug_exposure_count + , days_exposed +FROM cteFinalTarget ft +JOIN cteEndDates e ON ft.person_id = e.person_id AND ft.ingredient_concept_id = e.ingredient_concept_id AND e.end_date >= ft.drug_sub_exposure_start_date +GROUP BY + ft.person_id + , ft.ingredient_concept_id + , ft.drug_sub_exposure_start_date + , drug_exposure_count + , days_exposed +) +SELECT + 
row_number()over(order by person_id) drug_era_id + , person_id + , drug_concept_id + , MIN(drug_sub_exposure_start_date) AS drug_era_start_date + , drug_era_end_date + , SUM(drug_exposure_count) AS drug_exposure_count + , datediff(day,MIN(drug_sub_exposure_start_date),drug_era_end_date)-SUM(days_exposed) as gap_days +INTO #tmp_de +FROM cteDrugEraEnds dee +GROUP BY person_id, drug_concept_id, drug_era_end_date; +INSERT INTO @cdm_schema.drug_era(drug_era_id,person_id, drug_concept_id, drug_era_start_date, drug_era_end_date, drug_exposure_count, gap_days) +SELECT * FROM #tmp_de; ``` ## **Example ETL Script**