OMOP/docs/faq.html

977 lines
36 KiB
HTML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<title>OMOP CDM Frequently Asked Questions</title>
<script src="site_libs/header-attrs-2.25/header-attrs.js"></script>
<script src="site_libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<style>h1 {font-size: 34px;}
h1.title {font-size: 38px;}
h2 {font-size: 30px;}
h3 {font-size: 24px;}
h4 {font-size: 18px;}
h5 {font-size: 16px;}
h6 {font-size: 12px;}
code {color: inherit; background-color: rgba(0, 0, 0, 0.04);}
pre:not([class]) { background-color: white }</style>
<script src="site_libs/jqueryui-1.13.2/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/default.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-6.4.2/css/all.min.css" rel="stylesheet" />
<link href="site_libs/font-awesome-6.4.2/css/v4-shims.min.css" rel="stylesheet" />
<link rel='shortcut icon' type='image/x-icon' href='favicon.ico' />
<style type="text/css">
/* Small helper rules for inline code, small caps, underline, two-column
   layouts, hanging indents and task lists.
   NOTE(review): the `code{white-space: pre;}` rule in the following style
   element has the same specificity and appears later, so it takes precedence
   over the `pre-wrap` declared here. */
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
/* Each column takes half the width of its container, aligned at the top. */
div.column{display: inline-block; vertical-align: top; width: 50%;}
/* First line is pulled back by the same amount the block is indented. */
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">code{white-space: pre;}</style>
<script type="text/javascript">
// Start highlight.js, but only when the library was actually loaded.
(function () {
  if (!window.hljs) {
    return;
  }
  // Configure with an empty `languages` list before initialising.
  hljs.configure({languages: []});
  hljs.initHighlightingOnLoad();
  // If the document had already finished loading when this script ran,
  // also kick off highlighting explicitly on the next tick.
  var alreadyLoaded = document.readyState === "complete";
  if (alreadyLoaded) {
    window.setTimeout(function () {
      hljs.initHighlighting();
    }, 0);
  }
})();
</script>
<link rel="stylesheet" href="style.css" type="text/css" />
<style type = "text/css">
/* Page container: horizontally centered with a 940px cap.
   NOTE(review): a later `div.main-container` rule in this file sets
   max-width to 1200px; it is more specific and comes later, so it wins. */
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
/* Images never overflow their container. */
img {
max-width:100%;
}
.tabbed-pane {
padding-top: 12px;
}
.html-widget {
margin-bottom: 20px;
}
/* NOTE(review): removes the focus outline on code-folding buttons and no
   replacement focus style is defined in this block. */
button.code-folding-btn:focus {
outline: none;
}
summary {
display: list-item;
}
/* A lone paragraph inside a summary stays on the same line as the summary. */
details > summary > p:only-child {
display: inline;
}
pre code {
padding: 0;
}
</style>
<style type="text/css">
/* Nested (second-level) navbar dropdowns. The submenu item is the positioning
   context for its own absolutely placed menu. */
.dropdown-submenu {
position: relative;
}
/* Place the nested menu flush with the top of its parent item and directly
   to its right (left: 100%). */
.dropdown-submenu>.dropdown-menu {
top: 0;
left: 100%;
margin-top: -6px;
margin-left: -1px;
border-radius: 0 6px 6px 6px;
}
/* The nested menu is revealed purely by hovering the parent item. */
.dropdown-submenu:hover>.dropdown-menu {
display: block;
}
/* Right-pointing arrow after the submenu label, drawn as a zero-size box
   whose only coloured border is the left one. */
.dropdown-submenu>a:after {
display: block;
content: " ";
float: right;
width: 0;
height: 0;
border-color: transparent;
border-style: solid;
border-width: 5px 0 5px 5px;
border-left-color: #cccccc;
margin-top: 5px;
margin-right: -10px;
}
/* Arrow colour while the submenu item is hovered. */
.dropdown-submenu:hover>a:after {
border-left-color: #adb5bd;
}
/* `pull-left` variant: the nested menu opens to the left of the parent
   (left: -100%) with the rounded corners mirrored. */
.dropdown-submenu.pull-left {
float: none;
}
.dropdown-submenu.pull-left>.dropdown-menu {
left: -100%;
margin-left: 10px;
border-radius: 6px 0 6px 6px;
}
</style>
<script type="text/javascript">
// Manage the active state of the navbar menu based on the current page, and
// inject CSS that offsets headings and the body for the fixed-top navbar.
$(document).ready(function () {
  // File name of the current page: everything after the last "/" of the path.
  // Declared with `var` so the handler no longer creates an implicit global
  // `href` on `window` (which would also throw under strict mode).
  var pagePath = window.location.pathname;
  var href = pagePath.substring(pagePath.lastIndexOf('/') + 1);
  // A path ending in "/" has no file name; treat it as the index page.
  if (href === "") {
    href = "index.html";
  }
  var menuAnchor = $('a[href="' + href + '"]');

  // Mark the anchor link active (and if it's in a dropdown, also mark that active).
  var dropdown = menuAnchor.closest('li.dropdown');
  if (window.bootstrap) { // Bootstrap 4+
    menuAnchor.addClass('active');
    dropdown.find('> .dropdown-toggle').addClass('active');
  } else { // Bootstrap 3
    menuAnchor.parent().addClass('active');
    dropdown.addClass('active');
  }

  // Navbar adjustments: measure the first navbar and add 15px of clearance.
  var navHeight = $(".navbar").first().height() + 15;
  var style = document.createElement('style');
  var pt = "padding-top: " + navHeight + "px; ";
  var mt = "margin-top: -" + navHeight + "px; ";
  var css = "";
  // Offset scroll position for anchor links (for fixed navbar): each section
  // heading h1..h6 gets padding-top plus an equal negative margin-top.
  for (var i = 1; i <= 6; i++) {
    css += ".section h" + i + "{ " + pt + mt + "}\n";
  }
  style.innerHTML = "body {" + pt + "padding-bottom: 40px; }\n" + css;
  document.head.appendChild(style);
});
</script>
<!-- tabsets -->
<style type="text/css">
/* Tabsets rendered as a dropdown: the tab list becomes a bordered box that
   scrolls vertically once it exceeds 500px. */
.tabset-dropdown > .nav-tabs {
display: inline-table;
max-height: 500px;
min-height: 44px;
overflow-y: auto;
border: 1px solid #ddd;
border-radius: 4px;
}
/* Icon (from the Glyphicons Halflings font) shown before the active tab when
   closed, and before the list itself when open. */
.tabset-dropdown > .nav-tabs > li.active:before, .tabset-dropdown > .nav-tabs.nav-tabs-open:before {
content: "\e259";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
/* While the list is open the active tab uses a different icon, borderless. */
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
content: "\e258";
font-family: 'Glyphicons Halflings';
border: none;
}
/* The active tab is always visible (more specific than the hide-all rule
   at the end of this block). */
.tabset-dropdown > .nav-tabs > li.active {
display: block;
}
/* Strip the default tab chrome from the links in every interaction state. */
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
border: none;
display: inline-block;
border-radius: 4px;
background-color: transparent;
}
/* With `nav-tabs-open` set, every tab is listed, stacked vertically. */
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
display: block;
float: none;
}
/* Default: tabs are hidden. The `.active` and `.nav-tabs-open` rules above
   have higher specificity, so they still apply despite appearing earlier. */
.tabset-dropdown > .nav-tabs > li {
display: none;
}
</style>
<!-- code folding -->
<style type="text/css">
/* Styles for the table-of-contents sidebar (#TOC / .tocify) and the content
   column next to it (.toc-content). */
#TOC {
margin: 25px 0px 20px 0px;
}
/* Up to 768px wide: the TOC is positioned in normal flow at full width. */
@media (max-width: 768px) {
#TOC {
position: relative;
width: 100%;
}
}
@media print {
.toc-content {
/* see https://github.com/w3c/csswg-drafts/issues/4434 */
float: right;
}
}
.toc-content {
padding-left: 30px;
padding-right: 40px;
}
/* Widens the page container to 1200px; this selector is more specific than
   the plain `.main-container` rule (940px) declared earlier in this file. */
div.main-container {
max-width: 1200px;
}
/* TOC box sizing: 20% of the width, capped at 260px wide and 85% tall. */
div.tocify {
width: 20%;
max-width: 260px;
max-height: 85%;
}
/* 768px-991px: give the TOC a slightly larger share of the width. */
@media (min-width: 768px) and (max-width: 991px) {
div.tocify {
width: 25%;
}
}
/* Up to 767px: the TOC spans the full width with no cap. */
@media (max-width: 767px) {
div.tocify {
width: 100%;
max-width: none;
}
}
.tocify ul, .tocify li {
line-height: 20px;
}
/* Sub-level TOC entries are rendered slightly smaller. */
.tocify-subheader .tocify-item {
font-size: 0.90em;
}
.tocify .list-group-item {
border-radius: 0px;
}
</style>
</head>
<body>
<div class="container-fluid main-container">
<!-- setup 3col/9col grid for toc_float and main content -->
<div class="row">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>
<div class="toc-content col-xs-12 col-sm-8 col-md-9">
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
  <!-- The toggle contains only decorative bars, so aria-label supplies its accessible name. -->
  <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-bs-toggle="collapse" data-target="#navbar" data-bs-target="#navbar" aria-label="Toggle navigation" aria-controls="navbar" aria-expanded="false">
    <span class="icon-bar"></span>
    <span class="icon-bar"></span>
    <span class="icon-bar"></span>
  </button>
  <!-- img is a void element (no end tag); the logo is decorative because the brand text follows it, hence the empty alt. -->
  <a class="navbar-brand" href="index.html"><div><img src="ohdsi16x16.png" alt="" /> OMOP Common Data Model </div></a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
  <!-- Icon-only link: aria-label provides the accessible name; the icon font glyph is hidden from assistive technology. -->
  <a href="index.html" aria-label="Home">
    <span class="fa fa-house" aria-hidden="true"></span>
  </a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-landmark"></span>
Background
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="background.html">Model Background</a>
</li>
<li>
<a href="cdmRefreshProcess.html">CDM Refresh Process</a>
</li>
<li>
<a href="vocabulary.html">How the Vocabulary is Built</a>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-list-alt"></span>
Conventions
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="dataModelConventions.html">General Conventions</a>
</li>
<li>
<a href="ehrObsPeriods.html">Observation Periods for EHR Data</a>
</li>
<li>
<a href="cdmPrivacy.html">Patient Privacy and OMOP</a>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-history"></span>
CDM Versions
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdm30.html">CDM v3.0</a>
</li>
<li>
<a href="cdm60.html">CDM v6.0</a>
</li>
<li>
<a href="cdm53.html">CDM v5.3</a>
</li>
<li class="dropdown-submenu">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">NEW CDM v5.4</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdm54.html">CDM v5.4</a>
</li>
<li>
<a href="cdm54Changes.html">Changes from CDM v5.3</a>
</li>
<li>
<a href="cdm54erd.html">Entity Relationships</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-plus-square"></span>
CDM Proposals
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdmRequestProcess.html">How to Propose Changes to the CDM</a>
</li>
<li>
<a href="https://github.com/OHDSI/CommonDataModel/issues?q=is%3Aopen+is%3Aissue+label%3AProposal">Under Review</a>
</li>
<li class="dropdown-submenu">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">Accepted</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="https://github.com/OHDSI/CommonDataModel/issues/252">Region_concept_id</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-question"></span>
How to
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="download.html">Download the DDL</a>
</li>
<li>
<a href="cdmRPackage.html">Use the CDM R Package</a>
</li>
<li>
<a href="drug_dose.html">Calculate Drug Dose</a>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-life-ring"></span>
Support
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdmDecisionTree.html">Help! My Data Doesn't Fit!</a>
</li>
<li>
<a href="faq.html">FAQ</a>
</li>
<li>
<a href="sqlScripts.html">SQL Scripts</a>
</li>
<li>
<a href="contribute.html">Ask a Question</a>
</li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
  <li>
    <!-- Icon-only link: aria-label provides the accessible name; the icon font glyph is hidden from assistive technology. -->
    <a href="https://github.com/OHDSI/CommonDataModel" aria-label="OHDSI CommonDataModel on GitHub">
      <span class="fa fa-github" aria-hidden="true"></span>
    </a>
  </li>
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
<div id="header">
<h1 class="title toc-ignore">OMOP CDM Frequently Asked Questions</h1>
</div>
<p><strong>1. I understand that the common data model (CDM) is a way of
organizing disparate data sources into the same relational database
design, but how can it be effective since many databases use different
coding schemes?</strong></p>
<p>During the extract, transform, load (ETL) process of converting a
data source into the OMOP common data model, we standardize the
structure (e.g. tables, fields, data types), conventions (e.g. rules
that govern how source data should be represented), and content
(e.g. what common vocabularies are used to speak the same language
across clinical domains). The common data model preserves all source
data, including the original source vocabulary codes, but adds the
standardized vocabularies to allow for network research across the
entire OHDSI research community.</p>
<p><strong>2. How does my data get transformed into the common data
model?</strong></p>
<p>You or someone in your organization will need to create a process to
build your CDM. Don't worry though, you are not alone! The open nature
of the community means that much of the code that other participants
have written to transform their own data is available for you to use. If
you have a data license for a large administrative claims database like
IBM MarketScan® or Optum's Clinformatics® Extended Data Mart, chances
are that someone has already done the legwork. Here is one example of a
full builder freely available on <a
href="https://github.com/OHDSI/ETL-CDMBuilder">github</a> that has been
written for a variety of data sources.</p>
<p>The <a href="http://forums.ohdsi.org/">community forums</a> are also
a great place to ask questions if you are stuck or need guidance on how
to represent your data in the common data model. Members are usually
very responsive!</p>
<p><strong>3. Are any tables or fields optional?</strong></p>
<p>It is expected that all tables will be present in a CDM though it is
not a requirement that they are all populated. The two mandatory tables
are:</p>
<ul>
<li><a
href="https://github.com/OHDSI/CommonDataModel/wiki/person">Person</a>:
Contains records that uniquely identify each patient in the source data
who is at-risk to have clinical observations recorded within the source
systems.</li>
<li><a
href="https://github.com/OHDSI/CommonDataModel/wiki/observation_period">Observation_period</a>:
Contains records which uniquely define the spans of time for which a
Person is at-risk to have clinical events recorded within the source
systems.</li>
</ul>
<p>It is then up to you which tables to populate, though the core event
tables are generally agreed upon to be <a
href="https://github.com/OHDSI/CommonDataModel/wiki/CONDITION_OCCURRENCE">Condition_occurrence</a>,
<a
href="https://github.com/OHDSI/CommonDataModel/wiki/PROCEDURE_OCCURRENCE">Procedure_occurrence</a>,
<a
href="https://github.com/OHDSI/CommonDataModel/wiki/DRUG_EXPOSURE">Drug_exposure</a>,
<a
href="https://github.com/OHDSI/CommonDataModel/wiki/MEASUREMENT">Measurement</a>,
and <a
href="https://github.com/OHDSI/CommonDataModel/wiki/OBSERVATION">Observation</a>.
Each table has certain required fields, a full list of which can be
found on the Common Data Model <a
href="https://github.com/OHDSI/CommonDataModel/wiki/">wiki page</a>.</p>
<p><strong>4. Does the data model include any derived information? Which
tables or values are derived?</strong></p>
<p>The common data model stores verbatim data from the source across
various clinical domains, such as records for conditions, drugs,
procedures, and measurements. In addition, to assist the analyst, the
common data model also provides some derived tables, based on commonly
used analytic procedures. For example, the <a
href="https://github.com/OHDSI/CommonDataModel/wiki/CONDITION_ERA">Condition_era</a>
table is derived from the <a
href="https://github.com/OHDSI/CommonDataModel/wiki/CONDITION_OCCURRENCE">Condition_occurrence</a>
table and both the <a
href="https://github.com/OHDSI/CommonDataModel/wiki/DRUG_ERA">Drug_era</a>
and <a
href="https://github.com/OHDSI/CommonDataModel/wiki/DOSE_ERA">Dose_era</a>
tables are derived from the <a
href="https://github.com/OHDSI/CommonDataModel/wiki/DRUG_EXPOSURE">Drug_exposure</a>
table. An era is defined as a span of time when a patient is assumed to
have a given condition or exposure to a particular active ingredient.
Members of the community have written code to create these tables and it
is out on the <a
href="https://github.com/OHDSI/CommonDataModel/tree/master/CodeExcerpts/DerivedTables">github</a>
if you choose to use it in your CDM build. It is important to reinforce,
the analyst has the opportunity, but not the obligation, to use any of
the derived tables and all of the source data is still available for
direct use if the analysis calls for different assumptions.</p>
<p><strong>5. How is age captured in the model?</strong></p>
<p>Year_of_birth, month_of_birth, day_of_birth and birth_datetime are
all fields in the Person table designed to capture some form of date of
birth. While only year_of_birth is required, these fields allow for
maximum flexibility over a wide range of data sources.</p>
<p><strong>6. How are gender, race, and ethnicity captured in the model?
Are they coded using values a human reader can understand?</strong></p>
<p>Standard Concepts are used to denote all clinical entities throughout
the OMOP common data model, including gender, race, and ethnicity.
Source values are mapped to Standard Concepts during the extract,
transform, load (ETL) process of converting a database to the OMOP
Common Data Model. These are then stored in the Gender_concept_id,
Race_concept_id and Ethnicity_concept_id fields in the Person table.
Because the standard concepts span across all clinical domains, and in
keeping with Cimino's Desiderata for Controlled Medical Vocabularies in
the Twenty-First Century, the identifiers are unique, persistent
nonsemantic identifiers. Gender, for example, is stored as either 8532
(female) or 8507 (male) in gender_concept_id while the original value
from the source is stored in gender_source_value (M, male, F, etc).</p>
<p><strong>7. Are there conditions/procedures/drugs or other domains
that should be masked or hidden in the CDM?</strong></p>
<p>The masking of information related to a person is dependent on the
organization's privacy policies and may vary by data asset (<a
href="https://github.com/OHDSI/Themis/issues/21">THEMIS issue
#21</a>).</p>
<p><strong>8. How is time-varying patient information such as location
of residence addressed in the model?</strong></p>
<p>The OMOP common data model has been pragmatically defined based on
the desired analytic use cases of the community, as well as the
available types of data that community members have access to. Prior to
CDM v6.0, each person record had associated demographic attributes which
are assumed to be constant for the patient throughout the course of
their periods of observation, like location and primary care provider.
With the release of CDM v6.0, the Location_History table is now
available to track the movements of people, care sites, and providers
over time. Only the most recent location_id should be stored in the
Person table to eliminate duplication, while the person's movements are
stored in Location_History.</p>
<p>Something like marital status is a little different as it is
considered to be an observation rather than a demographic attribute.
This means that it is housed in the Observation table rather than the
Person table, giving the opportunity to store each change in status as a
unique record.</p>
<p>If someone in the community had a use case for time-varying location
of residence and also had source data that contains this information,
we'd welcome participation in the CDM workgroup to evolve the model
further.</p>
<p><strong>9. How does the model denote the time period during which a
Person's information is valid?</strong></p>
<p>The OMOP Common Data Model uses something called observation periods
(stored in the <a
href="https://github.com/OHDSI/CommonDataModel/wiki/observation_period">Observation_period</a>
table) as a way to define the time span during which a patient is
at-risk to have a clinical event recorded. In administrative claims
databases, for example, these observation periods are often analogous to
the notion of enrollment.</p>
<p><strong>10. How does the model capture start and stop dates for
insurance coverage? What if a person's coverage changes?</strong></p>
<p>The <a
href="https://github.com/OHDSI/CommonDataModel/wiki/payer_plan_period">Payer_plan_period</a>
table captures details of the period of time that a Person is
continuously enrolled under a specific health Plan benefit structure
from a given Payer. Payer plan periods, as opposed to observation
periods, can overlap so as to denote the time when a Person is enrolled
in multiple plans at the same time such as Medicare Part A and Medicare
Part D.</p>
<p><strong>11. What if I have EHR data? How would I create observation
periods?</strong></p>
<p>An observation period is considered as the time at which a patient is
at-risk to have a clinical event recorded in the source system.
Determining the appropriate observation period for each source data can
vary, depending on what information the source contains. If a source
does not provide information about a patient's entry or exit from a
system, then reasonable heuristics need to be developed and applied
within the ETL.</p>
<div id="vocabulary-mapping" class="section level2">
<h2>Vocabulary Mapping</h2>
<p><strong>12. Do I have to map my source codes to Standard Concepts
myself? Are there vocabulary mappings that already exist for me to
leverage?</strong></p>
<p>If your data use any of the 55 source vocabularies that are currently
supported, the mappings have been done for you. The full list is
available from the open-source <a
href="http://athena.ohdsi.org/search-terms/terms">ATHENA</a> tool under
the download tab (see below). You can choose to download the ten <a
href="https://github.com/OHDSI/CommonDataModel/wiki/Standardized-Vocabularies">vocabulary
tables</a> from there as well - you will need a copy in your environment
if you plan on building a CDM.</p>
<p><img src="images/Athena_download_box.png" alt="ATHENA vocabulary download page" /></p>
<p>The <a href="http://athena.ohdsi.org/search-terms/terms">ATHENA</a>
tool also allows you to explore the vocabulary before downloading it if
you are curious about the mappings or if you have a specific code in
mind and would like to know which standard concept it is associated
with; just click on the search tab and type in a keyword to begin
searching.</p>
<p><strong>13. If I want to apply the mappings myself, can I do so? Are
they transparent to all users?</strong></p>
<p>Yes, all mappings are available in the <a
href="https://github.com/OHDSI/CommonDataModel/wiki/CONCEPT_RELATIONSHIP">Concept_relationship</a>
table (which can be downloaded from <a
href="http://athena.ohdsi.org/search-terms/terms">ATHENA</a>). Each
value in a supported source terminology is assigned a Concept_id (which
is considered non-standard). Each Source_concept_id will have a mapping
to a Standard_concept_id. For example:</p>
<p><img src="images/Sepsis_to_SNOMED.png" alt="Example mapping from a source concept to a standard SNOMED concept" /></p>
<p>In this case the standard SNOMED concept 201826 for type 2 diabetes
mellitus would be stored in the Condition_occurrence table as the
Condition_concept_id and the ICD10CM concept 1567956 for type 2 diabetes
mellitus would be stored as the Condition_source_concept_id.</p>
<p><strong>14. Can RXNorm codes be stored in the model? Can I store
multiple levels if I so choose? What if one collaborator uses a
different level of RXNorm than I use when transforming their
database?</strong></p>
<p>In the OMOP Common Data Model RXNorm is considered the standard
vocabulary for representing drug exposures. One of the great things
about the Standardized Vocabulary is that the hierarchical nature of
RXNorm is preserved to enable efficient querying. It is agreed upon best
practice to store the lowest level RXNorm available and then use the
Vocabulary to explore any pertinent relationships. Drug ingredients are
the highest-level ancestors so a query for the descendants of an
ingredient should turn up all drug products (Clinical Drug or Branded
Drug) containing that ingredient. A query designed in this way will find
drugs of interest in any CDM regardless of the level of RXNorm used.</p>
<p><strong>15. What if the vocabulary has a mapping I don't agree with?
Can it be changed?</strong></p>
<p>Yes, that is the beauty of the community! If you find a mapping in
the vocabulary that doesn't seem to belong or that you think could be
better, feel free to write a note on the <a
href="https://forums.ohdsi.org/">forums</a> or on the <a
href="https://github.com/OHDSI/Vocabulary-v5.0/issues">vocabulary
github</a>. If the community agrees with your assessment it will be
addressed in the next vocabulary version.</p>
<p><strong>16. What if I have source codes that are specific to my site?
How would these be mapped?</strong></p>
<p>In the OMOP Vocabulary there is an empty table called the
Source_to_concept_map. It is a simple table structure that allows you to
establish mapping(s) for each source code with a standard concept in the
OMOP Vocabulary (TARGET_CONCEPT_ID). This work can be facilitated by the
OHDSI tool <a href="https://github.com/OHDSI/Usagi">Usagi</a> (pictured
below) which searches for text similarity between your source code
descriptions and the OMOP Vocabulary and exports mappings in a
SOURCE_TO_CONCEPT_MAP table structure. Example Source_to_concept_map
files can be found <a
href="https://github.com/OHDSI/ETL-CDMBuilder/tree/master/man/VOCABULARY_ADDITIONS">here</a>.
These generated Source_to_concept_map files are then loaded into the
OMOP Vocabulary's empty Source_to_concept_map prior to processing the
native data into the CDM so that the CDM builder can use them in a
build.</p>
<p><img src="images/Usagi.png" alt="Screenshot of the Usagi mapping tool" /></p>
<p>If a source code is not supported by the OMOP Vocabulary, one can
create new records in the CONCEPT table; however, the CONCEPT_IDs
should start &gt;2000000000 so that it is easy to tell between the OMOP
Vocabulary concepts and the site specific concepts. Once those concepts
exist CONCEPT_RELATIONSHIPS can be generated to assign them to
standard terminologies; USAGI can facilitate this process as well (<a
href="https://github.com/OHDSI/Themis/issues/22">THEMIS issue
#22</a>).</p>
<p><strong>17. How are one-to-many mappings applied?</strong></p>
<p>If one source code maps to two Standard Concepts then two rows are
stored in the corresponding clinical event table.</p>
<p><strong>18. What if I want to keep my original data as well as the
mapped values? Is there a way for me to do that?</strong></p>
<p>Yes! Source values and Source Concepts are fully maintained within
the OMOP Common Data Model. A Source Concept represents the code in the
source data. Each Source Concept is mapped to one or more Standard
Concepts during the ETL process and both are stored in the corresponding
clinical event table. If no mapping is available, the Standard Concept
with the concept_id = 0 is written into the *_concept_id field
(Condition_concept_id, Procedure_concept_id, etc.) so as to preserve the
record from the native data.</p>
</div>
<div id="common-data-model-versioning" class="section level2">
<h2>Common Data Model Versioning</h2>
<p><strong>19. Who decides when and how to change the data
model?</strong></p>
<p>The community! There is a <a
href="https://docs.google.com/document/d/144e_fc7dyuinfJfbYW5MsJeSijVSzsNE7GMY6KRX10g/edit?usp=sharing">working
group</a> designed around updating the model and everything is done by
consensus. Members submit proposed changes to the <a
href="https://github.com/OHDSI/CommonDataModel">github</a> in the form
of <a href="https://github.com/OHDSI/CommonDataModel/issues">issues</a>
and the group meets once a month to discuss and vote on the changes. Any
ratified proposals are then added to the queue for a future version of
the Common Data Model.</p>
<p><strong>20. Are changes to the model backwards
compatible?</strong></p>
<p>Generally point version changes (5.1 -&gt; 5.2) are backwards
compatible and major version changes (4.0 -&gt; 5.0) may not be. All
updates to the model are listed in the release notes for each version
and anything that could potentially affect backwards compatibility is
clearly labeled.</p>
<p><strong>21. How frequently does the model change?</strong></p>
<p>The current schedule is for major versions to be released every year
and point versions to be released every quarter though that is subject to
the needs of the community.</p>
<p><strong>22. What is the dissemination plan for changes?</strong></p>
<p>Changes are first listed in the release notes on the <a
href="https://github.com/OHDSI/CommonDataModel/">github</a> and in the
<a href="https://github.com/OHDSI/CommonDataModel/wiki">common data
model wiki</a>. New versions are also announced on the weekly community
calls and on the <a href="https://forums.ohdsi.org">community
forums</a>.</p>
</div>
<div id="ohdsi-tools" class="section level2">
<h2>OHDSI Tools</h2>
<p><strong>23. What are the currently available analytic
tools?</strong></p>
<p>While there are a variety of tools freely available from the
community, these are the most widely used:</p>
<ul>
<li><a href="http://www.github.com/ohdsi/achilles">ACHILLES</a> - a
stand-alone tool for database characterization</li>
<li><a href="http://www.ohdsi.org/web/atlas/#/home">ATLAS</a> - an
integrated platform for vocabulary exploration, cohort definition, case
review, clinical characterization, incidence estimation,
population-level effect estimation design, and patient-level prediction
design (<a href="http://www.github.com/ohdsi/atlas">link to
github</a>)</li>
<li><a href="https://github.com/OHDSI/ArachneUI">ARACHNE</a> - a tool to
facilitate distributed network analyses</li>
<li><a href="https://github.com/OHDSI/whiterabbit">WhiteRabbit</a> - an
application that can be used to analyse the structure and contents of a
database as preparation for designing an ETL</li>
<li><a href="https://github.com/OHDSI/whiterabbit">RabbitInAHat</a> - an
application for interactive design of an ETL to the OMOP Common Data
Model with the help of the scan report generated by White
Rabbit</li>
<li><a href="https://github.com/OHDSI/usagi">Usagi</a> - an application
to help create mappings between coding systems and the Vocabulary
standard concepts.</li>
</ul>
<p><strong>24. Who is responsible for updating the tools to account for
data model changes, bugs, and errors?</strong></p>
<p>The community! All the tools are open source meaning that anyone can
submit an issue they have found, offer suggestions, and write code to
fix the problem.</p>
<p><strong>25. Do the current tools allow a user to define a treatment
gap (persistence window) of any value when creating treatment
episodes?</strong></p>
<p>Yes, the ATLAS tool allows you to specify a persistence window
between drug exposures when defining a cohort (see image below).</p>
<p><img src="images/ATLAS_Persistence_Window.png" alt="ATLAS cohort definition showing the persistence window setting" /></p>
<p><strong>26. Can the current tools identify medication use during
pregnancy?</strong></p>
<p>Yes, you can identify pregnancy markers from various clinical
domains, including conditions and procedures, for example live birth,
and then define temporal logic to look for drug exposure records in some
interval prior to the pregnancy end. In addition, members of the
community have built an advanced logic to define pregnancy episodes with
all pregnancy outcomes represented, which can be useful for this type of
research.</p>
<p><strong>27. Do the current tools execute against the mapped values or
source values?</strong></p>
<p>The tools can execute against both source and mapped values, though
mapped values are strongly encouraged. Since one of the aims of OHDSI is
to create a distributed data network across the world on which to run
research studies, the use of source values fails to take advantage of
the benefits of the Common Data Model.</p>
</div>
<div id="network-research-studies" class="section level2">
<h2>Network Research Studies</h2>
<p><strong>28. Who can generate requests?</strong></p>
<p>Anyone in the community! Any question that gains enough interest and
participation can be a network research study.</p>
<p><strong>29. Who will develop the queries to distribute to the
network?</strong></p>
<p>Typically a principal investigator leads the development of a
protocol. The PI may also lead the development of the analysis procedure
corresponding to the protocol. If the PI does not have the technical
skills required to write the analysis procedure that implements the
protocol, someone in the community can help them put it together.</p>
<p><strong>30. What language are the queries written in?</strong></p>
<p>Queries are written in R and SQL. The <a
href="https://github.com/OHDSI/sqlrender">SqlRender</a> package can
translate any query written in a templated SQL Server-like dialect to
any of the supported RDBMS environments, including Postgresql, Oracle,
Redshift, Parallel Data Warehouse, Hadoop Impala, Google BigQuery, and
Netezza.</p>
<p><strong>31. How do the queries get to the data partners and how are
they run once there?</strong></p>
<p>OHDSI runs as a distributed data network. All analyses are publicly
available and can be downloaded to run at each site. The packages can be
run locally and, at the data partner's discretion, aggregate results can
be shared with the study coordinator.</p>
<p>Data partners can also make use of one of OHDSI's open-source tools
called <a href="https://github.com/OHDSI/arachne">ARACHNE</a>, a tool to
facilitate distributed network analytics against the OMOP CDM.</p>
</div>
<div id="recommended-system-requirements" class="section level2">
<h2>Recommended System Requirements</h2>
<p>It is difficult to recommend what technical capabilities a site needs
to set up an ETL because it is heavily dependent on the amount of data
they have and how they plan to use it. Here are some examples of options
that have worked well for small to medium organizations and large
organizations:</p>
<p><strong>Small-to-Medium Organization</strong></p>
<ul>
<li>CDM size is 100MB to several GBs</li>
<li>Vocab ~20GB</li>
<li>Results &lt; 500 MB</li>
<li>Recommend
<ul>
<li>Server class machine disk &gt;= 250GB (SSD preferred), &gt;= 4
cores, &gt;= 32GB RAM</li>
</ul></li>
</ul>
<p><strong>Large Organization</strong></p>
<ul>
<li>CDM size is 12GB to several TBs</li>
<li>Vocab ~20GB</li>
<li>Results &lt; 500 MB</li>
<li>Recommend
<ul>
<li>Cloud-based infrastructure like multiple AWS Redshift clusters, for
example:</li>
<li><img src="images/AWS_clusters.png" alt="Example AWS Redshift cluster configuration" /></li>
</ul></li>
</ul>
</div>
</div>
</div>
</div>
<script>
// Apply Bootstrap's table classes to pandoc-generated tables, identified as
// any <table> whose <tbody> directly contains a `tr.odd` row.
function bootstrapStylePandocTables() {
  var pandocTables = $('tr.odd').parent('tbody').parent('table');
  pandocTables.addClass('table table-condensed');
}
// Run the styling pass once the DOM is ready.
$(function () {
  bootstrapStylePandocTables();
});
</script>
<!-- tabsets -->
<script>
// Build the tabsets once the DOM is ready.
$(function () {
  window.buildTabsets("TOC");
});
// Dropdown-style tabsets: clicking any tab toggles the `nav-tabs-open` class
// on the tab list that contains it.
$(function () {
  var dropdownTabs = $('.tabset-dropdown > .nav-tabs > li');
  dropdownTabs.on('click', function () {
    var tabList = $(this).parent();
    tabList.toggleClass('nav-tabs-open');
  });
});
</script>
<!-- code folding -->
<script>
// Build the table of contents in #TOC from the headings inside .toc-content.
$(function () {
  // Temporarily treat `.unlisted.unnumbered` elements as toc-ignore, for
  // consistency with Pandoc.
  $('.unlisted.unnumbered').addClass('toc-ignore');

  // Move the toc-ignore marker from a section wrapper div to its header.
  var ignoredSections = $('div.section.toc-ignore');
  ignoredSections.removeClass('toc-ignore');
  ignoredSections.children('h1,h2,h3,h4,h5').addClass('toc-ignore');

  // Options handed to the tocify plugin.
  var tocOptions = {
    selectors: "h1,h2,h3,h4,h5",
    theme: "bootstrap3",
    context: '.toc-content',
    // Anchor hash for a heading: drop . \ / ? & ! # < > and turn each
    // whitespace character into an underscore.
    hashGenerator: function (text) {
      return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_');
    },
    ignoreSelector: ".toc-ignore",
    scrollTo: 0,
    showAndHide: true,
    smoothScroll: true
  };

  // tocify
  $("#TOC").tocify(tocOptions).data("toc-tocify");
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Inject the MathJax script tag into <head> at runtime.
(function () {
  var mathjaxTag = document.createElement("script");
  mathjaxTag.type = "text/javascript";
  mathjaxTag.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  var headElement = document.getElementsByTagName("head")[0];
  headElement.appendChild(mathjaxTag);
})();
</script>
</body>
</html>