OMOP/docs/sqlScripts.html

1077 lines
37 KiB
HTML
Raw Permalink Normal View History

2020-10-06 15:11:13 +00:00
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
2023-03-30 14:08:24 +00:00
<title>SQL Scripts</title>
2020-10-06 15:11:13 +00:00
2023-03-30 14:08:24 +00:00
<script src="site_libs/header-attrs-2.21/header-attrs.js"></script>
<script src="site_libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
2020-10-06 15:11:13 +00:00
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<style>h1 {font-size: 34px;}
h1.title {font-size: 38px;}
h2 {font-size: 30px;}
h3 {font-size: 24px;}
h4 {font-size: 18px;}
h5 {font-size: 16px;}
h6 {font-size: 12px;}
code {color: inherit; background-color: rgba(0, 0, 0, 0.04);}
pre:not([class]) { background-color: white }</style>
2020-10-06 15:11:13 +00:00
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/default.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
2023-03-30 14:08:24 +00:00
<link href="site_libs/font-awesome-6.2.1/css/all.min.css" rel="stylesheet" />
<link href="site_libs/font-awesome-6.2.1/css/v4-shims.min.css" rel="stylesheet" />
2020-10-06 15:11:13 +00:00
<link rel='shortcut icon' type='image/x-icon' href='favicon.ico' />
2021-01-06 15:52:57 +00:00
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
2020-10-06 15:11:13 +00:00
<style type="text/css">code{white-space: pre;}</style>
<script type="text/javascript">
if (window.hljs) {
hljs.configure({languages: []});
hljs.initHighlightingOnLoad();
if (document.readyState && document.readyState === "complete") {
window.setTimeout(function() { hljs.initHighlighting(); }, 0);
}
}
</script>
2021-01-06 15:52:57 +00:00
2020-10-06 15:11:13 +00:00
<link rel="stylesheet" href="style.css" type="text/css" />
<style type = "text/css">
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
img {
max-width:100%;
}
.tabbed-pane {
padding-top: 12px;
}
.html-widget {
margin-bottom: 20px;
}
button.code-folding-btn:focus {
outline: none;
}
summary {
display: list-item;
}
details > summary > p:only-child {
display: inline;
}
pre code {
padding: 0;
}
2020-10-06 15:11:13 +00:00
</style>
<style type="text/css">
.dropdown-submenu {
position: relative;
}
.dropdown-submenu>.dropdown-menu {
top: 0;
left: 100%;
margin-top: -6px;
margin-left: -1px;
border-radius: 0 6px 6px 6px;
}
.dropdown-submenu:hover>.dropdown-menu {
display: block;
}
.dropdown-submenu>a:after {
display: block;
content: " ";
float: right;
width: 0;
height: 0;
border-color: transparent;
border-style: solid;
border-width: 5px 0 5px 5px;
border-left-color: #cccccc;
margin-top: 5px;
margin-right: -10px;
}
.dropdown-submenu:hover>a:after {
border-left-color: #adb5bd;
2020-10-06 15:11:13 +00:00
}
.dropdown-submenu.pull-left {
float: none;
}
.dropdown-submenu.pull-left>.dropdown-menu {
left: -100%;
margin-left: 10px;
border-radius: 6px 0 6px 6px;
}
</style>
<script type="text/javascript">
2020-10-06 15:11:13 +00:00
// manage active state of menu based on current page
$(document).ready(function () {
// active menu anchor
href = window.location.pathname
href = href.substr(href.lastIndexOf('/') + 1)
if (href === "")
href = "index.html";
var menuAnchor = $('a[href="' + href + '"]');
// mark the anchor link active (and if it's in a dropdown, also mark that active)
var dropdown = menuAnchor.closest('li.dropdown');
if (window.bootstrap) { // Bootstrap 4+
menuAnchor.addClass('active');
dropdown.find('> .dropdown-toggle').addClass('active');
} else { // Bootstrap 3
menuAnchor.parent().addClass('active');
dropdown.addClass('active');
}
// Navbar adjustments
var navHeight = $(".navbar").first().height() + 15;
var style = document.createElement('style');
var pt = "padding-top: " + navHeight + "px; ";
var mt = "margin-top: -" + navHeight + "px; ";
var css = "";
// offset scroll position for anchor links (for fixed navbar)
for (var i = 1; i <= 6; i++) {
css += ".section h" + i + "{ " + pt + mt + "}\n";
}
style.innerHTML = "body {" + pt + "padding-bottom: 40px; }\n" + css;
document.head.appendChild(style);
2020-10-06 15:11:13 +00:00
});
</script>
<!-- tabsets -->
<style type="text/css">
.tabset-dropdown > .nav-tabs {
display: inline-table;
max-height: 500px;
min-height: 44px;
overflow-y: auto;
border: 1px solid #ddd;
border-radius: 4px;
}
2023-03-30 14:08:24 +00:00
.tabset-dropdown > .nav-tabs > li.active:before, .tabset-dropdown > .nav-tabs.nav-tabs-open:before {
content: "\e259";
2020-10-06 15:11:13 +00:00
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
2023-03-30 14:08:24 +00:00
content: "\e258";
2020-10-06 15:11:13 +00:00
font-family: 'Glyphicons Halflings';
2023-03-30 14:08:24 +00:00
border: none;
2020-10-06 15:11:13 +00:00
}
.tabset-dropdown > .nav-tabs > li.active {
display: block;
}
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
border: none;
display: inline-block;
border-radius: 4px;
background-color: transparent;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
display: block;
float: none;
}
.tabset-dropdown > .nav-tabs > li {
display: none;
}
</style>
<!-- code folding -->
<style type="text/css">
#TOC {
margin: 25px 0px 20px 0px;
}
@media (max-width: 768px) {
#TOC {
position: relative;
width: 100%;
}
}
@media print {
.toc-content {
/* see https://github.com/w3c/csswg-drafts/issues/4434 */
float: right;
}
}
.toc-content {
padding-left: 30px;
padding-right: 40px;
}
div.main-container {
max-width: 1200px;
}
div.tocify {
width: 20%;
max-width: 260px;
max-height: 85%;
}
@media (min-width: 768px) and (max-width: 991px) {
div.tocify {
width: 25%;
}
}
@media (max-width: 767px) {
div.tocify {
width: 100%;
max-width: none;
}
}
.tocify ul, .tocify li {
line-height: 20px;
}
.tocify-subheader .tocify-item {
font-size: 0.90em;
}
.tocify .list-group-item {
border-radius: 0px;
}
</style>
</head>
<body>
<div class="container-fluid main-container">
<!-- setup 3col/9col grid for toc_float and main content -->
<div class="row">
2020-10-06 15:11:13 +00:00
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>
<div class="toc-content col-xs-12 col-sm-8 col-md-9">
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-bs-toggle="collapse" data-target="#navbar" data-bs-target="#navbar">
2020-10-06 15:11:13 +00:00
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="index.html"><div><img src="ohdsi16x16.png"></img> OMOP Common Data Model </div></a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="index.html">
<span class="fa fa-house"></span>
2020-10-06 15:11:13 +00:00
</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-landmark"></span>
2020-10-06 15:11:13 +00:00
Background
<span class="caret"></span>
2020-10-06 15:11:13 +00:00
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="background.html">Model Background</a>
</li>
<li>
<a href="cdmRefreshProcess.html">CDM Refresh Process</a>
</li>
<li>
<a href="vocabulary.html">How the Vocabulary is Built</a>
</li>
</ul>
2020-10-06 15:11:13 +00:00
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-list-alt"></span>
2020-10-06 15:11:13 +00:00
Conventions
<span class="caret"></span>
2020-10-06 15:11:13 +00:00
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="dataModelConventions.html">General Conventions</a>
</li>
<li>
<a href="ehrObsPeriods.html">Observation Periods for EHR Data</a>
</li>
<li>
<a href="cdmPrivacy.html">Patient Privacy and OMOP</a>
</li>
</ul>
2020-10-06 15:11:13 +00:00
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-history"></span>
2020-10-06 15:11:13 +00:00
CDM Versions
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdm30.html">CDM v3.0</a>
</li>
2020-10-06 15:11:13 +00:00
<li>
<a href="cdm60.html">CDM v6.0</a>
2020-10-06 15:11:13 +00:00
</li>
<li>
<a href="cdm53.html">CDM v5.3</a>
</li>
<li class="dropdown-submenu">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">NEW CDM v5.4</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdm54.html">CDM v5.4</a>
</li>
<li>
<a href="cdm54Changes.html">Changes from CDM v5.3</a>
</li>
2023-03-30 14:08:24 +00:00
<li>
<a href="cdm54erd.html">Entity Relationships</a>
</li>
</ul>
2020-10-06 15:11:13 +00:00
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-plus-square"></span>
2020-10-06 15:11:13 +00:00
2022-07-27 01:48:28 +00:00
CDM Proposals
2020-10-06 15:11:13 +00:00
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
2022-07-27 01:48:28 +00:00
<li>
<a href="cdmRequestProcess.html">How to Propose Changes to the CDM</a>
</li>
2020-10-06 15:11:13 +00:00
<li>
<a href="https://github.com/OHDSI/CommonDataModel/issues?q=is%3Aopen+is%3Aissue+label%3AProposal">Under Review</a>
2020-10-06 15:11:13 +00:00
</li>
<li class="dropdown-submenu">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">Accepted</a>
2020-10-06 15:11:13 +00:00
<ul class="dropdown-menu" role="menu">
<li>
<a href="https://github.com/OHDSI/CommonDataModel/issues/252">Region_concept_id</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-question"></span>
2020-10-06 15:11:13 +00:00
How to
2020-10-06 15:11:13 +00:00
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="download.html">Download the DDL</a>
2020-10-06 15:11:13 +00:00
</li>
<li>
<a href="cdmRPackage.html">Use the CDM R Package</a>
</li>
<li>
<a href="drug_dose.html">Calculate Drug Dose</a>
2020-10-06 15:11:13 +00:00
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-life-ring"></span>
2020-10-06 15:11:13 +00:00
Support
2020-10-06 15:11:13 +00:00
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
2022-07-27 01:48:28 +00:00
<li>
<a href="cdmDecisionTree.html">Help! My Data Doesn't Fit!</a>
</li>
2020-10-06 15:11:13 +00:00
<li>
<a href="faq.html">FAQ</a>
2020-10-06 15:11:13 +00:00
</li>
<li>
<a href="sqlScripts.html">SQL Scripts</a>
2020-10-06 15:11:13 +00:00
</li>
<li>
<a href="contribute.html">Ask a Question</a>
2020-10-06 15:11:13 +00:00
</li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a href="https://github.com/OHDSI/CommonDataModel">
<span class="fa fa-github"></span>
2020-10-06 15:11:13 +00:00
</a>
</li>
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
<div id="header">
2020-10-06 15:11:13 +00:00
</div>
<div id="sql-scripts" class="section level1">
<h1><strong>SQL Scripts</strong></h1>
<p>These SQL scripts have been developed by the community to facilitate
the Extract, Transform, and Load (ETL) process from source data to the
OMOP CDM.</p>
2020-10-06 15:11:13 +00:00
<div id="vocabulary-mapping" class="section level2">
<h2><strong>Vocabulary Mapping</strong></h2>
<div id="source-to-standard" class="section level3">
<h3>Source to Standard</h3>
<p>This script will create a large table containing all source codes and
the Standard Concepts they map to. By creating this table ahead of time
and referencing it during ETL, you can save processing time than if you
were to map source codes on the fly. It is important to note that this
script will show ALL mappings, not just those to Standard, Valid
Concepts. Be sure to add a filter that will only grab the correct
Standard, Valid codes.</p>
2020-10-06 15:11:13 +00:00
<pre class="sql"><code>SELECT c.concept_code AS SOURCE_CODE, c.concept_id AS SOURCE_CONCEPT_ID, c.concept_name AS SOURCE_CODE_DESCRIPTION, c.vocabulary_id AS SOURCE_VOCABULARY_ID, c.domain_id AS SOURCE_DOMAIN_ID, c.CONCEPT_CLASS_ID AS SOURCE_CONCEPT_CLASS_ID, c.VALID_START_DATE AS SOURCE_VALID_START_DATE, c.VALID_END_DATE AS SOURCE_VALID_END_DATE, c.INVALID_REASON AS SOURCE_INVALID_REASON, c1.concept_id AS TARGET_CONCEPT_ID, c1.concept_name AS TARGET_CONCEPT_NAME, c1.VOCABULARY_ID AS TARGET_VOCABUALRY_ID, c1.domain_id AS TARGET_DOMAIN_ID, c1.concept_class_id AS TARGET_CONCEPT_CLASS_ID, c1.INVALID_REASON AS TARGET_INVALID_REASON, c1.standard_concept AS TARGET_STANDARD_CONCEPT
FROM CONCEPT C
JOIN CONCEPT_RELATIONSHIP CR
ON C.CONCEPT_ID = CR.CONCEPT_ID_1
AND CR.invalid_reason IS NULL
2020-12-10 17:43:21 +00:00
AND lower(cr.relationship_id) = &#39;maps to&#39;
2020-10-06 15:11:13 +00:00
JOIN CONCEPT C1
ON CR.CONCEPT_ID_2 = C1.CONCEPT_ID
AND C1.INVALID_REASON IS NULL
UNION
SELECT source_code, SOURCE_CONCEPT_ID, SOURCE_CODE_DESCRIPTION, source_vocabulary_id, c1.domain_id AS SOURCE_DOMAIN_ID, c2.CONCEPT_CLASS_ID AS SOURCE_CONCEPT_CLASS_ID, c1.VALID_START_DATE AS SOURCE_VALID_START_DATE, c1.VALID_END_DATE AS SOURCE_VALID_END_DATE, stcm.INVALID_REASON AS SOURCE_INVALID_REASON,target_concept_id, c2.CONCEPT_NAME AS TARGET_CONCEPT_NAME, target_vocabulary_id, c2.domain_id AS TARGET_DOMAIN_ID, c2.concept_class_id AS TARGET_CONCEPT_CLASS_ID, c2.INVALID_REASON AS TARGET_INVALID_REASON, c2.standard_concept AS TARGET_STANDARD_CONCEPT
FROM source_to_concept_map stcm
LEFT OUTER JOIN CONCEPT c1
ON c1.concept_id = stcm.source_concept_id
LEFT OUTER JOIN CONCEPT c2
ON c2.CONCEPT_ID = stcm.target_concept_id
WHERE stcm.INVALID_REASON IS NULL
</code></pre>
</div>
<div id="source-to-source" class="section level3">
<h3>Source to Source</h3>
<p>This script will create a large table containing all source codes and
their referent Concepts. While you may be tempted to use the Source to
Standard query to find these, it is important to use this one instead to
populate *_SOURCE_CONCEPT_ID fields. Some source codes map to multiple
Standard Concepts though they have only one source Concept. By using the
Source to Standard query you may inadvertently create duplicate records
in your CDM instance.</p>
2020-10-06 15:11:13 +00:00
<pre class="sql"><code>SELECT c.concept_code AS SOURCE_CODE, c.concept_id AS SOURCE_CONCEPT_ID, c.CONCEPT_NAME AS SOURCE_CODE_DESCRIPTION, c.vocabulary_id AS SOURCE_VOCABULARY_ID, c.domain_id AS SOURCE_DOMAIN_ID, c.concept_class_id AS SOURCE_CONCEPT_CLASS_ID, c.VALID_START_DATE AS SOURCE_VALID_START_DATE, c.VALID_END_DATE AS SOURCE_VALID_END_DATE, c.invalid_reason AS SOURCE_INVALID_REASON, c.concept_ID as TARGET_CONCEPT_ID, c.concept_name AS TARGET_CONCEPT_NAME, c.vocabulary_id AS TARGET_VOCABULARY_ID, c.domain_id AS TARGET_DOMAIN_ID, c.concept_class_id AS TARGET_CONCEPT_CLASS_ID, c.INVALID_REASON AS TARGET_INVALID_REASON, c.STANDARD_CONCEPT AS TARGET_STANDARD_CONCEPT
FROM CONCEPT c
UNION
SELECT source_code, SOURCE_CONCEPT_ID, SOURCE_CODE_DESCRIPTION, source_vocabulary_id, c1.domain_id AS SOURCE_DOMAIN_ID, c2.CONCEPT_CLASS_ID AS SOURCE_CONCEPT_CLASS_ID, c1.VALID_START_DATE AS SOURCE_VALID_START_DATE, c1.VALID_END_DATE AS SOURCE_VALID_END_DATE,stcm.INVALID_REASON AS SOURCE_INVALID_REASON, target_concept_id, c2.CONCEPT_NAME AS TARGET_CONCEPT_NAME, target_vocabulary_id, c2.domain_id AS TARGET_DOMAIN_ID, c2.concept_class_id AS TARGET_CONCEPT_CLASS_ID, c2.INVALID_REASON AS TARGET_INVALID_REASON, c2.standard_concept AS TARGET_STANDARD_CONCEPT
FROM source_to_concept_map stcm
LEFT OUTER JOIN CONCEPT c1
ON c1.concept_id = stcm.source_concept_id
LEFT OUTER JOIN CONCEPT c2
ON c2.CONCEPT_ID = stcm.target_concept_id
WHERE stcm.INVALID_REASON IS NULL</code></pre>
</div>
</div>
<div id="era-tables" class="section level2">
<h2><strong>Era Tables</strong></h2>
<div id="condition-eras" class="section level3">
<h3>Condition Eras</h3>
<p>This script will insert values into the CONDITION_ERA table given
that the CONDITION_OCCURRENCE table is populated. It will string
together condition records that have &lt;= 30 days between them into
eras during which the Person is assumed to have the given condition.
<strong>NOTE</strong> This query only works with 5.3 and below.</p>
2020-10-06 15:11:13 +00:00
<pre class="sql"><code>/****************************************************
OHDSI-SQL File Instructions
-----------------------------
1. Set parameter name of schema that contains CDMv4 instance
(@SOURCE_CDMV4, @SOURCE_CDMV4_SCHEMA)
2. Set parameter name of schema that contains CDMv5 instance
(@TARGET_CDMV5, @TARGET_CDMV5_SCHEMA)
3. Run this script through SqlRender to produce a script that will work in your
source dialect. SqlRender can be found here: https://github.com/OHDSI/SqlRender
4. Run the script produced by SQL Render on your target RDBDMS.
&lt;RDBMS&gt; File Instructions
-------------------------
1. This script will hold a number of placeholders for your CDM V4 and CDMV5
database/schema. In order to make this file work in your environment, you
should plan to do a global &quot;FIND AND REPLACE&quot; on this file to fill in the
file with values that pertain to your environment. The following are the
tokens you should use when doing your &quot;FIND AND REPLACE&quot; operation:
[CDM]
[CDM].[CDMSCHEMA]
*********************************************************************************/
/* SCRIPT PARAMETERS */
{DEFAULT @TARGET_CDMV5 = &#39;[CDM]&#39; } -- The target CDMv5 database name
{DEFAULT @TARGET_CDMV5_SCHEMA = &#39;[CDM].[CDMSCHEMA]&#39; } -- the target CDMv5 database plus schema
USE @TARGET_CDMV5;
/****
CONDITION ERA
Note: Eras derived from CONDITION_OCCURRENCE table, using 30d gap
****/
IF OBJECT_ID(&#39;tempdb..#condition_era_phase_1&#39;, &#39;U&#39;) IS NOT NULL
DROP TABLE #condition_era_phase_1;
/* / */
IF OBJECT_ID(&#39;tempdb..#cteConditionTarget&#39;, &#39;U&#39;) IS NOT NULL
DROP TABLE #cteConditionTarget;
/* / */
-- create base eras from the concepts found in condition_occurrence
SELECT co.PERSON_ID
,co.condition_concept_id
,co.CONDITION_START_DATE
,COALESCE(co.CONDITION_END_DATE, DATEADD(day, 1, CONDITION_START_DATE)) AS CONDITION_END_DATE
INTO #cteConditionTarget
FROM @TARGET_CDMV5_SCHEMA.CONDITION_OCCURRENCE co;
/* / */
IF OBJECT_ID(&#39;tempdb..#cteCondEndDates&#39;, &#39;U&#39;) IS NOT NULL
DROP TABLE #cteCondEndDates;
/* / */
SELECT PERSON_ID
,CONDITION_CONCEPT_ID
,DATEADD(day, - 30, EVENT_DATE) AS END_DATE -- unpad the end date
INTO #cteCondEndDates
FROM (
SELECT E1.PERSON_ID
,E1.CONDITION_CONCEPT_ID
,E1.EVENT_DATE
,COALESCE(E1.START_ORDINAL, MAX(E2.START_ORDINAL)) START_ORDINAL
,E1.OVERALL_ORD
FROM (
SELECT PERSON_ID
,CONDITION_CONCEPT_ID
,EVENT_DATE
,EVENT_TYPE
,START_ORDINAL
,ROW_NUMBER() OVER (
PARTITION BY PERSON_ID
,CONDITION_CONCEPT_ID ORDER BY EVENT_DATE
,EVENT_TYPE
) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date
FROM (
-- select the start dates, assigning a row number to each
SELECT PERSON_ID
,CONDITION_CONCEPT_ID
,CONDITION_START_DATE AS EVENT_DATE
,- 1 AS EVENT_TYPE
,ROW_NUMBER() OVER (
PARTITION BY PERSON_ID
,CONDITION_CONCEPT_ID ORDER BY CONDITION_START_DATE
) AS START_ORDINAL
FROM #cteConditionTarget
UNION ALL
-- pad the end dates by 30 to allow a grace period for overlapping ranges.
SELECT PERSON_ID
,CONDITION_CONCEPT_ID
,DATEADD(day, 30, CONDITION_END_DATE)
,1 AS EVENT_TYPE
,NULL
FROM #cteConditionTarget
) RAWDATA
) E1
INNER JOIN (
SELECT PERSON_ID
,CONDITION_CONCEPT_ID
,CONDITION_START_DATE AS EVENT_DATE
,ROW_NUMBER() OVER (
PARTITION BY PERSON_ID
,CONDITION_CONCEPT_ID ORDER BY CONDITION_START_DATE
) AS START_ORDINAL
FROM #cteConditionTarget
) E2 ON E1.PERSON_ID = E2.PERSON_ID
AND E1.CONDITION_CONCEPT_ID = E2.CONDITION_CONCEPT_ID
AND E2.EVENT_DATE &lt;= E1.EVENT_DATE
GROUP BY E1.PERSON_ID
,E1.CONDITION_CONCEPT_ID
,E1.EVENT_DATE
,E1.START_ORDINAL
,E1.OVERALL_ORD
) E
WHERE (2 * E.START_ORDINAL) - E.OVERALL_ORD = 0;
/* / */
IF OBJECT_ID(&#39;tempdb..#cteConditionEnds&#39;, &#39;U&#39;) IS NOT NULL
DROP TABLE #cteConditionEnds;
/* / */
SELECT c.PERSON_ID
,c.CONDITION_CONCEPT_ID
,c.CONDITION_START_DATE
,MIN(e.END_DATE) AS ERA_END_DATE
INTO #cteConditionEnds
FROM #cteConditionTarget c
INNER JOIN #cteCondEndDates e ON c.PERSON_ID = e.PERSON_ID
AND c.CONDITION_CONCEPT_ID = e.CONDITION_CONCEPT_ID
AND e.END_DATE &gt;= c.CONDITION_START_DATE
GROUP BY c.PERSON_ID
,c.CONDITION_CONCEPT_ID
,c.CONDITION_START_DATE;
/* / */
INSERT INTO @TARGET_CDMV5_SCHEMA.condition_era (
condition_era_id
,person_id
,condition_concept_id
,condition_era_start_date
,condition_era_end_date
,condition_occurrence_count
)
SELECT row_number() OVER (
ORDER BY person_id
) AS condition_era_id
,person_id
,CONDITION_CONCEPT_ID
,min(CONDITION_START_DATE) AS CONDITION_ERA_START_DATE
,ERA_END_DATE AS CONDITION_ERA_END_DATE
,COUNT(*) AS CONDITION_OCCURRENCE_COUNT
FROM #cteConditionEnds
GROUP BY person_id
,CONDITION_CONCEPT_ID
,ERA_END_DATE;</code></pre>
</div>
<div id="drug-eras" class="section level3">
<h3>Drug Eras</h3>
<p>If the DRUG_EXPOSURE table is populated this script will string
together periods of time that a person is exposed to an active drug
ingredient, allowing for 30 gaps in between. It will then insert the
resulting era records into the DRUG_ERA table. <strong>NOTE</strong>
This query only works with the <strong>version 5</strong> series and
below.</p>
2020-10-06 15:11:13 +00:00
<pre class="sql"><code>
-- Code taken from:
-- https://github.com/OHDSI/ETL-CMS/blob/master/SQL/create_CDMv5_drug_era_non_stockpile.sql
if object_id(&#39;tempdb..#tmp_de&#39;, &#39;U&#39;) is not null drop table #tmp_de;
WITH
ctePreDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, drug_exposure_start_date, days_supply, drug_exposure_end_date) AS
(-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date
SELECT
d.drug_exposure_id
, d.person_id
, c.concept_id AS ingredient_concept_id
, d.drug_exposure_start_date AS drug_exposure_start_date
, d.days_supply AS days_supply
, COALESCE(
---NULLIF returns NULL if both values are the same, otherwise it returns the first parameter
NULLIF(drug_exposure_end_date, NULL),
---If drug_exposure_end_date != NULL, return drug_exposure_end_date, otherwise go to next case
NULLIF(dateadd(day,days_supply,drug_exposure_start_date), drug_exposure_start_date),
---If days_supply != NULL or 0, return drug_exposure_start_date + days_supply, otherwise go to next case
dateadd(day,1,drug_exposure_start_date)
---Add 1 day to the drug_exposure_start_date since there is no end_date or INTERVAL for the days_supply
) AS drug_exposure_end_date
FROM @cdm_schema.drug_exposure d
JOIN @cdm_schema.concept_ancestor ca ON ca.descendant_concept_id = d.drug_concept_id
JOIN @cdm_schema.concept c ON ca.ancestor_concept_id = c.concept_id
WHERE c.vocabulary_id = &#39;RxNorm&#39; ---8 selects RxNorm from the vocabulary_id
AND c.concept_class_id = &#39;Ingredient&#39;
AND d.drug_concept_id != 0 ---Our unmapped drug_concept_id&#39;s are set to 0, so we don&#39;t want different drugs wrapped up in the same era
AND coalesce(d.days_supply,0) &gt;= 0 ---We have cases where days_supply is negative, and this can set the end_date before the start_date, which we don&#39;t want. So we&#39;re just looking over those rows. This is a data-quality issue.
)
, cteSubExposureEndDates (person_id, ingredient_concept_id, end_date) AS --- A preliminary sorting that groups all of the overlapping exposures into one exposure so that we don&#39;t double-count non-gap-days
(
SELECT person_id, ingredient_concept_id, event_date AS end_date
FROM
(
SELECT person_id, ingredient_concept_id, event_date, event_type,
MAX(start_ordinal) OVER (PARTITION BY person_id, ingredient_concept_id
ORDER BY event_date, event_type ROWS unbounded preceding) AS start_ordinal,
-- this pulls the current START down from the prior rows so that the NULLs
-- from the END DATES will contain a value we can compare with
ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id
ORDER BY event_date, event_type) AS overall_ord
-- this re-numbers the inner UNION so all rows are numbered ordered by the event date
2020-10-06 15:11:13 +00:00
FROM (
-- select the start dates, assigning a row number to each
SELECT person_id, ingredient_concept_id, drug_exposure_start_date AS event_date,
-1 AS event_type,
ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id
ORDER BY drug_exposure_start_date) AS start_ordinal
FROM ctePreDrugTarget
2020-10-06 15:11:13 +00:00
UNION ALL
SELECT person_id, ingredient_concept_id, drug_exposure_end_date, 1 AS event_type, NULL
FROM ctePreDrugTarget
) RAWDATA
) e
WHERE (2 * e.start_ordinal) - e.overall_ord = 0
)
, cteDrugExposureEnds (person_id, drug_concept_id, drug_exposure_start_date, drug_sub_exposure_end_date) AS
(
SELECT
dt.person_id
, dt.ingredient_concept_id
, dt.drug_exposure_start_date
, MIN(e.end_date) AS drug_sub_exposure_end_date
FROM ctePreDrugTarget dt
JOIN cteSubExposureEndDates e ON dt.person_id = e.person_id AND dt.ingredient_concept_id = e.ingredient_concept_id AND e.end_date &gt;= dt.drug_exposure_start_date
GROUP BY
dt.drug_exposure_id
, dt.person_id
, dt.ingredient_concept_id
, dt.drug_exposure_start_date
)
--------------------------------------------------------------------------------------------------------------
, cteSubExposures(row_number, person_id, drug_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count) AS
(
SELECT ROW_NUMBER() OVER (PARTITION BY person_id, drug_concept_id, drug_sub_exposure_end_date ORDER BY person_id)
, person_id, drug_concept_id, MIN(drug_exposure_start_date) AS drug_sub_exposure_start_date, drug_sub_exposure_end_date, COUNT(*) AS drug_exposure_count
FROM cteDrugExposureEnds
GROUP BY person_id, drug_concept_id, drug_sub_exposure_end_date
--ORDER BY person_id, drug_concept_id
)
--------------------------------------------------------------------------------------------------------------
/*Everything above grouped exposures into sub_exposures if there was overlap between exposures.
*So there was no persistence window. Now we can add the persistence window to calculate eras.
*/
--------------------------------------------------------------------------------------------------------------
, cteFinalTarget(row_number, person_id, ingredient_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count, days_exposed) AS
(
SELECT row_number, person_id, drug_concept_id, drug_sub_exposure_start_date, drug_sub_exposure_end_date, drug_exposure_count
, datediff(day,drug_sub_exposure_start_date,drug_sub_exposure_end_date) AS days_exposed
FROM cteSubExposures
)
--------------------------------------------------------------------------------------------------------------
, cteEndDates (person_id, ingredient_concept_id, end_date) AS -- the magic
(
SELECT person_id, ingredient_concept_id, dateadd(day,-30,event_date) AS end_date -- unpad the end date
FROM
(
SELECT person_id, ingredient_concept_id, event_date, event_type,
MAX(start_ordinal) OVER (PARTITION BY person_id, ingredient_concept_id
ORDER BY event_date, event_type ROWS UNBOUNDED PRECEDING) AS start_ordinal,
-- this pulls the current START down from the prior rows so that the NULLs
-- from the END DATES will contain a value we can compare with
ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id
ORDER BY event_date, event_type) AS overall_ord
-- this re-numbers the inner UNION so all rows are numbered ordered by the event date
FROM (
-- select the start dates, assigning a row number to each
SELECT person_id, ingredient_concept_id, drug_sub_exposure_start_date AS event_date,
-1 AS event_type,
ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id
ORDER BY drug_sub_exposure_start_date) AS start_ordinal
FROM cteFinalTarget
2020-10-06 15:11:13 +00:00
UNION ALL
2020-10-06 15:11:13 +00:00
-- pad the end dates by 30 to allow a grace period for overlapping ranges.
SELECT person_id, ingredient_concept_id, dateadd(day,30,drug_sub_exposure_end_date), 1 AS event_type, NULL
FROM cteFinalTarget
) RAWDATA
) e
WHERE (2 * e.start_ordinal) - e.overall_ord = 0
)
, cteDrugEraEnds (person_id, drug_concept_id, drug_sub_exposure_start_date, drug_era_end_date, drug_exposure_count, days_exposed) AS
(
SELECT
ft.person_id
, ft.ingredient_concept_id
, ft.drug_sub_exposure_start_date
, MIN(e.end_date) AS era_end_date
, drug_exposure_count
, days_exposed
FROM cteFinalTarget ft
JOIN cteEndDates e ON ft.person_id = e.person_id AND ft.ingredient_concept_id = e.ingredient_concept_id AND e.end_date &gt;= ft.drug_sub_exposure_start_date
GROUP BY
ft.person_id
, ft.ingredient_concept_id
, ft.drug_sub_exposure_start_date
, drug_exposure_count
, days_exposed
)
SELECT
row_number()over(order by person_id) drug_era_id
, person_id
, drug_concept_id
, MIN(drug_sub_exposure_start_date) AS drug_era_start_date
, drug_era_end_date
, SUM(drug_exposure_count) AS drug_exposure_count
, datediff(day,MIN(drug_sub_exposure_start_date),drug_era_end_date)-SUM(days_exposed) as gap_days
INTO #tmp_de
FROM cteDrugEraEnds dee
GROUP BY person_id, drug_concept_id, drug_era_end_date;
INSERT INTO @cdm_schema.drug_era(drug_era_id,person_id, drug_concept_id, drug_era_start_date, drug_era_end_date, drug_exposure_count, gap_days)
SELECT * FROM #tmp_de;</code></pre>
2020-10-06 15:11:13 +00:00
</div>
</div>
<div id="example-etl-script" class="section level2">
<h2><strong>Example ETL Script</strong></h2>
<div id="cdm_source-table" class="section level3">
<h3>CDM_SOURCE Table</h3>
<p>The script below is an example for how to fill in the CDM_SOURCE
table. This table is required for the <a
href="https://github.com/OHDSI/DataQualityDashboard">Data Quality
2023-03-30 14:08:24 +00:00
Dashboard</a> package to run.</p>
<div id="v5.3" class="section level4">
<h4>v5.3</h4>
<p>This example was taken from the <a
href="https://github.com/OHDSI/ETL-Synthea">ETL-Synthea</a> package, a
good example on how to write a complete extract-transform-load script
from start to finish, including vocabulary import.</p>
2023-03-30 14:08:24 +00:00
<pre class="sql"><code>INSERT INTO @cdm_schema.cdm_source (
cdm_source_name,
cdm_source_abbreviation,
cdm_holder,
source_description,
source_documentation_reference,
cdm_etl_reference,
source_release_date,
cdm_release_date,
cdm_version,
vocabulary_version
)
2023-03-30 14:08:24 +00:00
SELECT
&#39;Synthea synthetic health database&#39;,
&#39;Synthea&#39;,
&#39;OHDSI Community&#39;,
&#39;SyntheaTM is a Synthetic Patient Population Simulator. The goal is to output synthetic, realistic (but not real), patient data and associated health records in a variety of formats.&#39;,
&#39;https://synthetichealth.github.io/synthea/&#39;,
&#39;https://github.com/OHDSI/ETL-Synthea&#39;,
getdate(), -- NB: Set this value to the day the source data was pulled
getdate(),
&#39;v5.3&#39;,
vocabulary_version
FROM @cdm_schema.vocabulary
WHERE vocabulary_id = &#39;None&#39;;</code></pre>
</div>
<div id="v5.4" class="section level4">
<h4>v5.4</h4>
<pre class="sql"><code>INSERT INTO @cdm_schema.cdm_source (
cdm_source_name,
cdm_source_abbreviation,
cdm_holder,
source_description,
source_documentation_reference,
cdm_etl_reference,
source_release_date,
cdm_release_date,
cdm_version,
vocabulary_version,
vocabulary_version_concept_id
)
SELECT
&#39;&lt;your_cdm_source_name&gt;&#39;,
&#39;&lt;your_cdm_source_abbreviation&gt;&#39;,
&#39;&lt;your_cdm_holder&gt;&#39;,
&#39;&lt;your_source_description&gt;&#39;,
&#39;&lt;your_source_documentation_reference&gt;&#39;, -- e.g. link to source data dictionary
&#39;&lt;your_etl_reference&gt;&#39;, -- e.g. link to ETL documentation
&#39;&lt;your_source_release_date&gt;&#39;, -- when the source data was pulled
getdate(), -- or the date of ETL run
&#39;v5.4&#39;,
vocabulary_version,
756265 -- &#39;OMOP CDM Version 5.4.0&#39;
FROM @cdm_schema.vocabulary
WHERE vocabulary_id = &#39;None&#39;;</code></pre>
</div>
</div>
</div>
2020-10-06 15:11:13 +00:00
<div id="bonus-queries" class="section level2">
<h2><strong>Bonus Queries</strong></h2>
<div id="visit-concept-roll-up" class="section level3">
<h3>Visit Concept Roll-up</h3>
<p>The query below will utilize the Visit Concept hierarcy to find the
highest-level ancestors. In the case that both the VISIT_OCCURRENCE and
VISIT_DETAIL tables are populated, it is good practice (though not
required) to use the highest-level ancestors as the VISIT_CONCEPT_IDs in
the VISIT_OCCURRENCE table and their children as the
VISIT_DETAIL_CONCEPT_IDs in the VISIT_DETAIL table. This relationship
between the VISIT_OCCURRENCE and VISIT_DETAIL tables allow for
standardized Visit logic to be written, building Visits from Visit
Details. For more information on how this can be done, please see the <a
href="https://ohdsi.github.io/ETL-LambdaBuilder/Optum%20Clinformatics/Optum_visit_occurrence.html">Optum
Extended ETL documentation</a>.</p>
2020-10-06 15:11:13 +00:00
<pre class="sql"><code>SELECT concept_id, concept_name
2023-03-30 14:08:24 +00:00
FROM concept
LEFT JOIN concept_ancestor
ON concept_id=descendant_concept_id
AND ancestor_concept_id!=descendant_concept_id
WHERE domain_id=&#39;Visit&#39;
AND standard_concept=&#39;S&#39;
AND ancestor_concept_id IS NULL</code></pre>
2020-10-06 15:11:13 +00:00
</div>
</div>
</div>
</div>
</div>
</div>
<script>
// add bootstrap table styles to pandoc tables
function bootstrapStylePandocTables() {
2021-01-06 15:52:57 +00:00
$('tr.odd').parent('tbody').parent('table').addClass('table table-condensed');
2020-10-06 15:11:13 +00:00
}
$(document).ready(function () {
bootstrapStylePandocTables();
});
</script>
<!-- tabsets -->
<script>
$(document).ready(function () {
window.buildTabsets("TOC");
});
$(document).ready(function () {
$('.tabset-dropdown > .nav-tabs > li').click(function () {
$(this).parent().toggleClass('nav-tabs-open');
2020-10-06 15:11:13 +00:00
});
});
</script>
<!-- code folding -->
<script>
$(document).ready(function () {
// temporarily add toc-ignore selector to headers for the consistency with Pandoc
$('.unlisted.unnumbered').addClass('toc-ignore')
2020-10-06 15:11:13 +00:00
// move toc-ignore selectors from section div to header
$('div.section.toc-ignore')
.removeClass('toc-ignore')
.children('h1,h2,h3,h4,h5').addClass('toc-ignore');
// establish options
var options = {
selectors: "h1,h2,h3,h4,h5",
theme: "bootstrap3",
context: '.toc-content',
hashGenerator: function (text) {
2021-01-06 15:52:57 +00:00
return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_');
2020-10-06 15:11:13 +00:00
},
ignoreSelector: ".toc-ignore",
scrollTo: 0
};
options.showAndHide = true;
options.smoothScroll = true;
// tocify
var toc = $("#TOC").tocify(options).data("toc-tocify");
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
</body>
</html>