<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<title>Preserving Privacy in an OMOP CDM Implementation</title>
<script src="site_libs/header-attrs-2.25/header-attrs.js"></script>
<script src="site_libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<!-- Font sizes for h1-h6 (with a larger size for the h1.title page title), plus colors for inline code and class-less pre blocks. -->
<style>h1 {font-size: 34px;}
h1.title {font-size: 38px;}
h2 {font-size: 30px;}
h3 {font-size: 24px;}
h4 {font-size: 18px;}
h5 {font-size: 16px;}
h6 {font-size: 12px;}
code {color: inherit; background-color: rgba(0, 0, 0, 0.04);}
pre:not([class]) { background-color: white }</style>
<script src="site_libs/jqueryui-1.13.2/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/default.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-6.4.2/css/all.min.css" rel="stylesheet" />
<link href="site_libs/font-awesome-6.4.2/css/v4-shims.min.css" rel="stylesheet" />
<link rel='shortcut icon' type='image/x-icon' href='favicon.ico' />
<style type="text/css">
/* Small helper rules: code wrapping, span variants, two-column layout, hanging indents and task lists. */
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<!-- NOTE: the rule below uses the same selector as the code{white-space: pre-wrap;} rule above and comes later, so it takes precedence. -->
<style type="text/css">code{white-space: pre;}</style>
<script type="text/javascript">
// Start highlight.js, but only when the library was actually loaded.
if (window.hljs) {
  hljs.configure({languages: []});
  hljs.initHighlightingOnLoad();
  // When the document has already finished loading, also queue a
  // highlighting pass on the next timer tick.
  if (document.readyState === "complete") {
    window.setTimeout(function runHighlighting() {
      hljs.initHighlighting();
    }, 0);
  }
}
</script>
<link rel="stylesheet" href="style.css" type="text/css" />
<style type = "text/css">
/* Center the main container. The max-width here is overridden further down
   by the more specific div.main-container rule in the TOC styles. */
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
/* Images never overflow their container. */
img {
max-width:100%;
}
.tabbed-pane {
padding-top: 12px;
}
.html-widget {
margin-bottom: 20px;
}
button.code-folding-btn:focus {
outline: none;
}
summary {
display: list-item;
}
/* A stray timestamp line used to sit directly before this rule; it became
   part of the selector and invalidated the rule. It has been removed. */
details > summary > p:only-child {
display: inline;
}
pre code {
padding: 0;
}
</style>
<style type="text/css">
/* Nested (second-level) dropdown menus inside the navbar. */
.dropdown-submenu {
position: relative;
}
/* Place the nested menu to the right of its parent entry. */
.dropdown-submenu>.dropdown-menu {
top: 0;
left: 100%;
margin-top: -6px;
margin-left: -1px;
border-radius: 0 6px 6px 6px;
}
/* Open the nested menu on hover. */
.dropdown-submenu:hover>.dropdown-menu {
display: block;
}
/* Right-pointing triangle drawn with borders after the submenu link. */
.dropdown-submenu>a:after {
display: block;
content: " ";
float: right;
width: 0;
height: 0;
border-color: transparent;
border-style: solid;
border-width: 5px 0 5px 5px;
border-left-color: #cccccc;
margin-top: 5px;
margin-right: -10px;
}
.dropdown-submenu:hover>a:after {
border-left-color: #adb5bd;
}
/* .pull-left variant: the nested menu opens to the left instead. */
.dropdown-submenu.pull-left {
float: none;
}
.dropdown-submenu.pull-left>.dropdown-menu {
left: -100%;
margin-left: 10px;
border-radius: 6px 0 6px 6px;
}
</style>
<script type="text/javascript">
// Once the DOM is ready: (1) mark the navbar entry for the current page as
// active and (2) inject CSS that offsets the page for the fixed navbar.
$(document).ready(function () {
  // File name of the current page. An empty name (URL ends in "/") is
  // treated as index.html. Declared with `var` so it no longer leaks onto
  // `window` as an implicit global.
  var path = window.location.pathname;
  var href = path.slice(path.lastIndexOf('/') + 1);
  if (href === "")
    href = "index.html";
  var menuAnchor = $('a[href="' + href + '"]');
  // mark the anchor link active (and if it's in a dropdown, also mark that active)
  var dropdown = menuAnchor.closest('li.dropdown');
  if (window.bootstrap) { // Bootstrap 4+
    menuAnchor.addClass('active');
    dropdown.find('> .dropdown-toggle').addClass('active');
  } else { // Bootstrap 3
    menuAnchor.parent().addClass('active');
    dropdown.addClass('active');
  }
  // Navbar adjustments: measure the first .navbar and add 15px of clearance.
  var navHeight = $(".navbar").first().height() + 15;
  var style = document.createElement('style');
  var pt = "padding-top: " + navHeight + "px; ";
  var mt = "margin-top: -" + navHeight + "px; ";
  var css = "";
  // offset scroll position for anchor links (for fixed navbar): each section
  // heading gets padding-top plus an equal negative margin-top
  for (var i = 1; i <= 6; i++) {
    css += ".section h" + i + "{ " + pt + mt + "}\n";
  }
  style.innerHTML = "body {" + pt + "padding-bottom: 40px; }\n" + css;
  document.head.appendChild(style);
});
</script>
<!-- tabsets -->
<style type="text/css">
/* Tabsets rendered as a dropdown: the tab list becomes a bordered box. */
.tabset-dropdown > .nav-tabs {
display: inline-table;
max-height: 500px;
min-height: 44px;
overflow-y: auto;
border: 1px solid #ddd;
border-radius: 4px;
}
/* Glyphicon marker shown before the active tab, or before the list when it is open. */
.tabset-dropdown > .nav-tabs > li.active:before, .tabset-dropdown > .nav-tabs.nav-tabs-open:before {
content: "\e259";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
/* A different glyph is used on the active tab while the list is open. */
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
content: "\e258";
font-family: 'Glyphicons Halflings';
border: none;
}
.tabset-dropdown > .nav-tabs > li.active {
display: block;
}
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
border: none;
display: inline-block;
border-radius: 4px;
background-color: transparent;
}
/* While open (.nav-tabs-open), every tab is listed as a block. */
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
display: block;
float: none;
}
/* Tabs are hidden by default; the li.active and .nav-tabs-open rules above are more specific and re-show them. */
.tabset-dropdown > .nav-tabs > li {
display: none;
}
</style>
<!-- table of contents (TOC / tocify) layout -->
<style type="text/css">
/* Spacing around the TOC container. */
#TOC {
margin: 25px 0px 20px 0px;
}
/* Viewports up to 768px wide: TOC is positioned relatively at full width. */
@media (max-width: 768px) {
#TOC {
position: relative;
width: 100%;
}
}
@media print {
.toc-content {
/* see https://github.com/w3c/csswg-drafts/issues/4434 */
float: right;
}
}
.toc-content {
padding-left: 30px;
padding-right: 40px;
}
/* More specific than the earlier .main-container rule, so this max-width wins. */
div.main-container {
max-width: 1200px;
}
/* Sizing of the tocify sidebar, with overrides for narrower viewports below. */
div.tocify {
width: 20%;
max-width: 260px;
max-height: 85%;
}
@media (min-width: 768px) and (max-width: 991px) {
div.tocify {
width: 25%;
}
}
@media (max-width: 767px) {
div.tocify {
width: 100%;
max-width: none;
}
}
.tocify ul, .tocify li {
line-height: 20px;
}
/* Sub-header entries use a slightly smaller font. */
.tocify-subheader .tocify-item {
font-size: 0.90em;
}
.tocify .list-group-item {
border-radius: 0px;
}
</style>
</head>
<body>
<div class="container-fluid main-container">
<!-- setup 3col/9col grid for toc_float and main content -->
<div class="row">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>
<div class="toc-content col-xs-12 col-sm-8 col-md-9">
<!-- Fixed top navbar: brand link, collapsible menu with dropdowns, and a right-aligned GitHub link.
     Stray timestamp text lines that had leaked into this markup have been removed, and the
     icon-only controls now carry accessible names. -->
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-bs-toggle="collapse" data-target="#navbar" data-bs-target="#navbar">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<!-- The logo is decorative (the link text follows it), hence the empty alt. -->
<a class="navbar-brand" href="index.html"><div><img src="ohdsi16x16.png" alt=""> OMOP Common Data Model </div></a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="index.html" aria-label="Home">
<span class="fa fa-house" aria-hidden="true"></span>
</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-landmark"></span>
Background
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="background.html">Model Background</a>
</li>
<li>
<a href="cdmRefreshProcess.html">CDM Refresh Process</a>
</li>
<li>
<a href="vocabulary.html">How the Vocabulary is Built</a>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-list-alt"></span>
Conventions
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="dataModelConventions.html">General Conventions</a>
</li>
<li>
<a href="ehrObsPeriods.html">Observation Periods for EHR Data</a>
</li>
<li>
<a href="cdmPrivacy.html">Patient Privacy and OMOP</a>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-history"></span>
CDM Versions
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdm30.html">CDM v3.0</a>
</li>
<li>
<a href="cdm60.html">CDM v6.0</a>
</li>
<li>
<a href="cdm53.html">CDM v5.3</a>
</li>
<li class="dropdown-submenu">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">NEW CDM v5.4</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdm54.html">CDM v5.4</a>
</li>
<li>
<a href="cdm54Changes.html">Changes from CDM v5.3</a>
</li>
<li>
<a href="cdm54erd.html">Entity Relationships</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-plus-square"></span>
CDM Proposals
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdmRequestProcess.html">How to Propose Changes to the CDM</a>
</li>
<li>
<a href="https://github.com/OHDSI/CommonDataModel/issues?q=is%3Aopen+is%3Aissue+label%3AProposal">Under Review</a>
</li>
<li class="dropdown-submenu">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">Accepted</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="https://github.com/OHDSI/CommonDataModel/issues/252">Region_concept_id</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-question"></span>
How to
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="download.html">Download the DDL</a>
</li>
<li>
<a href="cdmRPackage.html">Use the CDM R Package</a>
</li>
<li>
<a href="drug_dose.html">Calculate Drug Dose</a>
</li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
<span class="fa fa-life-ring"></span>
Support
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="cdmDecisionTree.html">Help! My Data Doesn't Fit!</a>
</li>
<li>
<a href="faq.html">FAQ</a>
</li>
<li>
<a href="sqlScripts.html">SQL Scripts</a>
</li>
<li>
<a href="contribute.html">Ask a Question</a>
</li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a href="https://github.com/OHDSI/CommonDataModel" aria-label="CommonDataModel on GitHub">
<span class="fa fa-github" aria-hidden="true"></span>
</a>
</li>
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
<div id="header">
<h1 class="title toc-ignore"><strong>Preserving Privacy in an OMOP CDM
Implementation</strong></h1>
</div>
<p><em>By Kristin Kostka</em></p>
<div id="background" class="section level1">
<h1>Background</h1>
<p>The OMOP CDM is a person-centric model. Being person-centric means
the model can retain attributes that may be considered personally
identifiable information (PII) or protected health information (PHI).
There are many different ways a site may treat their OMOP CDM to uphold
their privacy protocols. In this article we provide guidance on overall
process and the potential fields that should be monitored to adhere to
these various privacy preserving protocols.</p>
<div id="defining-pii-and-phi" class="section level3">
<h3>Defining PII and PHI</h3>
<ul>
<li>PII is defined as any representation of information that permits the
identity of an individual to whom the information applies to be
reasonably inferred by either direct or indirect means [1].</li>
<li>The United States Department of Health &amp; Human Services’ Office
for Civil Rights has defined PHI as any Personal Identifying Information
(PII) that individually or combined could potentially identify a
specific individual, their past, present or future healthcare, or the
method of payment. There are eighteen unique identifiers considered to
be PHI: 1) names, 2) geographic data, 3) all elements of dates, 4)
telephone numbers, 5) FAX numbers, 6) email addresses, 7) Social
Security numbers (SSN), 8) medical record numbers (MRN), 9) health plan
beneficiary numbers, 10) account numbers, 11) certificate/license
numbers, 12) vehicle identifiers and serial numbers including license
plates, 13) device identifiers and serial numbers, 14) web URLs, 15)
internet protocol addresses, 16) biometric identifiers (e.g. retinal
scan, fingerprints), 17) full face photos and comparable images, and 18)
any unique identifying number, characteristic or code. PHI is no longer
considered PHI when it is de-identified of these unique attributes. PHI is
commonly referred to in relation to the Health Insurance Portability and
Accountability Act (HIPAA) and associated legislation such as the Health
Information Technology for Economic and Clinical Health Act (HITECH)
[2].</li>
</ul>
</div>
</div>
<div id="the-data-holders-responsibility" class="section level1">
<h1>The Data Holder’s Responsibility</h1>
<p>In OHDSI, it is the responsibility of each data holder to know,
understand and follow local data governance processes related to use of
the OMOP CDM. In the United States, these processes will follow your
organization’s local interpretation for maintaining compliance with PII
and PHI protection. In OMOP CDM implementations containing European
Union citizen data, local governance processes will include measures to
comply with General Data Protection Regulation (GDPR) [3]. As a
community, the OHDSI data network covers more than 330 databases from 34
countries. There is extensive community knowledge on the interpretation
of rule sets and exemplar IRB and local governance workflows that can be
made available to institutions navigating these processes for the first
time. If your organization does not have an established data governance
process, please reach out on the <a href="https://forums.ohdsi.org">OHDSI
Forums</a> under “Implementers” and the community can respond with
shared guidance from their own deployments. As a community, we aim to
conduct research that keeps patient-level data local and share only
aggregate results.</p>
</div>
<div id="complying-with-privacy-preservation" class="section level1">
<h1>Complying with Privacy Preservation</h1>
<p>Complying with local governance processes depends on the rule set
being used. There may be allowable times when data use agreements and
data transfer agreements exist between collaborating institutions to
facilitate sharing of PII and PHI. In this section we will discuss
common rule sets that organizations adhere to.</p>
<div id="limited-data-sets" class="section level2">
<h2>Limited Data Sets</h2>
<p>A limited data set (LDS) is defined as protected health information
that excludes certain direct identifiers of an individual or of
relatives, employers or household members of the individual — but may
include city, state, ZIP code and elements of dates. A LDS can be
disclosed only for purposes of research, public health or health care
operations. LDS requirements are dictated by the HIPAA Privacy Rule.</p>
</div>
<div id="de-identified-data-sets" class="section level2">
<h2>De-identified Data Sets</h2>
<p>De-identified data, as defined by Section 164.514(a) of the HIPAA
Privacy Rule, is health information that does not identify an individual
and with respect to which there is no reasonable basis to believe that
the information can be used to identify an individual; such information
is not individually identifiable health information. There are two methods for
achieving de-identification in accordance with HIPAA [4].</p>
<ol style="list-style-type: decimal">
<li>Expert Determination (§164.514(b)(1)) – Implementation specifications:
requirements for de-identification of protected health information. A
covered entity may determine that health information is not individually
identifiable health information only if:
<ol style="list-style-type: decimal">
<li>A person with appropriate knowledge of and experience with generally
accepted statistical and scientific principles and methods for rendering
information not individually identifiable:
<ol style="list-style-type: lower-roman">
<li>Applying such principles and methods, determines that the risk is
very small that the information could be used, alone or in combination
with other reasonably available information, by an anticipated recipient
to identify an individual who is a subject of the information; and</li>
<li>Documents the methods and results of the analysis that justify such
determination.</li>
</ol>
</li>
</ol>
</li>
<li>Safe Harbor (§164.514(b)(2)) – The eighteen unique identifiers are
obfuscated. This includes processes such as:
<ol style="list-style-type: upper-alpha">
<li>Dates of service are algorithmically shifted to protect patient
privacy.</li>
<li>Patient ZIP codes are truncated to the first three digits or removed
entirely if the ZIP code represents fewer than 20,000 individuals.</li>
<li>Removing and, when necessary, replacing unique identifiers AND The
entity does not have actual knowledge that the information could be used
alone or in combination with other information to identify an individual
who is a subject of the information.</li>
</ol>
</li>
</ol>
</div>
<div id="field-level-implications-of-de-identification-processes"
class="section level2">
<h2>Field-level Implications of De-identification Processes</h2>
<div id="person-table-attributes" class="section level3">
<h3>PERSON Table Attributes</h3>
<p>In the OMOP CDM, the PERSON table serves as the central identity
management for all Persons in the database. It contains records that
uniquely identify each person or patient, and some demographic
information. It is a table that has a number of field-level implications
for privacy preserving protocols.</p>
<p>Considerations include:</p>
<ul>
<li>PERSON.person_id should never contain a Medical Record Number,
Social Security Number or similar uniquely identifying number. This
should be a number that is essentially meaningless but has the ability
to be a primary key across tables.</li>
<li>PERSON.year_of_birth, PERSON.month_of_birth, PERSON.day_of_birth and
PERSON.birth_datetime may require some redaction or modification
depending on interpretation of the rule set. Consult local guidance on the
need to modify these fields when creating compliant views of
de-identified data.</li>
<li>PERSON.person_source_value may contain sensitive
information used to generate the person_id field. It is advised to
practice caution when creating views of these data. It would be wise to
obfuscate or redact this field if you are not sure what is contained in
the raw information being extracted, transformed and loaded into the
CDM.</li>
</ul>
</div>
</div>
<div id="date-fields-across-domains" class="section level2">
<h2>Date Fields Across Domains</h2>
<p>Date fields are used across many OMOP domains including:
OBSERVATION_PERIOD, VISIT_OCCURRENCE, VISIT_DETAIL,
CONDITION_OCCURRENCE, DRUG_EXPOSURE, PROCEDURE_OCCURRENCE,
DEVICE_EXPOSURE, MEASUREMENT, OBSERVATION, DEATH, NOTE, NOTE_NLP,
SPECIMEN, PAYER_PLAN_PERIOD, DRUG_ERA, DOSE_ERA, and CONDITION_ERA.</p>
<p>As discussed previously, some rule sets may require algorithmically
shifting dates. It is advised that when date shifting is applied, it is
done holistically. This means that when shifting dates, you should not
treat each record independently. Instead, a robust date shifting
algorithm will link off the *.person_id (where * is the domain
name such as CONDITION_OCCURRENCE, etc) and apply the same offset to all
events. This allows researchers to have the ability to understand the
sequence of events while preserving patient privacy.</p>
<p>The implications of not holistically shifting all events together by
the same offset means that information may be out of sequence or
illogical. An example would be a death record that happens prior to
other event records (conditions, drugs, procedures, etc). When applying
an algorithmic shift of dates, it is important to educate your OMOP CDM
user group of the known offset. This is especially important in temporal
studies which may be looking to make statements about disease history
relative to the time when an event is observed.</p>
<p>Some rule sets do not require algorithmic shifting of dates, such as
Limited Data Sets. In these situations, a user of a LDS OMOP CDM would
not be expecting dates to be shifted. If a shift is applied, it should
be disclosed and the offset amount (e.g. +/- 7 days, +/- 30 days, etc)
should be made available to those who have received permission to use a
LDS dataset. Otherwise, these data are not upholding the assumptions of
the rule set applied.</p>
</div>
<div id="location-table-attributes" class="section level2">
<h2>LOCATION Table Attributes</h2>
<p>The LOCATION table represents a generic way to capture physical
location or address information of Persons and Care Sites. When applying
privacy preserving procedures, this table should be reviewed and
scrubbed relative to the rule set. The LOCATION.zip field should be
redacted relative to the type of process applied (e.g. 3-digit zip for
de-identified data). The LOCATION.location_source_value should be
reviewed for potential PII/PHI. It would be wise to obfuscate or redact
this field if you are not sure what is contained in the raw information
being extracted, transformed and loaded into the CDM.</p>
</div>
<div id="provider-table-attributes" class="section level2">
<h2>PROVIDER Table Attributes</h2>
<p>The PROVIDER table contains a list of uniquely identified healthcare
providers. These are individuals providing hands-on healthcare to
patients, such as physicians, nurses, midwives, physical therapists etc.
In some privacy preserving processes, the PROVIDER.npi and PROVIDER.dea
fields may be redacted. Please review these fields and confirm that you
are adhering to privacy rule sets. The PROVIDER.year_of_birth field is
an optional field that may also require treatment in certain rule
sets.</p>
</div>
<div id="observation-table-attributes" class="section level2">
<h2>OBSERVATION Table Attributes</h2>
<p>The OBSERVATION table captures clinical facts about a Person obtained
in the context of examination, questioning or a procedure. Any data that
cannot be represented by any other domains, such as social and lifestyle
facts, medical history, family history, etc. are recorded here.</p>
<p>We strongly caution ETL teams to review the OBSERVATION table for
potential PII/PHI. In some source systems, there can be information
coming in these vocabularies that are not laboratory or clinical
observations but instead are patient identifiers. If you search in
ATHENA, you will find there are a number of standard terms in the SNOMED
and LOINC vocabularies that can represent phone numbers, emails, and
other PII information.</p>
<p>It is difficult to create an exhaustive list of terms because these
ontologies do not maintain or publish lists of terms that may contain
patient identifiers. It is, therefore, up to data holders to perform a
review of this domain with an eye for these potential privacy issues.
The National COVID Cohort Collaborative, an NIH consortium which uses the
OMOP CDM, has published a resource for sites needing assistance with
identifying these potentially problematic records. A “live” version of
this table that will track updates over time is hosted at <a
href="https://github.com/data2health/next-gen-data-sharing/blob/master/CodesWithPPIPotential.csv"
class="uri">https://github.com/data2health/next-gen-data-sharing/blob/master/CodesWithPPIPotential.csv</a>.
We welcome additions to this list from the community.</p>
</div>
<div id="scrutinizing-_source_value" class="section level2">
<h2>Scrutinizing *_source_value</h2>
<p>The *_source_value (where * is the domain name such as
CONDITION_OCCURRENCE, etc) fields present an opportunity for sites to
carry forward potential PII/PHI in ETL processes. Because the convention
of the OMOP CDM has minimal boundaries on what are retained in these
fields, it is important to treat all source_value fields as potentially
containing PII/PHI. We highly advise all data holders to scrutinize
these fields when applying privacy preserving processes. It is not
uncommon for fields to be overloaded and contain potential patient
identifiers. Please use caution when transmitting or making views of
these fields available to users.</p>
</div>
<div id="scrutinizing-string-fields" class="section level2">
<h2>Scrutinizing String Fields</h2>
<p>Across OMOP Domains, there are many fields which permit the use of
strings (e.g. DRUG_EXPOSURE.sig, MEASUREMENT.value_as_string,
OBSERVATION.value_as_string). We’ve discussed some of these in prior
sections. Be aware that string fields often have the
potential to contain unintentional PII/PHI. Targeted regular expressions
can be built into ETL processes in order to “sniff” out any additional
PII (or potential PII), such as any data in the format of a phone number,
or a person or place name. (E.g., the regular expression
<code>Mr\.|Mrs\.|Dr\.|, M\.?D\.?</code> will find any string with a common
English title or the suffix “, M.D.”.) Depending on
risk tolerance, the expressions could err toward sensitivity or
specificity, and could be tweaked over time to meet different rule
sets.</p>
<p>Extensive regular expression matching during ETL may add significant
processing time and should therefore not be relied upon as a sole
solution, but rather an extra protection against edge cases. Other
algorithmic rules may also prove useful, such as automatically
quarantining records with lengthy string values (which could signal the
presence of free text). If these approaches are implemented, records
that match the regular expressions or rules can be quarantined in a
separate table or staging area to be manually reviewed by a data broker.
Thus, in addition to adding another layer of PII protection, another
advantage of these approaches is the potential to uncover ways that
underlying vocabularies may be contributing to unintentional sharing of
PII and create awareness for future privacy preserving processes shared
across the community.</p>
</div>
<div id="note-and-note_nlp-table-attributes" class="section level2">
<h2>NOTE and NOTE_NLP Table Attributes</h2>
<p>The NOTE table captures unstructured information that was recorded by
a provider about a patient in free text (in ASCII, or preferably in UTF8
format) notes on a given date. The NOTE_NLP table encodes all output of
NLP on clinical notes. Each row represents a single extracted term from
a note. There is a high potential these tables may retain information
that is considered PII/PHI. In addition to overall string searching,
these tables are likely to be dropped altogether to adhere to the most
stringent rule sets.</p>
<p>It is highly advised that if you are conducting a study with NOTE and
NOTE_NLP table information, please consult with your local governance
and privacy officers to ensure compliance with local rule sets.</p>
</div>
<div id="conclusion" class="section level2">
<h2>Conclusion</h2>
<p>Privacy preserving processes are not one-size-fits-all. There are
many different rule sets that can be applied to datasets. Data holders
are encouraged to consult with their local privacy officer(s) to ensure
all processes applied to a database are compliant with local
interpretation of the selected rule set.</p>
</div>
</div>
<div id="references" class="section level1">
<h1>References</h1>
<ol style="list-style-type: decimal">
<li>BibTeX version: <span
class="citation">@MISC</span>{noauthor_undated-mg, title = “Guidance on
the Protection of Personal Identifiable Information”, abstract =
“Personal Identifiable Information (PII) is defined as:”, howpublished =
“”, note = “Accessed: 2021-8-18” }</li>
</ol>
<p>Regular citation: Guidance on the Protection of Personal Identifiable
Information. [cited 18 Aug 2021]. Available: <a
href="https://www.dol.gov/general/ppii"
class="uri">https://www.dol.gov/general/ppii</a></p>
<ol start="2" style="list-style-type: decimal">
<li>BibTeX version: <span
class="citation">@MISC</span>{HIPAA_Journal2017-yo, title = “What Does
{PHI} Stand For?”, author = “{HIPAA Journal}”, abstract = “PHI is a term
used in connection with health data, but what does PHI stand for? What
information is included in the definition of PHI.”, month = dec, year =
2017, howpublished = “”, note = “Accessed: 2021-8-18”, language = “en”
}</li>
</ol>
<p>Regular citation: HIPAA Journal. What Does PHI Stand For? 23 Dec 2017
[cited 18 Aug 2021]. Available: <a
href="https://www.hipaajournal.com/what-does-phi-stand-for/"
class="uri">https://www.hipaajournal.com/what-does-phi-stand-for/</a></p>
<ol start="3" style="list-style-type: decimal">
<li>BibTeX version:</li>
</ol>
<p><span class="citation">@MISC</span>{noauthor_2018-mt, title =
“General Data Protection Regulation ({GDPR}) Compliance Guidelines”,
abstract = “The EU General Data Protection Regulation went into effect
on May 25, 2018, replacing the Data Protection Directive 95/46/EC.
Designed to increase data privacy for EU citizens, the regulation levies
steep fines on organizations that dont follow the law.”, month = jun,
year = 2018, howpublished = “”, note = “Accessed: 2021-8-18”, language =
“en” }</p>
<p>Regular citation: General Data Protection Regulation (GDPR)
Compliance Guidelines. 18 Jun 2018 [cited 18 Aug 2021]. Available: <a
href="https://gdpr.eu/" class="uri">https://gdpr.eu/</a></p>
<ol start="4" style="list-style-type: decimal">
<li>BibTeX version:</li>
</ol>
<p><span
class="citation">@MISC</span>{Office_for_Civil_Rights_OCR_undated-zy,
title = “Methods for De-identification of {PHI}”, author = “{Office for
Civil Rights (OCR)}”, abstract = “Guidance about methods and approaches
to achieve de-identification in accordance with the Health Insurance
Portability and Accountability Act of 1996.”, howpublished = “”, note =
“Accessed: 2021-8-19” }</p>
<p>Regular citation: Office for Civil Rights (OCR). Methods for
De-identification of PHI. [cited 19 Aug 2021]. Available: <a
href="https://www.hhs.gov/hipaa/for-professionals/privacy/special-topics/de-identification/index.html"
class="uri">https://www.hhs.gov/hipaa/for-professionals/privacy/special-topics/de-identification/index.html</a></p>
</div>
</div>
</div>
</div>
<script>
// Give pandoc-generated tables Bootstrap styling: any <table> whose <tbody>
// holds a tr.odd row receives the "table table-condensed" classes.
function bootstrapStylePandocTables() {
  var pandocTables = $('tr.odd').parent('tbody').parent('table');
  pandocTables.addClass('table table-condensed');
}
// Run the styling pass once the DOM is ready.
$(document).ready(function () {
  bootstrapStylePandocTables();
});
</script>
<!-- tabsets -->
<script>
// On DOM ready, call window.buildTabsets with the "TOC" argument.
$(function () {
  window.buildTabsets("TOC");
});
// On DOM ready, make a click on any tab of a dropdown-style tabset toggle the
// nav-tabs-open class on its parent tab list.
$(function () {
  var dropdownTabs = $('.tabset-dropdown > .nav-tabs > li');
  dropdownTabs.on('click', function () {
    $(this).parent().toggleClass('nav-tabs-open');
  });
});
</script>
<!-- table of contents (tocify) setup -->
<script>
$(document).ready(function () {
  // Headers marked .unlisted.unnumbered also get toc-ignore, for consistency with Pandoc.
  $('.unlisted.unnumbered').addClass('toc-ignore');
  // A toc-ignore class placed on a section div is moved onto that section's header(s).
  var ignoredSections = $('div.section.toc-ignore');
  ignoredSections.removeClass('toc-ignore');
  ignoredSections.children('h1,h2,h3,h4,h5').addClass('toc-ignore');
  // tocify settings
  var tocSettings = {
    selectors: "h1,h2,h3",
    theme: "bootstrap3",
    context: '.toc-content',
    // Build an anchor hash from the header text: drop the listed punctuation
    // characters, then turn each whitespace character into an underscore.
    hashGenerator: function (text) {
      var stripped = text.replace(/[.\\/?&!#<>]/g, '');
      return stripped.replace(/\s/g, '_');
    },
    ignoreSelector: ".toc-ignore",
    scrollTo: 0,
    showAndHide: true,
    smoothScroll: true
  };
  // Build the table of contents inside #TOC.
  $("#TOC").tocify(tocSettings);
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
  // Create a <script> element pointing at the hosted MathJax build and
  // append it to the document's <head>.
  var mathjaxTag = document.createElement("script");
  mathjaxTag.type = "text/javascript";
  mathjaxTag.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  var headElement = document.getElementsByTagName("head")[0];
  headElement.appendChild(mathjaxTag);
})();
</script>
</body>
</html>