Merge pull request #18 from anthonysena/V4toV5Conversion
Initial commit for the CDM v4 to v5 conversion scripts.
This commit is contained in:
commit
21cf148715
|
@ -0,0 +1,304 @@
|
|||
/*********************************************************************************
|
||||
# Copyright 2015 Observational Health Data Sciences and Informatics
|
||||
#
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
********************************************************************************/
|
||||
|
||||
/*******************************************************************************
|
||||
|
||||
PURPOSE: Use this script is used to help perform Quality Assurance activities
|
||||
after you convert your OMOP V4 common data model to CDM V5.
|
||||
|
||||
last revised: 01 July 2015
|
||||
author: Anthony Sena
|
||||
|
||||
This script was authored against SQL Server and will require conversion to other
|
||||
dialects. Please keep this in mind if you plan to use this against another RDBMS.
|
||||
|
||||
General Notes
|
||||
---------------
|
||||
|
||||
The V4 to V5 conversion script utilizes the standard vocabularies to map data
|
||||
from the V4 source tables to the V5 target tables. As a result, comparing the
|
||||
rowcounts for the following tables is not adviseable since we are expecting
|
||||
certain entries to move from their source table to a different target:
|
||||
|
||||
Condition_Occurrence
|
||||
Drug_Exposure
|
||||
Observation
|
||||
Measurement
|
||||
Procedure_Occurrence
|
||||
|
||||
This script will produce 2 tables:
|
||||
|
||||
Table 1: This will contain the source table name (i.e. Condition_Occurrence),
|
||||
the target domain_id from the V5 vocabulary and the expected rowcount
|
||||
from the conversion. When there is no target defined in the V5 vocabulary,
|
||||
the source data is carried over to the same target table in V5 with
|
||||
a concept_id of 0. For example:
|
||||
|
||||
TableName Domain RowCount
|
||||
--------- ------ --------
|
||||
Condition_Occurrence condition 464849
|
||||
Condition_Occurrence measurement 8416
|
||||
Condition_Occurrence observation 31522
|
||||
Condition_Occurrence procedure 24298
|
||||
|
||||
Table 2: This will contain a summary of the V5 Target Domains and Rowcounts.
|
||||
I found this helpful to tie out the expected rowcounts and what actually
|
||||
happened during the conversion.
|
||||
|
||||
INSTRUCTIONS
|
||||
------------
|
||||
|
||||
1. This script has placeholders for your CDM V4 and CDMV5 database/schema.
|
||||
In order to make this file work in your environment, you
|
||||
should plan to do a global "FIND AND REPLACE" on this file to fill in the
|
||||
file with values that pertain to your environment. The following are the
|
||||
tokens you should use when doing your "FIND AND REPLACE" operation:
|
||||
|
||||
a. [SOURCE_CDMV4]
|
||||
b. [TARGET_CDMV5]
|
||||
|
||||
2. Run the resulting script on your target RDBDMS.
|
||||
|
||||
*********************************************************************************/
|
||||
|
||||
USE [TARGET_CDMV5]
|
||||
GO
|
||||
|
||||
/*
|
||||
* CONCEPT MAP
|
||||
*/
|
||||
IF OBJECT_ID('tempdb..#concept_map', 'U') IS NOT NULL
|
||||
DROP TABLE #concept_map;
|
||||
|
||||
--standard concepts
|
||||
SELECT concept_id AS source_concept_id
|
||||
,concept_id AS target_concept_id
|
||||
,domain_id
|
||||
,NULL AS source_concept_mapping_occurrence
|
||||
INTO #concept_map
|
||||
FROM dbo.concept
|
||||
WHERE standard_concept = 'S'
|
||||
AND invalid_reason IS NULL
|
||||
|
||||
UNION
|
||||
|
||||
--concepts with 'map to' standard
|
||||
SELECT DISTINCT c1.concept_id AS source_concept_id
|
||||
,c2.concept_id AS target_concept_id
|
||||
,c2.domain_id
|
||||
,NULL
|
||||
FROM (
|
||||
SELECT concept_id
|
||||
FROM dbo.concept
|
||||
WHERE (
|
||||
(
|
||||
standard_concept <> 'S'
|
||||
OR standard_concept IS NULL
|
||||
)
|
||||
OR invalid_reason IS NOT NULL
|
||||
)
|
||||
) c1
|
||||
INNER JOIN dbo.concept_relationship cr1 ON c1.concept_id = cr1.concept_id_1
|
||||
INNER JOIN dbo.concept c2 ON cr1.concept_id_2 = c2.concept_id
|
||||
WHERE c2.standard_concept = 'S'
|
||||
AND c2.invalid_reason IS NULL
|
||||
AND cr1.relationship_id IN ('Maps to')
|
||||
AND cr1.invalid_reason IS NULL
|
||||
|
||||
UNION
|
||||
|
||||
--concepts without 'map to' standard with another non 'is a' relation to standard
|
||||
SELECT DISTINCT c1.concept_id AS source_concept_id
|
||||
,c2.concept_id AS target_concept_id
|
||||
,c2.domain_id
|
||||
,NULL
|
||||
FROM (
|
||||
SELECT concept_id
|
||||
FROM dbo.concept
|
||||
WHERE (
|
||||
(
|
||||
standard_concept <> 'S'
|
||||
OR standard_concept IS NULL
|
||||
)
|
||||
OR invalid_reason IS NOT NULL
|
||||
)
|
||||
AND concept_id NOT IN (
|
||||
SELECT DISTINCT c1.concept_id
|
||||
FROM (
|
||||
SELECT concept_id
|
||||
FROM dbo.concept
|
||||
WHERE (
|
||||
(
|
||||
standard_concept <> 'S'
|
||||
OR standard_concept IS NULL
|
||||
)
|
||||
OR invalid_reason IS NOT NULL
|
||||
)
|
||||
) c1
|
||||
INNER JOIN dbo.concept_relationship cr1 ON c1.concept_id = cr1.concept_id_1
|
||||
INNER JOIN dbo.concept c2 ON cr1.concept_id_2 = c2.concept_id
|
||||
WHERE c2.standard_concept = 'S'
|
||||
AND c2.invalid_reason IS NULL
|
||||
AND cr1.relationship_id IN ('Maps to')
|
||||
AND cr1.invalid_reason IS NULL
|
||||
)
|
||||
) c1
|
||||
INNER JOIN dbo.concept_relationship cr1 ON c1.concept_id = cr1.concept_id_1
|
||||
INNER JOIN dbo.concept c2 ON cr1.concept_id_2 = c2.concept_id
|
||||
WHERE c2.standard_concept = 'S'
|
||||
AND c2.invalid_reason IS NULL
|
||||
AND cr1.relationship_id IN (
|
||||
'RxNorm replaced by'
|
||||
,'SNOMED replaced by'
|
||||
,'UCUM replaced by'
|
||||
,'Concept replaced by'
|
||||
,'ICD9P replaced by'
|
||||
,'LOINC replaced by'
|
||||
,'Concept same_as to'
|
||||
,'Concept was_a to'
|
||||
,'Concept alt_to to'
|
||||
)
|
||||
AND cr1.invalid_reason IS NULL
|
||||
|
||||
UNION
|
||||
|
||||
--concepts without 'map to' standard with 'is a' relation to standard
|
||||
SELECT DISTINCT c1.concept_id AS source_concept_id
|
||||
,c2.concept_id AS target_concept_id
|
||||
,c2.domain_id
|
||||
,NULL
|
||||
FROM (
|
||||
SELECT concept_id
|
||||
FROM dbo.concept
|
||||
WHERE (
|
||||
(
|
||||
standard_concept <> 'S'
|
||||
OR standard_concept IS NULL
|
||||
)
|
||||
OR invalid_reason IS NOT NULL
|
||||
)
|
||||
AND concept_id NOT IN (
|
||||
SELECT DISTINCT c1.concept_id
|
||||
FROM (
|
||||
SELECT concept_id
|
||||
FROM dbo.concept
|
||||
WHERE (
|
||||
(
|
||||
standard_concept <> 'S'
|
||||
OR standard_concept IS NULL
|
||||
)
|
||||
OR invalid_reason IS NOT NULL
|
||||
)
|
||||
) c1
|
||||
INNER JOIN dbo.concept_relationship cr1 ON c1.concept_id = cr1.concept_id_1
|
||||
INNER JOIN dbo.concept c2 ON cr1.concept_id_2 = c2.concept_id
|
||||
WHERE c2.standard_concept = 'S'
|
||||
AND c2.invalid_reason IS NULL
|
||||
AND cr1.relationship_id IN (
|
||||
'Maps to'
|
||||
,'RxNorm replaced by'
|
||||
,'SNOMED replaced by'
|
||||
,'UCUM replaced by'
|
||||
,'Concept replaced by'
|
||||
,'ICD9P replaced by'
|
||||
,'LOINC replaced by'
|
||||
,'Concept same_as to'
|
||||
,'Concept was_a to'
|
||||
,'Concept alt_to to'
|
||||
)
|
||||
AND cr1.invalid_reason IS NULL
|
||||
)
|
||||
) c1
|
||||
INNER JOIN dbo.concept_relationship cr1 ON c1.concept_id = cr1.concept_id_1
|
||||
INNER JOIN dbo.concept c2 ON cr1.concept_id_2 = c2.concept_id
|
||||
WHERE c2.standard_concept = 'S'
|
||||
AND c2.invalid_reason IS NULL
|
||||
AND cr1.relationship_id IN ('Is a')
|
||||
AND cr1.invalid_reason IS NULL;
|
||||
GO
|
||||
|
||||
-- Update the source_concept_mapping_occurrence column
|
||||
-- to contain a count to indicate the number of target_concept_ids
|
||||
-- map to that source_concept_id. This will be used elsewhere in
|
||||
-- the script to ensure that we generate new primary keys
|
||||
-- for the target tables when applicable
|
||||
UPDATE #concept_map
|
||||
SET #concept_map.source_concept_mapping_occurrence = A.[Rowcount]
|
||||
FROM #concept_map
|
||||
,(
|
||||
SELECT source_concept_id
|
||||
,domain_id
|
||||
,count(*) AS "rowcount"
|
||||
FROM #concept_map
|
||||
GROUP BY source_concept_id
|
||||
,domain_id
|
||||
) AS A
|
||||
WHERE #concept_map.source_concept_id = A.source_concept_id
|
||||
AND #concept_map.domain_id = A.domain_id
|
||||
|
||||
IF OBJECT_ID('tempdb..#concept_map_distinct', 'U') IS NOT NULL
|
||||
DROP TABLE #concept_map_distinct;
|
||||
|
||||
SELECT DISTINCT source_concept_id
|
||||
,domain_id
|
||||
,COUNT(*) AS "rowcount"
|
||||
INTO #concept_map_distinct
|
||||
FROM #concept_map
|
||||
GROUP BY source_concept_id
|
||||
,domain_id
|
||||
|
||||
|
||||
/*
|
||||
* V4 - Condition_Occurrence summary and mapping to #concept_map
|
||||
*/
|
||||
IF OBJECT_ID('tempdb..#classification_map', 'U') IS NOT NULL
|
||||
DROP TABLE #classification_map;
|
||||
|
||||
SELECT *
|
||||
INTO #classification_map
|
||||
FROM
|
||||
(
|
||||
SELECT 'Condition_Occurrence' as TableName, ISNULL(LOWER(cm.domain_id), 'condition') AS "Domain", COUNT(*) AS "RowCount"
|
||||
FROM [SOURCE_CDMV4].[dbo].[Condition_Occurrence] as CO
|
||||
LEFT JOIN #concept_map as CM ON co.condition_concept_id = cm.source_concept_id
|
||||
GROUP BY ISNULL(LOWER(cm.domain_id), 'condition')
|
||||
UNION
|
||||
SELECT 'Drug_Exposure' as TableName, ISNULL(LOWER(cm.domain_id), 'drug') AS "Domain", COUNT(*) AS "RowCount"
|
||||
FROM [SOURCE_CDMV4].[dbo].[Drug_Exposure] as de
|
||||
LEFT JOIN #concept_map as CM ON de.drug_concept_id = cm.source_concept_id
|
||||
GROUP BY ISNULL(LOWER(cm.domain_id), 'drug')
|
||||
UNION
|
||||
SELECT 'Observation' as TableName, ISNULL(LOWER(cm.domain_id), 'observation') AS "Domain", COUNT(*) AS "RowCount"
|
||||
FROM [SOURCE_CDMV4].[dbo].[Observation] as o
|
||||
LEFT JOIN #concept_map as CM ON o.observation_concept_id = cm.source_concept_id
|
||||
GROUP BY ISNULL(LOWER(cm.domain_id), 'observation')
|
||||
UNION
|
||||
SELECT 'Procedure_Occurrence' as TableName, ISNULL(LOWER(cm.domain_id), 'procedure') AS "Domain", COUNT(*) AS "RowCount"
|
||||
FROM [SOURCE_CDMV4].[dbo].[Procedure_Occurrence] as po
|
||||
LEFT JOIN #concept_map as CM ON po.PROCEDURE_CONCEPT_ID = cm.source_concept_id
|
||||
GROUP BY ISNULL(LOWER(cm.domain_id), 'procedure')
|
||||
) AS A
|
||||
ORDER by A.[TableName], A.[Domain]
|
||||
|
||||
select *
|
||||
from #classification_map
|
||||
order by [TableName], [Domain]
|
||||
|
||||
select domain, SUM([RowCount])
|
||||
from #classification_map
|
||||
group by domain
|
||||
order by domain
|
|
@ -0,0 +1,150 @@
|
|||
/*********************************************************************************
|
||||
# Copyright 2015 Observational Health Data Sciences and Informatics
|
||||
#
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
********************************************************************************/
|
||||
|
||||
/*******************************************************************************
|
||||
|
||||
PURPOSE: Use this script is used to help perform Quality Assurance activities
|
||||
after you convert your OMOP V4 common data model to CDM V5.
|
||||
|
||||
last revised: 01 July 2015
|
||||
author: Anthony Sena
|
||||
|
||||
This script was authored against SQL Server and will require conversion to other
|
||||
dialects. Please keep this in mind if you plan to use this against another RDBMS.
|
||||
|
||||
General Notes
|
||||
---------------
|
||||
|
||||
This script will use the metadata tables from the V4 and V5 tables to get a list
|
||||
of all of the tables from each database and the rowcounts for each table in an
|
||||
effort to help you see how your data has changed through the conversion process.
|
||||
|
||||
In the results, we include a column to identify the tables that were part of
|
||||
the migration process in an effort to hone in on the key tables.
|
||||
|
||||
There is a Part 2 of this QA script which will also show you how data moved
|
||||
amongst some specific tables.
|
||||
|
||||
|
||||
INSTRUCTIONS
|
||||
------------
|
||||
|
||||
1. This script has placeholders for your CDM V4 and CDMV5 database/schema.
|
||||
In order to make this file work in your environment, you
|
||||
should plan to do a global "FIND AND REPLACE" on this file to fill in the
|
||||
file with values that pertain to your environment. The following are the
|
||||
tokens you should use when doing your "FIND AND REPLACE" operation:
|
||||
|
||||
a. [SOURCE_CDMV4]
|
||||
b. [TARGET_CDMV5]
|
||||
|
||||
2. Run the resulting script on your target RDBDMS.
|
||||
|
||||
*********************************************************************************/
|
||||
USE [TARGET_CDMV5]
|
||||
GO
|
||||
|
||||
IF OBJECT_ID('tempdb..#v5_stats', 'U') IS NOT NULL
|
||||
DROP TABLE #v5_stats;
|
||||
|
||||
SELECT
|
||||
DB_NAME() as DBName,
|
||||
t.NAME AS TableName,
|
||||
p.[Rows]
|
||||
INTO #v5_stats
|
||||
FROM
|
||||
sys.tables t
|
||||
INNER JOIN
|
||||
sys.indexes i ON t.OBJECT_ID = i.object_id
|
||||
INNER JOIN
|
||||
sys.partitions p ON i.object_id = p.OBJECT_ID AND i.index_id = p.index_id
|
||||
INNER JOIN
|
||||
sys.allocation_units a ON p.partition_id = a.container_id
|
||||
WHERE
|
||||
t.NAME NOT LIKE 'dt%' AND
|
||||
i.OBJECT_ID > 255 AND
|
||||
i.index_id <= 1
|
||||
GROUP BY
|
||||
t.NAME, i.object_id, i.index_id, i.name, p.[Rows]
|
||||
ORDER BY
|
||||
object_name(i.object_id)
|
||||
|
||||
USE [SOURCE_CDMV4]
|
||||
GO
|
||||
|
||||
IF OBJECT_ID('tempdb..#v4_stats', 'U') IS NOT NULL
|
||||
DROP TABLE #v4_stats;
|
||||
|
||||
SELECT
|
||||
DB_NAME() as DBName,
|
||||
t.NAME AS TableName,
|
||||
p.[Rows]
|
||||
INTO #v4_stats
|
||||
FROM
|
||||
sys.tables t
|
||||
INNER JOIN
|
||||
sys.indexes i ON t.OBJECT_ID = i.object_id
|
||||
INNER JOIN
|
||||
sys.partitions p ON i.object_id = p.OBJECT_ID AND i.index_id = p.index_id
|
||||
INNER JOIN
|
||||
sys.allocation_units a ON p.partition_id = a.container_id
|
||||
WHERE
|
||||
t.NAME NOT LIKE 'dt%' AND
|
||||
i.OBJECT_ID > 255 AND
|
||||
i.index_id <= 1
|
||||
GROUP BY
|
||||
t.NAME, i.object_id, i.index_id, i.name, p.[Rows]
|
||||
ORDER BY
|
||||
object_name(i.object_id)
|
||||
|
||||
DECLARE @MigrationTarget TABLE (TableName varchar(100))
|
||||
|
||||
INSERT INTO @MigrationTarget SELECT 'care_site'
|
||||
INSERT INTO @MigrationTarget SELECT 'condition_era'
|
||||
INSERT INTO @MigrationTarget SELECT 'condition_occurrence'
|
||||
INSERT INTO @MigrationTarget SELECT 'death'
|
||||
INSERT INTO @MigrationTarget SELECT 'device_exposure'
|
||||
INSERT INTO @MigrationTarget SELECT 'drug_cost'
|
||||
INSERT INTO @MigrationTarget SELECT 'drug_era'
|
||||
INSERT INTO @MigrationTarget SELECT 'drug_exposure'
|
||||
INSERT INTO @MigrationTarget SELECT 'location'
|
||||
INSERT INTO @MigrationTarget SELECT 'measurement'
|
||||
INSERT INTO @MigrationTarget SELECT 'observation'
|
||||
INSERT INTO @MigrationTarget SELECT 'observation_period'
|
||||
INSERT INTO @MigrationTarget SELECT 'payer_plan_period'
|
||||
INSERT INTO @MigrationTarget SELECT 'person'
|
||||
INSERT INTO @MigrationTarget SELECT 'procedure_cost'
|
||||
INSERT INTO @MigrationTarget SELECT 'procedure_occurrence'
|
||||
INSERT INTO @MigrationTarget SELECT 'provider'
|
||||
INSERT INTO @MigrationTarget SELECT 'visit_occurrence'
|
||||
|
||||
select
|
||||
ISNULL(V4.DBName, 'No V4 Table Equivalent') as "Database Name",
|
||||
v4.TableName,
|
||||
v4.rows,
|
||||
ISNULL(V5.DBName, 'No V5 Table Equivalent') as "Database Name",
|
||||
v5.TableName,
|
||||
v5.rows,
|
||||
CASE WHEN mt.TableName IS NULL THEN 'N' ELSE 'Y' END AS "Migration Target",
|
||||
ISNULL(v5.Rows, 0) - ISNULL(v4.Rows, 0) AS "Row Count Change"
|
||||
from #v4_stats as v4
|
||||
full outer join #v5_stats as v5 ON v4.TableName = v5.TableName
|
||||
left join @MigrationTarget mt on v5.TableName = mt.TableName
|
||||
order by v5.TableName
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,4 @@
|
|||
Common-Data-Model - Conversion from CDM v4 to CDM v5
|
||||
====================================================
|
||||
|
||||
Scrips for converting data from the Common Data Model V4 to V5.
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue