diff --git a/CodeExcerpts/DerivedTables/GenerateEra-parametized.sql b/CodeExcerpts/DerivedTables/GenerateEra-parametized.sql new file mode 100644 index 0000000..5887782 --- /dev/null +++ b/CodeExcerpts/DerivedTables/GenerateEra-parametized.sql @@ -0,0 +1,391 @@ +/********************************************************************************* +# Copyright 2017 Observational Health Data Sciences and Informatics +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +********************************************************************************/ +/******************************************************************************* + +PURPOSE: Use this script to convert your OMOP V4 common data model to CDM V5. + +last revised: Jun 2017 +authors: Patrick Ryan, Chris Knoll, Anthony Sena, Vojtech Huser + +!!!!!!!!!!!!!!!!!!!!! PLEASE READ THESE INSTRUCTIONS !!!!!!!!!!!!!!!!!!!!!!!!!!! + +This script was authored using OHDSI-SQL which will require you to run this +script through SqlRender to creat a version that is compatible with your target +RDBMS. We have pre-generated these scripts using SQL Render and have placed +them in folders for each RDBMS. Depending on which script you are viewing, your +instructions will be slightly different. + +General Assumptions +------------------- + +This script assumes that your V4 and V5 database are located on the same +RDBMS server. It also assumes that the V4 and V5 databases were created +using the standard data definition scripts for these databases. If you +altered your V4 database in any way, this script will likely require +some mo + +Getting Started +--------------- + +Before you can use this script, there are some prerequisites: + + 1. Create a target CDMv5 database on your database server using the + appropriate script from https://github.com/OHDSI/CommonDataModel + 2. Load VocabV5 into the target database/schema that will contain CDMv5 using + Athena: http://ohdsi.org/web/ATHENA + +OHDSI-SQL File Instructions +----------------------------- + + 1. Set parameter name of schema that contains CDMv4 instance + (@SOURCE_CDMV4, @SOURCE_CDMV4_SCHEMA) + 2. Set parameter name of schema that contains CDMv5 instance + (@TARGET_CDMV5, @TARGET_CDMV5_SCHEMA) + 3. Run this script through SqlRender to produce a script that will work in your + source dialect. SqlRender can be found here: https://github.com/OHDSI/SqlRender + 4. Run the script produced by SQL Render on your target RDBDMS. + + File Instructions +------------------------- + + 1. This script will hold a number of placeholders for your CDM V4 and CDMV5 + database/schema. In order to make this file work in your environment, you + should plan to do a global "FIND AND REPLACE" on this file to fill in the + file with values that pertain to your environment. The following are the + tokens you should use when doing your "FIND AND REPLACE" operation: + + a. [SOURCE_CDMV4] + b. [SOURCE_CDMV4].[SCHEMA] + c. [TARGET_CDMV5] + d. [TARGET_CDMV5].[SCHEMA] + + 2. Run the resulting script on your target RDBDMS. + +*********************************************************************************/ +/* SCRIPT PARAMETERS */ +{DEFAULT @SOURCE_CDMV4 = '[SOURCE_CDMV4]' } -- The CDMv4 database name + {DEFAULT @SOURCE_CDMV4_SCHEMA = '[SOURCE_CDMV4].[SCHEMA]' } -- The CDMv4 database plus schema + {DEFAULT @TARGET_CDMV5 = '[TARGET_CDMV5]' } -- The target CDMv5 database name + {DEFAULT @TARGET_CDMV5_SCHEMA = '[TARGET_CDMV5].[SCHEMA]' } -- the target CDMv5 database plus schema + +USE @TARGET_CDMV5; + + + +/**** + +DRUG ERA +Note: Eras derived from DRUG_EXPOSURE table, using 30d gap + + ****/ +IF OBJECT_ID('tempdb..#cteDrugTarget', 'U') IS NOT NULL + DROP TABLE #cteDrugTarget; + +/* / */ + +-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date +SELECT d.DRUG_EXPOSURE_ID + ,d.PERSON_ID + ,c.CONCEPT_ID + ,d.DRUG_TYPE_CONCEPT_ID + ,DRUG_EXPOSURE_START_DATE + ,COALESCE(DRUG_EXPOSURE_END_DATE, DATEADD(day, DAYS_SUPPLY, DRUG_EXPOSURE_START_DATE), DATEADD(day, 1, DRUG_EXPOSURE_START_DATE)) AS DRUG_EXPOSURE_END_DATE + ,c.CONCEPT_ID AS INGREDIENT_CONCEPT_ID +INTO #cteDrugTarget +FROM @TARGET_CDMV5_SCHEMA.DRUG_EXPOSURE d +INNER JOIN @TARGET_CDMV5_SCHEMA.CONCEPT_ANCESTOR ca ON ca.DESCENDANT_CONCEPT_ID = d.DRUG_CONCEPT_ID +INNER JOIN @TARGET_CDMV5_SCHEMA.CONCEPT c ON ca.ANCESTOR_CONCEPT_ID = c.CONCEPT_ID +WHERE c.VOCABULARY_ID = 'RxNorm' + AND c.CONCEPT_CLASS_ID = 'Ingredient'; + +/* / */ + +IF OBJECT_ID('tempdb..#cteEndDates', 'U') IS NOT NULL + DROP TABLE #cteEndDates; + +/* / */ + +SELECT PERSON_ID + ,INGREDIENT_CONCEPT_ID + ,DATEADD(day, - 30, EVENT_DATE) AS END_DATE -- unpad the end date +INTO #cteEndDates +FROM ( + SELECT E1.PERSON_ID + ,E1.INGREDIENT_CONCEPT_ID + ,E1.EVENT_DATE + ,COALESCE(E1.START_ORDINAL, MAX(E2.START_ORDINAL)) START_ORDINAL + ,E1.OVERALL_ORD + FROM ( + SELECT PERSON_ID + ,INGREDIENT_CONCEPT_ID + ,EVENT_DATE + ,EVENT_TYPE + ,START_ORDINAL + ,ROW_NUMBER() OVER ( + PARTITION BY PERSON_ID + ,INGREDIENT_CONCEPT_ID ORDER BY EVENT_DATE + ,EVENT_TYPE + ) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date + FROM ( + -- select the start dates, assigning a row number to each + SELECT PERSON_ID + ,INGREDIENT_CONCEPT_ID + ,DRUG_EXPOSURE_START_DATE AS EVENT_DATE + ,0 AS EVENT_TYPE + ,ROW_NUMBER() OVER ( + PARTITION BY PERSON_ID + ,INGREDIENT_CONCEPT_ID ORDER BY DRUG_EXPOSURE_START_DATE + ) AS START_ORDINAL + FROM #cteDrugTarget + + UNION ALL + + -- add the end dates with NULL as the row number, padding the end dates by 30 to allow a grace period for overlapping ranges. + SELECT PERSON_ID + ,INGREDIENT_CONCEPT_ID + ,DATEADD(day, 30, DRUG_EXPOSURE_END_DATE) + ,1 AS EVENT_TYPE + ,NULL + FROM #cteDrugTarget + ) RAWDATA + ) E1 + INNER JOIN ( + SELECT PERSON_ID + ,INGREDIENT_CONCEPT_ID + ,DRUG_EXPOSURE_START_DATE AS EVENT_DATE + ,ROW_NUMBER() OVER ( + PARTITION BY PERSON_ID + ,INGREDIENT_CONCEPT_ID ORDER BY DRUG_EXPOSURE_START_DATE + ) AS START_ORDINAL + FROM #cteDrugTarget + ) E2 ON E1.PERSON_ID = E2.PERSON_ID + AND E1.INGREDIENT_CONCEPT_ID = E2.INGREDIENT_CONCEPT_ID + AND E2.EVENT_DATE <= E1.EVENT_DATE + GROUP BY E1.PERSON_ID + ,E1.INGREDIENT_CONCEPT_ID + ,E1.EVENT_DATE + ,E1.START_ORDINAL + ,E1.OVERALL_ORD + ) E +WHERE 2 * E.START_ORDINAL - E.OVERALL_ORD = 0; + +/* / */ + +IF OBJECT_ID('tempdb..#cteDrugExpEnds', 'U') IS NOT NULL + DROP TABLE #cteDrugExpEnds; + +/* / */ + +SELECT d.PERSON_ID + ,d.INGREDIENT_CONCEPT_ID + ,d.DRUG_TYPE_CONCEPT_ID + ,d.DRUG_EXPOSURE_START_DATE + ,MIN(e.END_DATE) AS ERA_END_DATE +INTO #cteDrugExpEnds +FROM #cteDrugTarget d +INNER JOIN #cteEndDates e ON d.PERSON_ID = e.PERSON_ID + AND d.INGREDIENT_CONCEPT_ID = e.INGREDIENT_CONCEPT_ID + AND e.END_DATE >= d.DRUG_EXPOSURE_START_DATE +GROUP BY d.PERSON_ID + ,d.INGREDIENT_CONCEPT_ID + ,d.DRUG_TYPE_CONCEPT_ID + ,d.DRUG_EXPOSURE_START_DATE; + +/* / */ + +INSERT INTO @TARGET_CDMV5_SCHEMA.drug_era +SELECT row_number() OVER ( + ORDER BY person_id + ) AS drug_era_id + ,person_id + ,INGREDIENT_CONCEPT_ID + ,min(DRUG_EXPOSURE_START_DATE) AS drug_era_start_date + ,ERA_END_DATE + ,COUNT(*) AS DRUG_EXPOSURE_COUNT + ,30 AS gap_days +FROM #cteDrugExpEnds +GROUP BY person_id + ,INGREDIENT_CONCEPT_ID + ,drug_type_concept_id + ,ERA_END_DATE; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +/**** + +CONDITION ERA +Note: Eras derived from CONDITION_OCCURRENCE table, using 30d gap + + ****/ +IF OBJECT_ID('tempdb..#condition_era_phase_1', 'U') IS NOT NULL + DROP TABLE #condition_era_phase_1; + +/* / */ + +IF OBJECT_ID('tempdb..#cteConditionTarget', 'U') IS NOT NULL + DROP TABLE #cteConditionTarget; + +/* / */ + +-- create base eras from the concepts found in condition_occurrence +SELECT co.PERSON_ID + ,co.condition_concept_id + ,co.CONDITION_START_DATE + ,COALESCE(co.CONDITION_END_DATE, DATEADD(day, 1, CONDITION_START_DATE)) AS CONDITION_END_DATE +INTO #cteConditionTarget +FROM @TARGET_CDMV5_SCHEMA.CONDITION_OCCURRENCE co; + +/* / */ + +IF OBJECT_ID('tempdb..#cteCondEndDates', 'U') IS NOT NULL + DROP TABLE #cteCondEndDates; + +/* / */ + +SELECT PERSON_ID + ,CONDITION_CONCEPT_ID + ,DATEADD(day, - 30, EVENT_DATE) AS END_DATE -- unpad the end date +INTO #cteCondEndDates +FROM ( + SELECT E1.PERSON_ID + ,E1.CONDITION_CONCEPT_ID + ,E1.EVENT_DATE + ,COALESCE(E1.START_ORDINAL, MAX(E2.START_ORDINAL)) START_ORDINAL + ,E1.OVERALL_ORD + FROM ( + SELECT PERSON_ID + ,CONDITION_CONCEPT_ID + ,EVENT_DATE + ,EVENT_TYPE + ,START_ORDINAL + ,ROW_NUMBER() OVER ( + PARTITION BY PERSON_ID + ,CONDITION_CONCEPT_ID ORDER BY EVENT_DATE + ,EVENT_TYPE + ) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date + FROM ( + -- select the start dates, assigning a row number to each + SELECT PERSON_ID + ,CONDITION_CONCEPT_ID + ,CONDITION_START_DATE AS EVENT_DATE + ,- 1 AS EVENT_TYPE + ,ROW_NUMBER() OVER ( + PARTITION BY PERSON_ID + ,CONDITION_CONCEPT_ID ORDER BY CONDITION_START_DATE + ) AS START_ORDINAL + FROM #cteConditionTarget + + UNION ALL + + -- pad the end dates by 30 to allow a grace period for overlapping ranges. + SELECT PERSON_ID + ,CONDITION_CONCEPT_ID + ,DATEADD(day, 30, CONDITION_END_DATE) + ,1 AS EVENT_TYPE + ,NULL + FROM #cteConditionTarget + ) RAWDATA + ) E1 + INNER JOIN ( + SELECT PERSON_ID + ,CONDITION_CONCEPT_ID + ,CONDITION_START_DATE AS EVENT_DATE + ,ROW_NUMBER() OVER ( + PARTITION BY PERSON_ID + ,CONDITION_CONCEPT_ID ORDER BY CONDITION_START_DATE + ) AS START_ORDINAL + FROM #cteConditionTarget + ) E2 ON E1.PERSON_ID = E2.PERSON_ID + AND E1.CONDITION_CONCEPT_ID = E2.CONDITION_CONCEPT_ID + AND E2.EVENT_DATE <= E1.EVENT_DATE + GROUP BY E1.PERSON_ID + ,E1.CONDITION_CONCEPT_ID + ,E1.EVENT_DATE + ,E1.START_ORDINAL + ,E1.OVERALL_ORD + ) E +WHERE (2 * E.START_ORDINAL) - E.OVERALL_ORD = 0; + +/* / */ + +IF OBJECT_ID('tempdb..#cteConditionEnds', 'U') IS NOT NULL + DROP TABLE #cteConditionEnds; + +/* / */ + +SELECT c.PERSON_ID + ,c.CONDITION_CONCEPT_ID + ,c.CONDITION_START_DATE + ,MIN(e.END_DATE) AS ERA_END_DATE +INTO #cteConditionEnds +FROM #cteConditionTarget c +INNER JOIN #cteCondEndDates e ON c.PERSON_ID = e.PERSON_ID + AND c.CONDITION_CONCEPT_ID = e.CONDITION_CONCEPT_ID + AND e.END_DATE >= c.CONDITION_START_DATE +GROUP BY c.PERSON_ID + ,c.CONDITION_CONCEPT_ID + ,c.CONDITION_START_DATE; + +/* / */ + +INSERT INTO @TARGET_CDMV5_SCHEMA.condition_era ( + condition_era_id + ,person_id + ,condition_concept_id + ,condition_era_start_date + ,condition_era_end_date + ,condition_occurrence_count + ) +SELECT row_number() OVER ( + ORDER BY person_id + ) AS condition_era_id + ,person_id + ,CONDITION_CONCEPT_ID + ,min(CONDITION_START_DATE) AS CONDITION_ERA_START_DATE + ,ERA_END_DATE AS CONDITION_ERA_END_DATE + ,COUNT(*) AS CONDITION_OCCURRENCE_COUNT +FROM #cteConditionEnds +GROUP BY person_id + ,CONDITION_CONCEPT_ID + ,ERA_END_DATE; + diff --git a/CodeExcerpts/DerivedTables/TranslateSnippet.R b/CodeExcerpts/DerivedTables/TranslateSnippet.R new file mode 100644 index 0000000..307dd00 --- /dev/null +++ b/CodeExcerpts/DerivedTables/TranslateSnippet.R @@ -0,0 +1,34 @@ + + +library(SqlRender);library(RCurl) + +#specify URL of sql code written in parametized SQL (see some examples below) +# url<-'https://raw.githubusercontent.com/OHDSI/CommonDataModel/master/Version4%20To%20Version5%20Conversion/OMOP%20CDMv4%20to%20CDMv5%20-%20OHDSI-SQL.sql' +url<-'https://raw.githubusercontent.com/OHDSI/Achilles/master/inst/sql/sql_server/export_v5/drugera/sqlPrevalenceByGenderAgeYear.sql' + +#get the code +sql<-getURL(url) + + +#inspect what parameters are needed by searching for @ + +#decide your parameters +results='ccae' +vocab='public' + + +#fill in parameters +tta<-SqlRender::renderSql(sql,results_database_schema=results,vocab_database_schema=vocab) + + +#translate into target dialect + +ttb<-SqlRender::translateSql(tta$sql,targetDialect = 'postgresql') + + +#write final SQL to a local file +cat(ttb$sql,file='c:/temp/drug_era_2017.sql') +sql<-ttb$sql + + +