From aa2f8d0bbf02f92fb2624e462410cd7c20e6cd59 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Tue, 26 Nov 2024 11:34:18 -0600 Subject: [PATCH] notebook --- Notebook_2KCD3MVK1.zpln | 376 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 376 insertions(+) create mode 100644 Notebook_2KCD3MVK1.zpln diff --git a/Notebook_2KCD3MVK1.zpln b/Notebook_2KCD3MVK1.zpln new file mode 100644 index 0000000..3013297 --- /dev/null +++ b/Notebook_2KCD3MVK1.zpln @@ -0,0 +1,376 @@ +{ + "paragraphs": [ + { + "text": "import transport\nimport json\nimport pandas as pd\nimport numpy as np\n\nf \u003d open(\u0027/home/steve/git/mz/data/OMOP/output/5.4/omop-5.4.json\u0027)\n_omop \u003d json.loads(f.read().lower())\nf.close()\nedwr \u003d transport.get.reader(provider\u003d\u0027iceberg\u0027,catalog\u003d\u0027mz\u0027,database\u003d\u0027edw.mz\u0027,table\u003d\u0027person\u0027)\ntransport.__version__", + "user": "steve", + "dateUpdated": "2024-11-26 11:31:44.428", + "progress": 0, + "config": { + "colWidth": 6.0, + "fontSize": 13.0, + "enabled": true, + "results": {}, + "editorSetting": { + "language": "python", + "editOnDblClick": false, + "completionSupport": true + }, + "editorMode": "ace/mode/python" + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "TEXT", + "data": "\u00272.4.6\u0027\n" + } + ] + }, + "apps": [], + "runtimeInfos": {}, + "progressUpdateIntervalMs": 500, + "jobName": "paragraph_1730907719628_970278257", + "id": "paragraph_1730907719628_970278257", + "dateCreated": "2024-11-06 09:41:59.628", + "dateStarted": "2024-11-26 11:31:44.448", + "dateFinished": "2024-11-26 11:31:54.973", + "status": "FINISHED" + }, + { + "text": "#\n# create tables and verify table \n# edwr.apply(_omop[0].lower())\n#\n#\n# myr \u003d transport.get.reader(label\u003d\u0027mz-openmrs\u0027)\n\nsql \u003d \"\"\"\nSELECT \npatient.patient_id,\nperson.gender gender_source_value, _g.concept_id gender_concept_id,\nperson.birthdate birth_datetime,\nextract(DAY FROM person.birthdate) AS day_of_birth, extract(MONTH FROM person.birthdate) AS month_of_birth, extract(YEAR FROM person.birthdate) AS year_of_birth\nFROM openmrs.person \nINNER JOIN openmrs.patient ON patient_id \u003d person_id \nINNER JOIN edw.mz.concept _g ON _g.concept_code \u003d person.gender and _g.vocabulary_id \u003d \u0027Gender\u0027\nlimit 100\n\"\"\"\n", + "user": "anonymous", + "dateUpdated": "2024-11-19 20:09:11.196", + "progress": 0, + "config": { + "editorSetting": { + "language": "python", + "editOnDblClick": false, + "completionSupport": true + }, + "colWidth": 6.0, + "editorMode": "ace/mode/python", + "fontSize": 9.0, + "results": { + "0": { + "graph": { + "mode": "table", + "height": 300.0, + "optionOpen": false, + "setting": { + "table": { + "tableGridState": {}, + "tableColumnTypeState": { + "names": { + "SCHEMA_NAME": "string" + }, + "updated": false + }, + "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]", + "tableOptionValue": { + "useFilter": false, + "showPagination": false, + "showAggregationFooter": false + }, + "updated": false, + "initialized": false + } + }, + "commonSetting": {} + } + } + }, + "enabled": true + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "SUCCESS", + "msg": [] + }, + "apps": [], + "runtimeInfos": {}, + "progressUpdateIntervalMs": 500, + "jobName": "paragraph_1730909282331_624734416", + "id": "paragraph_1730909282331_624734416", + "dateCreated": "2024-11-06 10:08:02.331", + "dateStarted": "2024-11-19 20:09:11.198", + "dateFinished": "2024-11-19 20:09:12.053", + "status": "FINISHED" + }, + { + "text": "_omop[0]", + "user": "anonymous", + "dateUpdated": "2024-11-19 21:44:14.829", + "progress": 0, + "config": { + "colWidth": 12.0, + "fontSize": 9.0, + "enabled": true, + "results": {}, + "editorSetting": { + "language": "python", + "editOnDblClick": false, + "completionSupport": true + }, + "editorMode": "ace/mode/python" + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "TEXT", + "data": "\u0027create table edw.mz.person ( person_id integer not null, gender_concept_id integer not null, year_of_birth integer not null, month_of_birth integer , day_of_birth integer , birth_datetime timestamp , race_concept_id integer not null, ethnicity_concept_id integer not null, location_id integer , provider_id integer , care_site_id integer , person_source_value varchar(50) , gender_source_value varchar(50) , gender_source_concept_id integer , race_source_value varchar(50) , race_source_concept_id integer , ethnicity_source_value varchar(50) , ethnicity_source_concept_id integer ) using iceberg partitioned by (person_id)\u0027\n" + } + ] + }, + "apps": [], + "runtimeInfos": {}, + "progressUpdateIntervalMs": 500, + "jobName": "paragraph_1732035111985_1405533519", + "id": "paragraph_1732035111985_1405533519", + "dateCreated": "2024-11-19 10:51:51.986", + "dateStarted": "2024-11-19 21:44:14.833", + "dateFinished": "2024-11-19 21:44:14.976", + "status": "FINISHED" + }, + { + "text": "import pandas as pd\nimport numpy as np\n\n_schema \u003d edwr.meta(table\u003d\u0027person\u0027)\ndef _add(_data):\n print (\u0027***************\u0027)\n _attr \u003d [_item[\u0027name\u0027] for _item in _schema]\n _xattr\u003d list( set(_attr) - set(_data.columns) )\n return pd.concat([_data,pd.DataFrame(columns\u003d_xattr)])\n\n# dreader \u003d transport.get.reader(provider\u003d\u0027drill\u0027,database\u003d\u0027edw.mz\u0027)\n# _df \u003d dreader.read(sql\u003dsql)\n_conf \u003d {\u0027source\u0027:{\u0027provider\u0027:\u0027drill\u0027,\u0027database\u0027:\u0027edw.mz\u0027,\u0027chunksize\u0027:50,\u0027plugins\u0027:[_add],\u0027args\u0027:{\u0027sql\u0027:sql}},\u0027target\u0027:{\u0027provider\u0027:\u0027iceberg\u0027,\u0027database\u0027:\u0027edw.mz\u0027,\u0027table\u0027:\u0027persona\u0027}}\n_etl \u003d transport.get.etl(**_conf)\n_etl.run()\n# for row in _df:\n# print (row)\n# break\n", + "user": "anonymous", + "dateUpdated": "2024-11-19 20:09:16.461", + "progress": 0, + "config": { + "colWidth": 6.0, + "fontSize": 9.0, + "enabled": true, + "results": { + "0": { + "graph": { + "mode": "table", + "height": 300.0, + "optionOpen": false, + "setting": { + "table": { + "tableGridState": {}, + "tableColumnTypeState": { + "names": { + "person_id": "string", + "gender_concept_id": "string", + "year_of_birth": "string", + "month_of_birth": "string", + "day_of_birth": "string", + "birth_datetime": "string", + "race_concept_id": "string", + "ethnicity_concept_id": "string", + "location_id": "string", + "provider_id": "string", + "care_site_id": "string", + "person_source_value": "string", + "gender_source_value": "string", + "gender_source_concept_id": "string", + "race_source_value": "string", + "race_source_concept_id": "string", + "ethnicity_source_value": "string", + "ethnicity_source_concept_id": "string" + }, + "updated": false + }, + "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]", + "tableOptionValue": { + "useFilter": false, + "showPagination": false, + "showAggregationFooter": false + }, + "updated": false, + "initialized": false + } + }, + "commonSetting": {} + } + } + }, + "editorSetting": { + "language": "python", + "editOnDblClick": false, + "completionSupport": true, + "completionKey": "TAB" + }, + "editorMode": "ace/mode/python" + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "ERROR", + "msg": [ + { + "type": "TEXT", + "data": "Fail to execute line 14: _etl \u003d transport.get.etl(**_conf)\nTraceback (most recent call last):\n File \"/tmp/python12116617813075626551/zeppelin_python.py\", line 162, in \u003cmodule\u003e\n exec(code, _zcUserQueryNameSpace)\n File \"\u003cstdin\u003e\", line 14, in \u003cmodule\u003e\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/__init__.py\", line 189, in etl\n return IETL(**_args)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 173, in __init__\n super().__init__(transport.get.reader(**_source),_plugins)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/__init__.py\", line 156, in reader\n _handler \u003d instance(**_args)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/__init__.py\", line 135, in instance\n _datatransport \u003d IReader(_agent,_plugins,_logger) if _context \u003d\u003d \u0027read\u0027 else IWriter(_agent,_plugins,_logger)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 98, in __init__\n super().__init__(_agent,_plugins,_logger)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 41, in __init__\n self._init_plugins(plugins)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 60, in _init_plugins\n self.log(action\u003d\u0027init-plugins\u0027,caller\u003d\u0027read\u0027, input \u003d[_name for _name in _items])\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 51, in log\n _data[key] \u003d str(_data[key]) if type(_data[key]) not in [list,dict] else json.dumps(_data[key])\n File \"/data/sdk/python/3.9/lib/python3.9/json/__init__.py\", line 231, in dumps\n return _default_encoder.encode(obj)\n File \"/data/sdk/python/3.9/lib/python3.9/json/encoder.py\", line 199, in encode\n chunks \u003d self.iterencode(o, _one_shot\u003dTrue)\n File \"/data/sdk/python/3.9/lib/python3.9/json/encoder.py\", line 257, in iterencode\n return _iterencode(o, 0)\n File \"/data/sdk/python/3.9/lib/python3.9/json/encoder.py\", line 179, in default\n raise TypeError(f\u0027Object of type {o.__class__.__name__} \u0027\nTypeError: Object of type function is not JSON serializable\n" + } + ] + }, + "apps": [], + "runtimeInfos": {}, + "progressUpdateIntervalMs": 500, + "jobName": "paragraph_1730911272293_1687589480", + "id": "paragraph_1730911272293_1687589480", + "dateCreated": "2024-11-06 10:41:12.293", + "dateStarted": "2024-11-19 20:09:16.463", + "dateFinished": "2024-11-19 20:09:18.297", + "status": "ERROR" + }, + { + "text": "%drill\n\n-- SELECT \n-- patient_id,\n-- gender, _g.concept_id gender_concept_id,\n-- birthdate birth_datetime\n-- FROM openmrs.person INNER JOIN openmrs.patient ON patient_id \u003d person_id \n-- INNER JOIN edw.mz.concept _g ON _g.concept_code \u003d gender and _g.vocabulary_id \u003d \u0027Gender\u0027\n-- limit 10\nselect * from edw.mz.persona limit 10\n\n", + "user": "anonymous", + "dateUpdated": "2024-11-19 16:10:34.717", + "progress": 0, + "config": { + "colWidth": 6.0, + "fontSize": 12.0, + "enabled": true, + "results": { + "0": { + "graph": { + "mode": "table", + "height": 300.0, + "optionOpen": false, + "setting": { + "table": { + "tableGridState": {}, + "tableColumnTypeState": { + "names": { + "patient_id": "string", + "gender_source_value": "string", + "gender_concept_id": "string", + "birth_datetime": "string", + "day_of_birth": "string", + "month_of_birth": "string", + "year_of_birth": "string" + }, + "updated": false + }, + "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]", + "tableOptionValue": { + "useFilter": false, + "showPagination": false, + "showAggregationFooter": false + }, + "updated": false, + "initialized": false + } + }, + "commonSetting": {} + } + }, + "1": { + "graph": { + "mode": "table", + "height": 300.0, + "optionOpen": false, + "setting": { + "table": { + "tableGridState": {}, + "tableColumnTypeState": { + "names": { + "person_attribute_id": "string", + "person_id": "string", + "value": "string", + "person_attribute_type_id": "string", + "creator": "string", + "date_created": "string", + "changed_by": "string", + "date_changed": "string", + "voided": "string", + "voided_by": "string", + "date_voided": "string", + "void_reason": "string", + "uuid": "string" + }, + "updated": false + }, + "tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]", + "tableOptionValue": { + "useFilter": false, + "showPagination": false, + "showAggregationFooter": false + }, + "updated": false, + "initialized": false + } + }, + "commonSetting": {} + } + } + }, + "editorSetting": { + "language": "sql", + "editOnDblClick": false, + "completionSupport": true + }, + "editorMode": "ace/mode/sql", + "lineNumbers": true + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "SUCCESS", + "msg": [ + { + "type": "TABLE", + "data": "patient_id\tgender_source_value\tgender_concept_id\tbirth_datetime\tday_of_birth\tmonth_of_birth\tyear_of_birth\n6633\tM\t8507\t1989-11-11\t11\t11\t1989\n6634\tM\t8507\t1979-06-03\t3\t6\t1979\n6635\tF\t8532\t1977-06-24\t24\t6\t1977\n6636\tF\t8532\t1980-06-23\t23\t6\t1980\n6637\tF\t8532\t1989-09-19\t19\t9\t1989\n6638\tM\t8507\t1985-07-14\t14\t7\t1985\n6639\tM\t8507\t1978-11-02\t2\t11\t1978\n6640\tF\t8532\t1982-05-21\t21\t5\t1982\n6641\tF\t8532\t2009-11-29\t29\t11\t2009\n6642\tM\t8507\t1979-08-05\t5\t8\t1979\n" + } + ] + }, + "apps": [], + "runtimeInfos": {}, + "progressUpdateIntervalMs": 500, + "jobName": "paragraph_1731007189281_974026699", + "id": "paragraph_1731007189281_974026699", + "dateCreated": "2024-11-07 13:19:49.281", + "dateStarted": "2024-11-19 16:10:33.845", + "dateFinished": "2024-11-19 16:10:34.607", + "status": "FINISHED" + }, + { + "text": "%drill\n", + "user": "anonymous", + "dateUpdated": "2024-11-19 10:52:57.800", + "progress": 0, + "config": {}, + "settings": { + "params": {}, + "forms": {} + }, + "apps": [], + "runtimeInfos": {}, + "progressUpdateIntervalMs": 500, + "jobName": "paragraph_1732035177800_906553724", + "id": "paragraph_1732035177800_906553724", + "dateCreated": "2024-11-19 10:52:57.800", + "status": "READY" + } + ], + "name": "Notebook", + "id": "2KCD3MVK1", + "defaultInterpreterGroup": "python", + "version": "0.11.1", + "noteParams": {}, + "noteForms": {}, + "angularObjects": {}, + "config": { + "isZeppelinNotebookCronEnable": false + }, + "info": {} +} \ No newline at end of file