This commit is contained in:
Steve Nyemba 2024-11-26 11:34:18 -06:00
commit aa2f8d0bbf
1 changed files with 376 additions and 0 deletions

376
Notebook_2KCD3MVK1.zpln Normal file
View File

@ -0,0 +1,376 @@
{
"paragraphs": [
{
"text": "import transport\nimport json\nimport pandas as pd\nimport numpy as np\n\nf \u003d open(\u0027/home/steve/git/mz/data/OMOP/output/5.4/omop-5.4.json\u0027)\n_omop \u003d json.loads(f.read().lower())\nf.close()\nedwr \u003d transport.get.reader(provider\u003d\u0027iceberg\u0027,catalog\u003d\u0027mz\u0027,database\u003d\u0027edw.mz\u0027,table\u003d\u0027person\u0027)\ntransport.__version__",
"user": "steve",
"dateUpdated": "2024-11-26 11:31:44.428",
"progress": 0,
"config": {
"colWidth": 6.0,
"fontSize": 13.0,
"enabled": true,
"results": {},
"editorSetting": {
"language": "python",
"editOnDblClick": false,
"completionSupport": true
},
"editorMode": "ace/mode/python"
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "TEXT",
"data": "\u00272.4.6\u0027\n"
}
]
},
"apps": [],
"runtimeInfos": {},
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1730907719628_970278257",
"id": "paragraph_1730907719628_970278257",
"dateCreated": "2024-11-06 09:41:59.628",
"dateStarted": "2024-11-26 11:31:44.448",
"dateFinished": "2024-11-26 11:31:54.973",
"status": "FINISHED"
},
{
"text": "#\n# create tables and verify table \n# edwr.apply(_omop[0].lower())\n#\n#\n# myr \u003d transport.get.reader(label\u003d\u0027mz-openmrs\u0027)\n\nsql \u003d \"\"\"\nSELECT \npatient.patient_id,\nperson.gender gender_source_value, _g.concept_id gender_concept_id,\nperson.birthdate birth_datetime,\nextract(DAY FROM person.birthdate) AS day_of_birth, extract(MONTH FROM person.birthdate) AS month_of_birth, extract(YEAR FROM person.birthdate) AS year_of_birth\nFROM openmrs.person \nINNER JOIN openmrs.patient ON patient_id \u003d person_id \nINNER JOIN edw.mz.concept _g ON _g.concept_code \u003d person.gender and _g.vocabulary_id \u003d \u0027Gender\u0027\nlimit 100\n\"\"\"\n",
"user": "anonymous",
"dateUpdated": "2024-11-19 20:09:11.196",
"progress": 0,
"config": {
"editorSetting": {
"language": "python",
"editOnDblClick": false,
"completionSupport": true
},
"colWidth": 6.0,
"editorMode": "ace/mode/python",
"fontSize": 9.0,
"results": {
"0": {
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"setting": {
"table": {
"tableGridState": {},
"tableColumnTypeState": {
"names": {
"SCHEMA_NAME": "string"
},
"updated": false
},
"tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]",
"tableOptionValue": {
"useFilter": false,
"showPagination": false,
"showAggregationFooter": false
},
"updated": false,
"initialized": false
}
},
"commonSetting": {}
}
}
},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": []
},
"apps": [],
"runtimeInfos": {},
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1730909282331_624734416",
"id": "paragraph_1730909282331_624734416",
"dateCreated": "2024-11-06 10:08:02.331",
"dateStarted": "2024-11-19 20:09:11.198",
"dateFinished": "2024-11-19 20:09:12.053",
"status": "FINISHED"
},
{
"text": "_omop[0]",
"user": "anonymous",
"dateUpdated": "2024-11-19 21:44:14.829",
"progress": 0,
"config": {
"colWidth": 12.0,
"fontSize": 9.0,
"enabled": true,
"results": {},
"editorSetting": {
"language": "python",
"editOnDblClick": false,
"completionSupport": true
},
"editorMode": "ace/mode/python"
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "TEXT",
"data": "\u0027create table edw.mz.person ( person_id integer not null, gender_concept_id integer not null, year_of_birth integer not null, month_of_birth integer , day_of_birth integer , birth_datetime timestamp , race_concept_id integer not null, ethnicity_concept_id integer not null, location_id integer , provider_id integer , care_site_id integer , person_source_value varchar(50) , gender_source_value varchar(50) , gender_source_concept_id integer , race_source_value varchar(50) , race_source_concept_id integer , ethnicity_source_value varchar(50) , ethnicity_source_concept_id integer ) using iceberg partitioned by (person_id)\u0027\n"
}
]
},
"apps": [],
"runtimeInfos": {},
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1732035111985_1405533519",
"id": "paragraph_1732035111985_1405533519",
"dateCreated": "2024-11-19 10:51:51.986",
"dateStarted": "2024-11-19 21:44:14.833",
"dateFinished": "2024-11-19 21:44:14.976",
"status": "FINISHED"
},
{
"text": "import pandas as pd\nimport numpy as np\n\n_schema \u003d edwr.meta(table\u003d\u0027person\u0027)\ndef _add(_data):\n print (\u0027***************\u0027)\n _attr \u003d [_item[\u0027name\u0027] for _item in _schema]\n _xattr\u003d list( set(_attr) - set(_data.columns) )\n return pd.concat([_data,pd.DataFrame(columns\u003d_xattr)])\n\n# dreader \u003d transport.get.reader(provider\u003d\u0027drill\u0027,database\u003d\u0027edw.mz\u0027)\n# _df \u003d dreader.read(sql\u003dsql)\n_conf \u003d {\u0027source\u0027:{\u0027provider\u0027:\u0027drill\u0027,\u0027database\u0027:\u0027edw.mz\u0027,\u0027chunksize\u0027:50,\u0027plugins\u0027:[_add],\u0027args\u0027:{\u0027sql\u0027:sql}},\u0027target\u0027:{\u0027provider\u0027:\u0027iceberg\u0027,\u0027database\u0027:\u0027edw.mz\u0027,\u0027table\u0027:\u0027persona\u0027}}\n_etl \u003d transport.get.etl(**_conf)\n_etl.run()\n# for row in _df:\n# print (row)\n# break\n",
"user": "anonymous",
"dateUpdated": "2024-11-19 20:09:16.461",
"progress": 0,
"config": {
"colWidth": 6.0,
"fontSize": 9.0,
"enabled": true,
"results": {
"0": {
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"setting": {
"table": {
"tableGridState": {},
"tableColumnTypeState": {
"names": {
"person_id": "string",
"gender_concept_id": "string",
"year_of_birth": "string",
"month_of_birth": "string",
"day_of_birth": "string",
"birth_datetime": "string",
"race_concept_id": "string",
"ethnicity_concept_id": "string",
"location_id": "string",
"provider_id": "string",
"care_site_id": "string",
"person_source_value": "string",
"gender_source_value": "string",
"gender_source_concept_id": "string",
"race_source_value": "string",
"race_source_concept_id": "string",
"ethnicity_source_value": "string",
"ethnicity_source_concept_id": "string"
},
"updated": false
},
"tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]",
"tableOptionValue": {
"useFilter": false,
"showPagination": false,
"showAggregationFooter": false
},
"updated": false,
"initialized": false
}
},
"commonSetting": {}
}
}
},
"editorSetting": {
"language": "python",
"editOnDblClick": false,
"completionSupport": true,
"completionKey": "TAB"
},
"editorMode": "ace/mode/python"
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "ERROR",
"msg": [
{
"type": "TEXT",
"data": "Fail to execute line 14: _etl \u003d transport.get.etl(**_conf)\nTraceback (most recent call last):\n File \"/tmp/python12116617813075626551/zeppelin_python.py\", line 162, in \u003cmodule\u003e\n exec(code, _zcUserQueryNameSpace)\n File \"\u003cstdin\u003e\", line 14, in \u003cmodule\u003e\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/__init__.py\", line 189, in etl\n return IETL(**_args)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 173, in __init__\n super().__init__(transport.get.reader(**_source),_plugins)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/__init__.py\", line 156, in reader\n _handler \u003d instance(**_args)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/__init__.py\", line 135, in instance\n _datatransport \u003d IReader(_agent,_plugins,_logger) if _context \u003d\u003d \u0027read\u0027 else IWriter(_agent,_plugins,_logger)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 98, in __init__\n super().__init__(_agent,_plugins,_logger)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 41, in __init__\n self._init_plugins(plugins)\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 60, in _init_plugins\n self.log(action\u003d\u0027init-plugins\u0027,caller\u003d\u0027read\u0027, input \u003d[_name for _name in _items])\n File \"/data/sdk/python/3.9/lib/python3.9/site-packages/transport/iowrapper.py\", line 51, in log\n _data[key] \u003d str(_data[key]) if type(_data[key]) not in [list,dict] else json.dumps(_data[key])\n File \"/data/sdk/python/3.9/lib/python3.9/json/__init__.py\", line 231, in dumps\n return _default_encoder.encode(obj)\n File \"/data/sdk/python/3.9/lib/python3.9/json/encoder.py\", line 199, in encode\n chunks \u003d self.iterencode(o, _one_shot\u003dTrue)\n File \"/data/sdk/python/3.9/lib/python3.9/json/encoder.py\", line 257, in iterencode\n return _iterencode(o, 0)\n File \"/data/sdk/python/3.9/lib/python3.9/json/encoder.py\", line 179, in default\n raise TypeError(f\u0027Object of type {o.__class__.__name__} \u0027\nTypeError: Object of type function is not JSON serializable\n"
}
]
},
"apps": [],
"runtimeInfos": {},
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1730911272293_1687589480",
"id": "paragraph_1730911272293_1687589480",
"dateCreated": "2024-11-06 10:41:12.293",
"dateStarted": "2024-11-19 20:09:16.463",
"dateFinished": "2024-11-19 20:09:18.297",
"status": "ERROR"
},
{
"text": "%drill\n\n-- SELECT \n-- patient_id,\n-- gender, _g.concept_id gender_concept_id,\n-- birthdate birth_datetime\n-- FROM openmrs.person INNER JOIN openmrs.patient ON patient_id \u003d person_id \n-- INNER JOIN edw.mz.concept _g ON _g.concept_code \u003d gender and _g.vocabulary_id \u003d \u0027Gender\u0027\n-- limit 10\nselect * from edw.mz.persona limit 10\n\n",
"user": "anonymous",
"dateUpdated": "2024-11-19 16:10:34.717",
"progress": 0,
"config": {
"colWidth": 6.0,
"fontSize": 12.0,
"enabled": true,
"results": {
"0": {
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"setting": {
"table": {
"tableGridState": {},
"tableColumnTypeState": {
"names": {
"patient_id": "string",
"gender_source_value": "string",
"gender_concept_id": "string",
"birth_datetime": "string",
"day_of_birth": "string",
"month_of_birth": "string",
"year_of_birth": "string"
},
"updated": false
},
"tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]",
"tableOptionValue": {
"useFilter": false,
"showPagination": false,
"showAggregationFooter": false
},
"updated": false,
"initialized": false
}
},
"commonSetting": {}
}
},
"1": {
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"setting": {
"table": {
"tableGridState": {},
"tableColumnTypeState": {
"names": {
"person_attribute_id": "string",
"person_id": "string",
"value": "string",
"person_attribute_type_id": "string",
"creator": "string",
"date_created": "string",
"changed_by": "string",
"date_changed": "string",
"voided": "string",
"voided_by": "string",
"date_voided": "string",
"void_reason": "string",
"uuid": "string"
},
"updated": false
},
"tableOptionSpecHash": "[{\"name\":\"useFilter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable filter for columns\"},{\"name\":\"showPagination\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable pagination for better navigation\"},{\"name\":\"showAggregationFooter\",\"valueType\":\"boolean\",\"defaultValue\":false,\"widget\":\"checkbox\",\"description\":\"Enable a footer for displaying aggregated values\"}]",
"tableOptionValue": {
"useFilter": false,
"showPagination": false,
"showAggregationFooter": false
},
"updated": false,
"initialized": false
}
},
"commonSetting": {}
}
}
},
"editorSetting": {
"language": "sql",
"editOnDblClick": false,
"completionSupport": true
},
"editorMode": "ace/mode/sql",
"lineNumbers": true
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "TABLE",
"data": "patient_id\tgender_source_value\tgender_concept_id\tbirth_datetime\tday_of_birth\tmonth_of_birth\tyear_of_birth\n6633\tM\t8507\t1989-11-11\t11\t11\t1989\n6634\tM\t8507\t1979-06-03\t3\t6\t1979\n6635\tF\t8532\t1977-06-24\t24\t6\t1977\n6636\tF\t8532\t1980-06-23\t23\t6\t1980\n6637\tF\t8532\t1989-09-19\t19\t9\t1989\n6638\tM\t8507\t1985-07-14\t14\t7\t1985\n6639\tM\t8507\t1978-11-02\t2\t11\t1978\n6640\tF\t8532\t1982-05-21\t21\t5\t1982\n6641\tF\t8532\t2009-11-29\t29\t11\t2009\n6642\tM\t8507\t1979-08-05\t5\t8\t1979\n"
}
]
},
"apps": [],
"runtimeInfos": {},
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1731007189281_974026699",
"id": "paragraph_1731007189281_974026699",
"dateCreated": "2024-11-07 13:19:49.281",
"dateStarted": "2024-11-19 16:10:33.845",
"dateFinished": "2024-11-19 16:10:34.607",
"status": "FINISHED"
},
{
"text": "%drill\n",
"user": "anonymous",
"dateUpdated": "2024-11-19 10:52:57.800",
"progress": 0,
"config": {},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"runtimeInfos": {},
"progressUpdateIntervalMs": 500,
"jobName": "paragraph_1732035177800_906553724",
"id": "paragraph_1732035177800_906553724",
"dateCreated": "2024-11-19 10:52:57.800",
"status": "READY"
}
],
"name": "Notebook",
"id": "2KCD3MVK1",
"defaultInterpreterGroup": "python",
"version": "0.11.1",
"noteParams": {},
"noteForms": {},
"angularObjects": {},
"config": {
"isZeppelinNotebookCronEnable": false
},
"info": {}
}