2019-09-17 04:08:43 +00:00
|
|
|
"""
|
2022-01-29 17:15:45 +00:00
|
|
|
Data Transport, The Phi Technology LLC
|
|
|
|
Steve L. Nyemba, steve@the-phi.com
|
2019-09-17 04:08:43 +00:00
|
|
|
|
2022-01-29 17:15:45 +00:00
|
|
|
This library is designed to serve as a wrapper to a set of supported data stores :
|
2019-09-17 04:08:43 +00:00
|
|
|
- couchdb
|
|
|
|
- mongodb
|
|
|
|
- Files (character delimited)
|
|
|
|
- Queues (RabbitMQ)
|
|
|
|
- Session (Flask)
|
|
|
|
- s3
|
2022-01-29 17:15:45 +00:00
|
|
|
- sqlite
|
2019-09-17 04:08:43 +00:00
|
|
|
The supported operations are read/write and providing meta data to the calling code
|
2024-03-28 20:34:39 +00:00
|
|
|
We separated reads from writes to mitigate accidents associated with writes.
|
|
|
|
Source Code is available under MIT License:
|
|
|
|
https://healthcareio.the-phi.com/data-transport
|
|
|
|
https://hiplab.mc.vanderbilt.edu/git/hiplab/data-transport
|
2019-09-17 04:08:43 +00:00
|
|
|
"""
|
2024-02-27 18:37:16 +00:00
|
|
|
import numpy as np
|
2024-04-01 17:52:06 +00:00
|
|
|
|
2024-04-01 21:12:04 +00:00
|
|
|
from transport import sql, nosql, cloud, other
|
2024-03-28 20:34:39 +00:00
|
|
|
import pandas as pd
|
|
|
|
import json
|
2021-11-18 21:21:26 +00:00
|
|
|
import os
|
2024-06-10 05:42:42 +00:00
|
|
|
from info import __version__,__author__,__email__,__license__,__app_name__
|
|
|
|
from transport.iowrapper import IWriter, IReader, IETL
|
2024-04-01 17:52:06 +00:00
|
|
|
from transport.plugins import PluginLoader
|
2024-04-01 20:41:39 +00:00
|
|
|
from transport import providers
|
2024-06-14 19:14:12 +00:00
|
|
|
import copy
|
|
|
|
from transport import registry
|
2024-04-01 21:04:00 +00:00
|
|
|
|
2024-03-28 20:34:39 +00:00
|
|
|
# Lookup table of the available providers, filled in by init().
# Each key is a provider name and each value is a dictionary of the form
# {'module': <provider module object>, 'type': <__name__ of the module it was found in>}
PROVIDERS = {}
|
2024-06-14 19:14:12 +00:00
|
|
|
|
2024-03-28 20:34:39 +00:00
|
|
|
def init():
    """
    Fill the module-level PROVIDERS table.

    Every attribute exposed by the cloud, sql, nosql and other packages is registered under
    its own name, except attributes whose name starts with a double underscore and the
    attribute named 'common'. Each entry records the attribute itself ('module') and the
    __name__ of the package it was found in ('type').
    PROVIDERS is updated in place, a name found in several packages keeps the last one seen.
    """
    for _package in (cloud,sql,nosql,other) :
        _names = [_name for _name in dir(_package)
                    if not (_name.startswith('__') or _name == 'common')]
        for _name in _names :
            PROVIDERS[_name] = {'module':getattr(_package,_name),'type':_package.__name__}
|
|
|
|
|
|
|
|
def instance (**_args):
    """
    This function returns an object to read from or write to a supported database provider/vendor

    The parameters are resolved as follows :
        - if auth_file is provided, its JSON content is used as default values and the
          arguments passed explicitly override what is found in the file
        - if neither provider nor auth_file is provided, the registry is loaded (when it is
          not already loaded and it exists) and the entry returned for the label, if any,
          is merged on top of the arguments passed

    @provider   provider, must be one of the keys of PROVIDERS
    @context    read/write (default is read), any value other than 'read' returns a writer
    @auth_file: Optional if the database information provided is in a file. Useful for not sharing passwords
    @label      Optional name looked up in the registry (only used without provider/auth_file)
    @path       Optional location handed to registry.exists and registry.load
    @plugins    Optional object handed over as is to the IReader/IWriter wrapper
    kwargs      These are arguments that are provider/vendor specific, they are forwarded to the Reader/Writer

    @return     IReader if the context is 'read' otherwise IWriter
    @raise      Exception if auth_file does not exist, or if the provider is missing or unknown
    """
    if 'auth_file' in _args:
        filename = _args['auth_file']
        if not os.path.exists(filename) :
            raise Exception(f" {filename} was not found or is invalid")
        #
        # @TODO: add encryption module and decryption to enable this to be secure
        #
        # The context manager guarantees the file is closed even if the content is not valid JSON
        with open(filename) as f :
            _content = json.loads(f.read())
        #
        # we overwrite file parameters with arguments passed
        _args = dict (_content,**_args )

    if 'provider' not in _args and 'auth_file' not in _args :
        if not registry.isloaded () :
            if ('path' in _args and registry.exists(_args['path'] )) or registry.exists():
                if 'path' in _args :
                    registry.load(_args['path'])
                else:
                    registry.load()
        if 'label' in _args and registry.isloaded():
            #
            # The registry entry is deliberately NOT printed/logged, it may hold credentials
            _info = registry.get(_args['label'])
            if _info :
                #
                # registry parameters take precedence over the arguments passed
                _args = dict(_args,**_info)

    if 'provider' not in _args or _args['provider'] not in PROVIDERS :
        #
        # We can handle the case for an ETL object
        #
        raise Exception ("Missing or Unknown provider")

    _module = PROVIDERS[_args['provider']]['module']
    _context = _args.get('context','read')
    _pointer = getattr(_module,'Reader') if _context == 'read' else getattr(_module,'Writer')
    _agent = _pointer (**_args)
    #
    # @TODO:
    # define a logger object here that will used by the wrapper
    # this would allow us to know what the data-transport is doing and where/how it fails
    #
    # plugins are not interpreted here, whatever the caller provided is given to the wrapper
    loader = _args.get('plugins')
    return IReader(_agent,loader) if _context == 'read' else IWriter(_agent,loader)
|
2024-06-10 05:42:42 +00:00
|
|
|
class get :
    """
    Thin facade over instance/IETL so that calling code reads naturally : get.reader(...), get.writer(...), get.etl(...)
    """
    @staticmethod
    def reader (**_args):
        """
        Return a reader, the 'context' argument is forced to 'read'.
        When no argument at all is provided, the label 'default' is used.
        """
        _options = dict(_args) if _args else {'label':'default'}
        _options['context'] = 'read'
        return instance(**_options)
    @staticmethod
    def writer(**_args):
        """
        Return a writer to a database, the 'context' argument is forced to 'write'.
        When no argument at all is provided, the label 'default' is used.
        """
        _options = dict(_args) if _args else {'label':'default'}
        _options['context'] = 'write'
        return instance(**_options)
    @staticmethod
    def etl (**_args):
        """
        Return an IETL object built with the arguments provided.
        An Exception is raised unless both 'source' and 'target' are provided.
        """
        _complete = 'source' in _args and 'target' in _args
        if not _complete :
            raise Exception ("Malformed input found, object must have both 'source' and 'target' attributes")
        return IETL(**_args)
|
|
|
|
|
2024-03-28 20:34:39 +00:00
|
|
|
def supported ():
    """
    Return a pandas DataFrame summarizing the entries of PROVIDERS.

    There is one column per provider type (the 'transport.' prefix is removed from the name),
    each column lists the provider names of that type and the unused cells hold ''.
    """
    #
    # group the provider names by type, in the order they are found in PROVIDERS
    _groups = {}
    for _name,_entry in PROVIDERS.items() :
        _groups.setdefault(_entry['type'],[]).append(_name)

    _table = pd.DataFrame()
    for _type,_names in _groups.items() :
        _column = pd.DataFrame(_names,columns=[_type.replace('transport.','')])
        #
        # the first column is used as is, the following ones are outer-joined on the row index
        _table = _column if not _table.shape[0] else _column.join(_table, how='outer')
    return _table.fillna('')
|
2019-09-17 04:08:43 +00:00
|
|
|
class factory :
    # Empty namespace, its only attribute (instance) is attached right after the class definition
    pass
# Makes the module-level instance function reachable as factory.instance
# NOTE(review): presumably kept so that older calling code keeps working, confirm before removing
factory.instance = instance
# Runs when the module is imported so that PROVIDERS is filled before any call to instance/supported
init()
|