"""
Data Transport, The Phi Technology LLC
Steve L. Nyemba, steve@the-phi.com

This library is designed to serve as a wrapper around a set of supported data stores :

    - couchdb
    - mongodb
    - Files (character delimited)
    - Queues (RabbitMQ)
    - Session (Flask)
    - s3
    - sqlite

The supported operations are read and write; the library also provides metadata about the store to the calling code.

Requirements :

    pymongo
    boto
    cloudant

The configuration for a data store is as follows :

e.g.:

    mongodb
        provider:'mongodb',[port:27017],[host:localhost],db:<name>,doc:<_name>,context:<read|write>
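
    A minimal, illustrative call (the database and collection names below are placeholders, not defaults):

        import transport
        reader = transport.instance(provider='mongodb', context='read', db='demo', doc='logs')
        data   = reader.read()     # the exact return type depends on the underlying store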
"""
__author__ = 'The Phi Technology'
__version__ = '1.7.8'

import pandas as pd
import numpy as np
import json
import importlib
import sys
import sqlalchemy

if sys.version_info[0] > 2 :
    from transport.common import Reader, Writer, Console #, factory
    from transport import disk

    from transport import s3 as s3
    from transport import rabbitmq as queue
    from transport import couch as couch
    from transport import mongo as mongo
    from transport import sql as sql
    from transport import etl as etl
else:
    from common import Reader, Writer, Console #, factory
    import disk
    import queue
    import couch
    import mongo
    import s3
    import sql
    import etl

import psycopg2 as pg
import mysql.connector as my
from google.cloud import bigquery as bq
import nzpy as nz   #--- netezza drivers
import os

class providers :
    POSTGRESQL = 'postgresql'
    MONGODB = 'mongodb'

    BIGQUERY = 'bigquery'
    FILE = 'file'
    ETL = 'etl'
    SQLITE = 'sqlite'
    SQLITE3 = 'sqlite'
    REDSHIFT = 'redshift'
    NETEZZA = 'netezza'
    MYSQL = 'mysql'
    RABBITMQ = 'rabbitmq'
    MARIADB = 'mariadb'
    COUCHDB = 'couch'
    CONSOLE = 'console'
    #
    # synonyms of the above
    BQ = BIGQUERY
    MONGO = MONGODB
    FERRETDB = MONGODB
    PG = POSTGRESQL
    PSQL = POSTGRESQL
    PGSQL = POSTGRESQL

class IEncoder (json.JSONEncoder):
    def default (self,object):
        if isinstance(object, np.integer) :
            return int(object)
        elif isinstance(object, np.floating):
            return float(object)
        elif isinstance(object, np.ndarray) :
            return object.tolist()
        else:
            return super(IEncoder,self).default(object)
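
# Illustrative use of IEncoder (a sketch; the payload below is made up):
#
#   json.dumps({'count': np.int64(10), 'values': np.arange(3)}, cls=IEncoder)
#
# serializes numpy scalars and arrays that the stock json encoder would reject.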
class factory :
    TYPE = {"sql":{"providers":["postgresql","mysql","netezza","bigquery","mariadb","redshift"]}}
    PROVIDERS = {
        "etl":{"class":{"read":etl.instance,"write":etl.instance}},
        "console":{"class":{"write":Console,"read":Console}},
        "file":{"class":{"read":disk.DiskReader,"write":disk.DiskWriter}},
        "sqlite":{"class":{"read":disk.SQLiteReader,"write":disk.SQLiteWriter}},
        "postgresql":{"port":5432,"host":"localhost","database":None,"driver":pg,"default":{"type":"VARCHAR"},"class":{"read":sql.SQLReader,"write":sql.SQLWriter}},
        "redshift":{"port":5432,"host":"localhost","database":None,"driver":pg,"default":{"type":"VARCHAR"},"class":{"read":sql.SQLReader,"write":sql.SQLWriter}},
        "bigquery":{"class":{"read":sql.BQReader,"write":sql.BQWriter}},
        "mysql":{"port":3306,"host":"localhost","default":{"type":"VARCHAR(256)"},"driver":my,"class":{"read":sql.SQLReader,"write":sql.SQLWriter}},
        "mariadb":{"port":3306,"host":"localhost","default":{"type":"VARCHAR(256)"},"driver":my,"class":{"read":sql.SQLReader,"write":sql.SQLWriter}},
        "mongo":{"port":27017,"host":"localhost","class":{"read":mongo.MongoReader,"write":mongo.MongoWriter}},
        "couch":{"port":5984,"host":"localhost","class":{"read":couch.CouchReader,"write":couch.CouchWriter}},
        "netezza":{"port":5480,"driver":nz,"default":{"type":"VARCHAR(256)"},"class":{"read":sql.SQLReader,"write":sql.SQLWriter}},
        "rabbitmq":{"port":5672,"host":"localhost","class":{"read":queue.QueueReader,"write":queue.QueueWriter,"listen":queue.QueueListener,"listener":queue.QueueListener},"default":{"type":"application/json"}}}
    #
    # creating synonyms
    PROVIDERS['mongodb'] = PROVIDERS['mongo']
    PROVIDERS['couchdb'] = PROVIDERS['couch']
    PROVIDERS['bq'] = PROVIDERS['bigquery']
    PROVIDERS['sqlite3'] = PROVIDERS['sqlite']
    PROVIDERS['rabbit'] = PROVIDERS['rabbitmq']
    PROVIDERS['rabbitmq-server'] = PROVIDERS['rabbitmq']

    @staticmethod
    def instance(**_args):
        if 'type' in _args :
            #
            # Legacy code being returned
            return factory._instance(**_args)
        else:
            return instance(**_args)

    @staticmethod
    def _instance(**args):
        """
        This method creates an instance of a transport (reader or writer) given a type and its arguments
        :type   name of the class we are trying to create
        :args   the arguments needed to create the instance
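
        e.g. (illustrative only; the class name and its parameters are placeholders and must match a reader/writer available in this module's namespace):

            factory._instance(type='disk.DiskReader', args={'path':'data.csv'})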
        """
        source = args['type']
        params = args['args']
        anObject = None

        if source in ['HttpRequestReader','HttpSessionWriter']:
            #
            # @TODO: Make sure objects are serializable, be smart about them !!
            #
            aClassName = ''.join([source,'(**params)'])

        else:

            stream = json.dumps(params)
            aClassName = ''.join([source,'(**',stream,')'])

        try:
            anObject = eval(aClassName)
            #setattr(anObject,'name',source)
        except Exception as e:
            print(['Error ',e])
        return anObject

import time

def instance(**_args):
    """
    Creates an instance of a data reader or writer for the requested provider

    @param provider {file,sqlite,postgresql,redshift,bigquery,netezza,mongo,couch ...}
    @param context  read|write|rw
    @param _args    arguments to go with the data store (username,password,host,port ...)
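
    e.g. (illustrative; placeholder credentials, assumes a reachable PostgreSQL server):

        pgreader = instance(provider='postgresql', context='read', database='demo', username='steve', password='***')

    The returned object exposes the read/write methods described in the module docstring.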
    """

    provider = _args['provider']
    context  = _args['context'] if 'context' in _args else None
    _id      = context if context in list(factory.PROVIDERS[provider]['class'].keys()) else 'read'
    if _id :
        args = {'provider':_id}
        for key in factory.PROVIDERS[provider] :
            if key == 'class' :
                continue
            value = factory.PROVIDERS[provider][key]
            args[key] = value
        #
        #
        args = dict(args,**_args)

        # print (provider in factory.PROVIDERS)
        if 'class' in factory.PROVIDERS[provider]:
            pointer = factory.PROVIDERS[provider]['class'][_id]
        else:
            pointer = sql.SQLReader if _id == 'read' else sql.SQLWriter
        #
        # Let us try to establish an sqlalchemy wrapper
        try:
            account = ''
            host = ''
            if provider not in [providers.BIGQUERY,providers.MONGODB, providers.COUCHDB, providers.SQLITE, providers.CONSOLE,providers.ETL, providers.FILE, providers.RABBITMQ] :
                # if provider not in ['bigquery','mongodb','mongo','couchdb','sqlite','console','etl','file','rabbitmq'] :
                #
                # In these cases we are assuming RDBMS and thus would exclude NoSQL and BigQuery
                username = args['username'] if 'username' in args else ''
                password = args['password'] if 'password' in args else ''
                if username == '' :
                    account = ''
                else:
                    account = username + ':'+password+'@'
                host = args['host']
                if 'port' in args :
                    host = host+":"+str(args['port'])

                database = args['database']
            elif provider in [providers.SQLITE,providers.FILE]:
                account = ''
                host = ''
                database = args['path'] if 'path' in args else args['database']

            if provider not in [providers.MONGODB, providers.COUCHDB, providers.BIGQUERY, providers.CONSOLE, providers.ETL,providers.FILE,providers.RABBITMQ] :
                # if provider not in ['mongodb','mongo','couchdb','bigquery','console','etl','file','rabbitmq'] :
                uri = ''.join([provider,"://",account,host,'/',database])
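
                # e.g. with placeholder credentials steve:*** and database 'demo' on localhost:5432,
                # the uri above would be: postgresql://steve:***@localhost:5432/demo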

                e = sqlalchemy.create_engine (uri,future=True)
                args['sqlalchemy'] = e

            #
            # @TODO: Include handling of bigquery with SQLAlchemy
        except Exception as e:
            print (_args)
            print (e)

        return pointer(**args)

    return None