bug fix: support for netezza

Steve Nyemba 2021-06-29 15:12:28 -05:00
parent 4c810fe57a
commit 7c2e945996
2 changed files with 44 additions and 15 deletions

View File

@@ -8,12 +8,12 @@ def read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()
 args = {
     "name":"data-transport",
-    "version":"1.3.8.4",
+    "version":"1.3.8.6.1",
     "author":"The Phi Technology LLC","author_email":"info@the-phi.com",
     "license":"MIT",
     "packages":["transport"]}
 args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite']
-args["install_requires"] = ['pymongo','numpy','cloudant','pika','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
+args["install_requires"] = ['pymongo','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
 args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git"
 if sys.version_info[0] == 2 :
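The setup.py change is a version bump plus the new nzpy dependency (IBM's pure-Python Netezza driver). A minimal check, not part of the commit, that the driver resolves after installation:

    # assumes the package (or `nzpy` itself) has been installed with pip
    import nzpy
    print(nzpy)   # module object, confirms the netezza driver is importable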

View File

@@ -22,23 +22,32 @@ from google.cloud import bigquery as bq
 from multiprocessing import Lock
 import pandas as pd
 import numpy as np
+import nzpy as nz   #--- netezza drivers
 import copy
 class SQLRW :
-    PROVIDERS = {"postgresql":"5432","redshift":"5432","mysql":"3306","mariadb":"3306"}
-    DRIVERS  = {"postgresql":pg,"redshift":pg,"mysql":my,"mariadb":my}
+    PROVIDERS = {"postgresql":"5432","redshift":"5432","mysql":"3306","mariadb":"3306","netezza":5480}
+    DRIVERS  = {"postgresql":pg,"redshift":pg,"mysql":my,"mariadb":my,"netezza":nz}
+    REFERENCE = {
+        "netezza":{"port":5480,"handler":nz,"dtype":"VARCHAR(512)"},
+        "postgresql":{"port":5432,"handler":pg,"dtype":"VARCHAR"},
+        "redshift":{"port":5432,"handler":pg,"dtype":"VARCHAR"},
+        "mysql":{"port":3360,"handler":my,"dtype":"VARCHAR(256)"},
+        "mariadb":{"port":3360,"handler":my,"dtype":"VARCHAR(256)"},
+        }
     def __init__(self,**_args):
         _info = {}
-        _info['dbname'] = _args['db']
+        _info['dbname'] = _args['db'] if 'db' in _args else _args['database']
         self.table = _args['table']
         self.fields = _args['fields'] if 'fields' in _args else []
+        _provider = _args['provider']
         if 'host' in _args :
             _info['host'] = 'localhost' if 'host' not in _args else _args['host']
-            _info['port'] = SQLWriter.PROVIDERS[_args['provider']] if 'port' not in _args else _args['port']
+            # _info['port'] = SQLWriter.PROVIDERS[_args['provider']] if 'port' not in _args else _args['port']
+            _info['port'] = SQLWriter.REFERENCE[_provider]['port'] if 'port' not in _args else _args['port']
         if 'username' in _args or 'user' in _args:
             key = 'username' if 'username' in _args else 'user'
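The new REFERENCE table consolidates the per-provider defaults (port, driver module, and the column type used when creating tables) that were previously split across PROVIDERS and DRIVERS. A standalone sketch of the lookup logic, with hypothetical arguments and the driver modules replaced by strings so it runs without the database packages installed:

    # illustrative only -- mirrors how __init__ resolves the port for a provider
    REFERENCE = {
        "netezza"   : {"port":5480,"handler":"nzpy",    "dtype":"VARCHAR(512)"},
        "postgresql": {"port":5432,"handler":"psycopg2","dtype":"VARCHAR"},
    }
    _args = {"provider":"netezza","host":"example-host","database":"mydb"}   # hypothetical
    _provider = _args["provider"]
    _port = REFERENCE[_provider]["port"] if "port" not in _args else _args["port"]
    print(_provider, _port)   # -> netezza 5480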
@@ -46,7 +55,13 @@ class SQLRW :
             _info['password'] = _args['password']
         #
         # We need to load the drivers here to see what we are dealing with ...
-        _handler = SQLWriter.DRIVERS[_args['provider']]
+        # _handler = SQLWriter.DRIVERS[_args['provider']]
+        _handler = SQLWriter.REFERENCE[_provider]['handler']
+        self._dtype = SQLWriter.REFERENCE[_provider]['dtype'] if 'dtype' not in _args else _args['dtype']
+        if _handler == nz :
+            _info['database'] = _info['dbname']
+            _info['securityLevel'] = 0
+            del _info['dbname']
         self.conn = _handler.connect(**_info)
     def isready(self):
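For Netezza the constructor renames dbname to database and adds securityLevel=0 before passing the keyword arguments to the driver, matching the argument names nzpy.connect takes. A minimal direct connection sketch, where host, user, password and database are placeholders rather than values from this commit:

    import nzpy

    # placeholder connection parameters -- replace with real ones
    conn = nzpy.connect(user="someuser", password="secret", host="netezza-host",
                        port=5480, database="mydb", securityLevel=0)
    cursor = conn.cursor()
    # ... run queries via cursor.execute(...) using positional '?' placeholders
    conn.close()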
@@ -118,11 +133,13 @@ class SQLWriter(SQLRW,Writer):
     def make(self,fields):
         self.fields = fields
-        sql = " ".join(["CREATE TABLE",self.table," (", ",".join(fields),")"])
+        sql = " ".join(["CREATE TABLE",self.table," (", ",".join([ name +' '+ self._dtype for name in fields]),")"])
         cursor = self.conn.cursor()
         try:
             cursor.execute(sql)
         except Exception as e :
+            print (e)
             pass
         finally:
             cursor.close()
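make() now types every column with the provider-specific dtype pulled from REFERENCE (VARCHAR(512) for Netezza) instead of emitting bare column names. A quick standalone illustration of the statement it builds, using hypothetical table and field names:

    # illustrative only -- same string construction as make() above
    table  = "demo_table"                      # hypothetical
    fields = ["id","name"]                     # hypothetical
    _dtype = "VARCHAR(512)"                    # dtype selected for netezza
    sql = " ".join(["CREATE TABLE",table," (", ",".join([name+' '+_dtype for name in fields]),")"])
    print(sql)   # CREATE TABLE demo_table  ( id VARCHAR(512),name VARCHAR(512) )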
@@ -136,12 +153,14 @@ class SQLWriter(SQLRW,Writer):
             _fields = info.keys() if type(info) == dict else info[0].keys()
             _fields = list (_fields)
             self.init(_fields)
+        #
+        # @TODO: Use pandas/odbc ? Not sure b/c it requires sqlalchemy
+        #
         if type(info) != list :
             info = [info]
         cursor = self.conn.cursor()
         try:
-            _sql = "INSERT INTO :table (:fields) values (:values)".replace(":table",self.table) #.replace(":table",self.table).replace(":fields",_fields)
+            _sql = "INSERT INTO :table (:fields) VALUES (:values)".replace(":table",self.table) #.replace(":table",self.table).replace(":fields",_fields)
             if self._inspect :
                 for _row in info :
                     fields = list(_row.keys())
@@ -161,15 +180,19 @@ class SQLWriter(SQLRW,Writer):
                     pass
             else:
                 _fields = ",".join(self.fields)
-                _sql = _sql.replace(":fields",_fields)
-                _sql = _sql.replace(":values",",".join(["%("+name+")s" for name in self.fields]))
+                # _sql = _sql.replace(":fields",_fields)
+                # _sql = _sql.replace(":values",",".join(["%("+name+")s" for name in self.fields]))
+                _sql = _sql.replace("(:fields)","")
+                values = ", ".join('?'*len(self.fields))
+                _sql = _sql.replace(":values",values)
                 # for row in info :
                 #     values = ["'".join(["",value,""]) if not str(value).isnumeric() else value for value in row.values()]
                 cursor.executemany(_sql,info)
             # self.conn.commit()
         except Exception as e:
-            print (e)
+            pass
         finally:
             self.conn.commit()
             cursor.close()
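The insert path drops the explicit column list and switches from pyformat placeholders (%(name)s) to positional qmark placeholders (?), the style the nzpy driver works with. The placeholder construction, shown standalone with a hypothetical field list:

    # illustrative only -- reproduces the string manipulation above
    fields = ["id","name","city"]                       # hypothetical columns
    _sql   = "INSERT INTO :table (:fields) VALUES (:values)"
    _sql   = _sql.replace(":table","demo_table").replace("(:fields)","")
    values = ", ".join('?'*len(fields))                 # '?'*3 -> '???' -> '?, ?, ?'
    _sql   = _sql.replace(":values",values)
    print(_sql)   # INSERT INTO demo_table  VALUES (?, ?, ?)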
@@ -265,7 +288,13 @@ class BQWriter(BigQuery,Writer):
             _df.to_gbq(**self.mode) #if_exists='append',destination_table=partial,credentials=credentials,chunksize=90000)
         pass
-# import transport
+import transport
+try:
+    _args = {'type':'sql.SQLWriter','args':{'provider':'netezza','host':'ori-netezza.vumc.org','table':'IBM_CCS_DX','username':'nyembsl1','password':'Innovat10n','database':'MALIN_OMOP_RD'}}
+    df = pd
+    reader = SQLReader(**_args['args'])
+except Exception as error :
+    print (error)
 # reader = transport.factory.instance(type="sql.BQReader",args={"service_key":"/home/steve/dev/google-cloud-sdk/accounts/curation-prod.json"})
 # _df = reader.read(sql="select * from `2019q1r4_combined.person` limit 10")
 # writer = transport.factory.instance(type="sql.BQWriter",args={"service_key":"/home/steve/dev/google-cloud-sdk/accounts/curation-prod.json"})
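The ad-hoc test block added above instantiates SQLReader directly with hard-coded connection details. The usual entry point, mirroring the commented-out BigQuery examples, is the factory; a hedged sketch with placeholder host and credentials (the "sql.SQLReader" type string is assumed by analogy with the 'sql.SQLWriter' used in the test block):

    import transport
    # placeholder host/credentials -- not values from this commit
    reader = transport.factory.instance(
        type="sql.SQLReader",
        args={"provider":"netezza","host":"netezza-host","database":"MYDB",
              "table":"MYTABLE","user":"someuser","password":"secret"})
    # _df = reader.read(sql="SELECT * FROM MYTABLE LIMIT 10")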