bug fix, enhancement with pandas
This commit is contained in:
parent
185158f006
commit
79cdc0c0d0
|
@ -41,6 +41,7 @@ class Post(Process):
|
||||||
self.rows = args['rows']
|
self.rows = args['rows']
|
||||||
def run(self):
|
def run(self):
|
||||||
_info = {"values":self.rows} if 'couch' in self.PROVIDER else self.rows
|
_info = {"values":self.rows} if 'couch' in self.PROVIDER else self.rows
|
||||||
|
|
||||||
self.writer.write(_info)
|
self.writer.write(_info)
|
||||||
self.writer.close()
|
self.writer.close()
|
||||||
|
|
||||||
|
@ -70,7 +71,7 @@ class ETL (Process):
|
||||||
rows = np.array_split(np.arange(idf.shape[0]),self.JOB_COUNT)
|
rows = np.array_split(np.arange(idf.shape[0]),self.JOB_COUNT)
|
||||||
jobs = []
|
jobs = []
|
||||||
for i in rows :
|
for i in rows :
|
||||||
segment = idf.loc[i,:].to_dict(orient='records')
|
segment = idf.loc[i,:] #.to_dict(orient='records')
|
||||||
proc = Post(target = self._oargs,rows = segment)
|
proc = Post(target = self._oargs,rows = segment)
|
||||||
jobs.append(proc)
|
jobs.append(proc)
|
||||||
proc.start()
|
proc.start()
|
||||||
|
@ -89,6 +90,6 @@ if __name__ == '__main__' :
|
||||||
if 'source' in SYS_ARGS :
|
if 'source' in SYS_ARGS :
|
||||||
_config['source'] = {"type":"disk.DiskReader","args":{"path":SYS_ARGS['source'],"delimiter":","}}
|
_config['source'] = {"type":"disk.DiskReader","args":{"path":SYS_ARGS['source'],"delimiter":","}}
|
||||||
|
|
||||||
_config['jobs'] = 10 if 'jobs' not in SYS_ARGS else SYS_ARGS['jobs']
|
_config['jobs'] = 10 if 'jobs' not in SYS_ARGS else int(SYS_ARGS['jobs'])
|
||||||
etl = ETL (**_config)
|
etl = ETL (**_config)
|
||||||
etl.start()
|
etl.start()
|
2
setup.py
2
setup.py
|
@ -8,7 +8,7 @@ def read(fname):
|
||||||
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
||||||
args = {
|
args = {
|
||||||
"name":"data-transport",
|
"name":"data-transport",
|
||||||
"version":"1.3.9.0",
|
"version":"1.3.9.2",
|
||||||
"author":"The Phi Technology LLC","author_email":"info@the-phi.com",
|
"author":"The Phi Technology LLC","author_email":"info@the-phi.com",
|
||||||
"license":"MIT",
|
"license":"MIT",
|
||||||
"packages":["transport"]}
|
"packages":["transport"]}
|
||||||
|
|
|
@ -142,8 +142,8 @@ class MongoWriter(Mongo,Writer):
|
||||||
# if type(info) == list :
|
# if type(info) == list :
|
||||||
# self.db[self.uid].insert_many(info)
|
# self.db[self.uid].insert_many(info)
|
||||||
# else:
|
# else:
|
||||||
if (type(info) == list) :
|
if type(info) == list or type(info) == pd.DataFrame :
|
||||||
self.db[self.uid].insert_many(info)
|
self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
|
||||||
else:
|
else:
|
||||||
self.db[self.uid].insert_one(info)
|
self.db[self.uid].insert_one(info)
|
||||||
def set(self,document):
|
def set(self,document):
|
||||||
|
|
|
@ -157,14 +157,23 @@ class SQLWriter(SQLRW,Writer):
|
||||||
# inspect = False if 'inspect' not in _args else _args['inspect']
|
# inspect = False if 'inspect' not in _args else _args['inspect']
|
||||||
# cast = False if 'cast' not in _args else _args['cast']
|
# cast = False if 'cast' not in _args else _args['cast']
|
||||||
if not self.fields :
|
if not self.fields :
|
||||||
_fields = info.keys() if type(info) == dict else info[0].keys()
|
if type(info) == list :
|
||||||
|
_fields = info[0].keys()
|
||||||
|
elif type(info) == dict :
|
||||||
|
_fields = info.keys()
|
||||||
|
elif type(info) == pd.DataFrame :
|
||||||
|
_fields = info.columns
|
||||||
|
|
||||||
|
# _fields = info.keys() if type(info) == dict else info[0].keys()
|
||||||
_fields = list (_fields)
|
_fields = list (_fields)
|
||||||
self.init(_fields)
|
self.init(_fields)
|
||||||
#
|
#
|
||||||
# @TODO: Use pandas/odbc ? Not sure b/c it requires sqlalchemy
|
# @TODO: Use pandas/odbc ? Not sure b/c it requires sqlalchemy
|
||||||
#
|
#
|
||||||
if type(info) != list :
|
if type(info) != list :
|
||||||
info = [info]
|
#
|
||||||
|
# We are assuming 2 cases i.e dict or pd.DataFrame
|
||||||
|
info = [info] if type(info) == dict else info.values.tolist()
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
try:
|
try:
|
||||||
_sql = "INSERT INTO :table (:fields) VALUES (:values)".replace(":table",self.table) #.replace(":table",self.table).replace(":fields",_fields)
|
_sql = "INSERT INTO :table (:fields) VALUES (:values)".replace(":table",self.table) #.replace(":table",self.table).replace(":fields",_fields)
|
||||||
|
|
Loading…
Reference in New Issue