optimizations mongodb

This commit is contained in:
Steve Nyemba 2022-03-19 00:02:53 -05:00
parent 38e1bce6c2
commit e5fadc64a0
2 changed files with 21 additions and 11 deletions

View File

@ -63,8 +63,8 @@ class Post(Process):
else: else:
self.PROVIDER = args['target']['provider'] self.PROVIDER = args['target']['provider']
args['target']['context'] = 'write' args['target']['context'] = 'write'
self.store = args['target']
self.writer = transport.instance(**args['target']) # self.writer = transport.instance(**args['target'])
# #
# If the table doesn't exist, maybe create it? # If the table doesn't exist, maybe create it?
# #
@ -86,9 +86,9 @@ class Post(Process):
else: else:
value = '' value = ''
_info[name] = _info[name].fillna(value) _info[name] = _info[name].fillna(value)
writer = transport.factory.instance(**self.store)
self.writer.write(_info) writer.write(_info)
self.writer.close() writer.close()
class ETL (Process): class ETL (Process):
@ -139,11 +139,11 @@ class ETL (Process):
# #
# @TODO: locks # @TODO: locks
for i in np.arange(self.JOB_COUNT) : for i in np.arange(self.JOB_COUNT) :
print ()
print (i)
_id = 'segment # '.join([str(i),' ',self.name]) _id = 'segment # '.join([str(i),' ',self.name])
indexes = rows[i] indexes = rows[i]
segment = idf.loc[indexes,:].copy() #.to_dict(orient='records') segment = idf.loc[indexes,:].copy() #.to_dict(orient='records')
if segment.shape[0] == 0 :
continue
proc = Post(target = self._oargs,rows = segment,name=_id) proc = Post(target = self._oargs,rows = segment,name=_id)
self.jobs.append(proc) self.jobs.append(proc)
proc.start() proc.start()

View File

@ -20,7 +20,9 @@ else:
from common import Reader, Writer from common import Reader, Writer
import json import json
import re import re
from multiprocessing import Lock, RLock
class Mongo : class Mongo :
lock = RLock()
""" """
Basic mongodb functions are captured here Basic mongodb functions are captured here
""" """
@ -44,6 +46,7 @@ class Mongo :
self.uid = args['doc'] #-- document identifier self.uid = args['doc'] #-- document identifier
self.dbname = args['dbname'] if 'dbname' in args else args['db'] self.dbname = args['dbname'] if 'dbname' in args else args['db']
self.db = self.client[self.dbname] self.db = self.client[self.dbname]
self._lock = False if 'lock' not in args else args['lock']
def isready(self): def isready(self):
p = self.dbname in self.client.list_database_names() p = self.dbname in self.client.list_database_names()
@ -144,10 +147,17 @@ class MongoWriter(Mongo,Writer):
# if type(info) == list : # if type(info) == list :
# self.db[self.uid].insert_many(info) # self.db[self.uid].insert_many(info)
# else: # else:
if type(info) == list or type(info) == pd.DataFrame : try:
self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
else: if self._lock :
self.db[self.uid].insert_one(info) Mongo.lock.acquire()
if type(info) == list or type(info) == pd.DataFrame :
self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
else:
self.db[self.uid].insert_one(info)
finally:
if self._lock :
Mongo.lock.release()
def set(self,document): def set(self,document):
""" """
if no identifier is provided the function will delete the entire collection and set the new document. if no identifier is provided the function will delete the entire collection and set the new document.