optimizations mongodb

2022-03-19 00:02:53 -05:00 · 2022-03-19 00:02:53 -05:00 · e5fadc64a0
parent 38e1bce6c2
commit e5fadc64a0
2 changed files with 21 additions and 11 deletions
--- a/bin/transport
+++ b/bin/transport
@ -63,8 +63,8 @@ class Post(Process):
 		else:
 			self.PROVIDER = args['target']['provider']
 			args['target']['context'] = 'write'
-			
+			self.store = args['target']
-			self.writer = transport.instance(**args['target'])
+			# self.writer = transport.instance(**args['target'])
 		#
 		# If the table doesn't exist, maybe create it?
 		#
@ -86,9 +86,9 @@ class Post(Process):
 			else:
 				value = ''
 			_info[name] = _info[name].fillna(value)
-		
+		writer = transport.factory.instance(**self.store)
-		self.writer.write(_info)
+		writer.write(_info)
-		self.writer.close()
+		writer.close()
 class ETL (Process):
@ -139,11 +139,11 @@ class ETL (Process):
 			#
 			# @TODO: locks
 			for i in np.arange(self.JOB_COUNT) :
 				print ()
 				print (i)
 				_id = 'segment # '.join([str(i),' ',self.name])
 				indexes = rows[i]
 				segment = idf.loc[indexes,:].copy() #.to_dict(orient='records')
 				if segment.shape[0] == 0 :
 					continue
 				proc = Post(target = self._oargs,rows = segment,name=_id)
 				self.jobs.append(proc)
 				proc.start()
--- a/transport/mongo.py
+++ b/transport/mongo.py
@ -20,7 +20,9 @@ else:
 	from common import Reader, Writer
 import json
 import re
 from multiprocessing import Lock, RLock
 class Mongo :
    lock = RLock()
    """
    Basic mongodb functions are captured here
    """
@ -44,6 +46,7 @@ class Mongo :
        self.uid    = args['doc']  #-- document identifier
        self.dbname = args['dbname'] if 'dbname' in args else args['db']
        self.db = self.client[self.dbname]
        self._lock = False if 'lock' not in args else args['lock']
    def isready(self):
        p = self.dbname in self.client.list_database_names() 
@ -144,10 +147,17 @@ class MongoWriter(Mongo,Writer):
        # if type(info) == list :
        #     self.db[self.uid].insert_many(info)
        # else:
-        if type(info) == list or type(info) == pd.DataFrame :
+        try:
-            self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
+
-        else:
+            if self._lock :
-            self.db[self.uid].insert_one(info)
+                Mongo.lock.acquire()
            if type(info) == list or type(info) == pd.DataFrame :
                self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
            else:
                self.db[self.uid].insert_one(info)
        finally:
            if self._lock :
                Mongo.lock.release()
    def set(self,document):
        """
        if no identifier is provided the function will delete the entire collection and set the new document.