bug fix: sqlite and cursors and transport

2023-09-30 00:18:37 -05:00 · 2023-09-30 00:18:37 -05:00 · 2bb07aedec
parent 3f7f3d7306
commit 2bb07aedec
3 changed files with 80 additions and 63 deletions
--- a/bin/transport
+++ b/bin/transport
@ -46,6 +46,7 @@ import time
 from multiprocessing import Process
 import typer
 import os
 import transport
 from transport import etl
 from transport import providers
@ -88,7 +89,7 @@ def move (path,index=None):
            _config = _config[ int(index)]
            etl.instance(**_config)
        else:
-            etl.instance(_config)
+            etl.instance(config=_config)
        #
        # if type(_config) == dict :
@ -109,19 +110,30 @@ def move (path,index=None):
        #         jobs.append(thread())
        #         if _config.index(_args) == 0 :
        #             thread.join()
-            wait(jobs)
+            # wait(jobs)
-
+@app.command()
 def version():
     print (transport.version.__version__)
@app.command()
 def generate (path:str):
 	__doc__="""
 	"""
-	_config = [{"source":{"provider":"http","url":"https://cdn.wsform.com/wp-content/uploads/2020/06/agreement.csv"},"target":{"provider":"file","path":"addresses.csv","delimiter":"csv"}}]
+	This function will generate a configuration template to give a sense of how to create one
 	"""
 	_config = [
          {
               "source":{"provider":"http","url":"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv"},
             "target":
            [{"provider":"file","path":"addresses.csv","delimiter":"csv"},{"provider":"sqlite","database":"sample.db3","table":"addresses"}]
            }
            ]
 	file = open(path,'w')
 	file.write(json.dumps(_config))
 	file.close()
-	
+@app.command()
-# if __name__ == '__main__' :
+def usage():
     print (__doc__)	
 if __name__ == '__main__' :
     app()
 # 	#
 # 	# Load information from the file ...
 # 	if 'help' in SYS_ARGS :
--- a/transport/disk.py
+++ b/transport/disk.py
@ -62,34 +62,25 @@ class DiskWriter(Writer):
 	"""
 	THREAD_LOCK = Lock()
 	def __init__(self,**params):
-		Writer.__init__(self)
+		super().__init__()
-		self.cache['meta'] = {'cols':0,'rows':0,'delimiter':None}
+		self._path = params['path']
-		if 'path' in params:
+		self._delimiter = params['delimiter']
-			self.path = params['path']
+		
-		else:
+	# def meta(self):
-			self.path = 'data-transport.log'
+	# 	return self.cache['meta']
-		self.delimiter = params['delimiter'] if 'delimiter' in params else None
+	# def isready(self):
-		# if 'name' in params:
+	# 	"""
-		# 	self.name = params['name'];
+	# 		This function determines if the class is ready for execution or not
-		# else:
+	# 		i.e it determines if the preconditions of met prior execution
-		# 	self.name = 'data-transport.log'
+	# 	"""
-		# if os.path.exists(self.path) == False:
+	# 	return True
-		# 	os.mkdir(self.path)
+	# 	# p =  self.path is not None and os.path.exists(self.path)
-	def meta(self):
+	# 	# q = self.name is not None 
-		return self.cache['meta']
+	# 	# return p and q
-	def isready(self):
+	# def format (self,row):
-		"""
+	# 	self.cache['meta']['cols'] += len(row) if isinstance(row,list) else len(row.keys())
-			This function determines if the class is ready for execution or not
+	# 	self.cache['meta']['rows'] += 1
-			i.e it determines if the preconditions of met prior execution
+	# 	return (self.delimiter.join(row) if self.delimiter else json.dumps(row))+"\n"
 		"""
 		return True
 		# p =  self.path is not None and os.path.exists(self.path)
 		# q = self.name is not None 
 		# return p and q
 	def format (self,row):
 		self.cache['meta']['cols'] += len(row) if isinstance(row,list) else len(row.keys())
 		self.cache['meta']['rows'] += 1
 		return (self.delimiter.join(row) if self.delimiter else json.dumps(row))+"\n"
 	def write(self,info,**_args):
 		"""
 			This function writes a record to a designated file
@ -97,21 +88,30 @@ class DiskWriter(Writer):
 			@param	row	row to be written
 		"""
 		try:
 			_mode = 'a' if 'overwrite' not in _args else 'w'
 			DiskWriter.THREAD_LOCK.acquire()
-			f = open(self.path,_mode)
+			# # _path = _args['path'] if 'path' in _args else self.path
-			if self.delimiter :
+			# # _delim= _args['delimiter'] if 'delimiter' in _args else self._delimiter
-				if type(info) == list :
+			# # info.to_csv(_path,sep=_delim)
-					for row in info :
+			# info.to_csv(self.path)
-						f.write(self.format(row))
+			# f = open(self.path,_mode)
-				else:
+			# if self.delimiter :
-					f.write(self.format(info))
+			# 	if type(info) == list :
-			else:
+			# 		for row in info :
-				if not type(info) == str :
+			# 			f.write(self.format(row))
-					f.write(json.dumps(info)+"\n")
+			# 	else:
-				else:
+			# 		f.write(self.format(info))
-					f.write(info)
+			# else:
-			f.close()
+			# 	if not type(info) == str :
 			# 		f.write(json.dumps(info)+"\n")
 			# 	else:
 			# 		f.write(info)
 			# f.close()
 			_delim = self._delimiter if 'delimiter' not in _args else _args['delimiter']
 			_path = self.path if 'path' not  in _args else _args['path']
 			info.to_csv(_path,index=False,sep=_delim)
 			pass
 		except Exception as e:
 			#
 			# Not sure what should be done here ...
@ -220,16 +220,19 @@ class SQLiteWriter(SQLite,DiskWriter) :
 		#
 		# If the table doesn't exist we should create it
 		#
-	def write(self,info):
+	def write(self,info,**_args):
 		"""
 		"""
 		if not self.fields :
 			if type(info) == pd.DataFrame :
 				_columns = list(info.columns) 
 			self.init(list(info.keys()))
 		if type(info) == dict :
 			info = [info]
 		elif type(info) == pd.DataFrame :
 			info = info.fillna('')
 			info = info.to_dict(orient='records')
 		SQLiteWriter.LOCK.acquire()
--- a/transport/etl.py
+++ b/transport/etl.py
@ -70,7 +70,7 @@ class Transporter(Process):
        # self._onerror = _args['onError']
        self._source = _args['source']
        self._target = _args['target']
-
+        
        #
        # Let's insure we can support multiple targets
        self._target = [self._target] if type(self._target) != list else self._target
@ -90,16 +90,18 @@ class Transporter(Process):
        This function will write a data-frame to a designated data-store, The function is built around a delegation design pattern
        :data   data-frame or object to be written
        """
-        for _target in self._target :
+        if _data.shape[0] > 0 :
-            if 'write' not in _target :
+            for _target in self._target :
-                _target['context'] = 'write'
+                if 'write' not in _target :
-                _target['lock'] = True
+                    _target['context'] = 'write'
-            else:
+                    # _target['lock'] = True
-                _target['write']['lock'] = True
+                else:
-            _writer = transport.factory.instance(**_target)
+                    # _target['write']['lock'] = True
-            _writer.write(_data,**_args)
+                    pass
-            if hasattr(_writer,'close') :
+                _writer = transport.factory.instance(**_target)
-                _writer.close()
+                _writer.write(_data.copy(),**_args)
                if hasattr(_writer,'close') :
                    _writer.close()
    def write(self,_df,**_args):
        """
@ -109,12 +111,12 @@ class Transporter(Process):
        # _df = self.read()
        _segments = np.array_split(np.range(_df.shape[0]),SEGMENT_COUNT) if _df.shape[0] > MAX_ROWS else np.array( [np.arange(_df.shape[0])])
        # _index = 0
-       
+        
        for _indexes in _segments :
            _fwd_args = {} if not _args else _args
-            self._delegate_write(_df.iloc[_indexes],**_fwd_args)
+            self._delegate_write(_df.iloc[_indexes])
            #
            # @TODO: Perhaps consider writing up each segment in a thread/process (speeds things up?)
            pass