bug fix: sqlite and cursors and transport
This commit is contained in:
parent
3f7f3d7306
commit
2bb07aedec
|
@ -46,6 +46,7 @@ import time
|
||||||
from multiprocessing import Process
|
from multiprocessing import Process
|
||||||
import typer
|
import typer
|
||||||
import os
|
import os
|
||||||
|
import transport
|
||||||
from transport import etl
|
from transport import etl
|
||||||
from transport import providers
|
from transport import providers
|
||||||
|
|
||||||
|
@ -88,7 +89,7 @@ def move (path,index=None):
|
||||||
_config = _config[ int(index)]
|
_config = _config[ int(index)]
|
||||||
etl.instance(**_config)
|
etl.instance(**_config)
|
||||||
else:
|
else:
|
||||||
etl.instance(_config)
|
etl.instance(config=_config)
|
||||||
|
|
||||||
#
|
#
|
||||||
# if type(_config) == dict :
|
# if type(_config) == dict :
|
||||||
|
@ -109,19 +110,30 @@ def move (path,index=None):
|
||||||
# jobs.append(thread())
|
# jobs.append(thread())
|
||||||
# if _config.index(_args) == 0 :
|
# if _config.index(_args) == 0 :
|
||||||
# thread.join()
|
# thread.join()
|
||||||
wait(jobs)
|
# wait(jobs)
|
||||||
|
@app.command()
|
||||||
|
def version():
|
||||||
|
print (transport.version.__version__)
|
||||||
@app.command()
|
@app.command()
|
||||||
def generate (path:str):
|
def generate (path:str):
|
||||||
__doc__="""
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
_config = [{"source":{"provider":"http","url":"https://cdn.wsform.com/wp-content/uploads/2020/06/agreement.csv"},"target":{"provider":"file","path":"addresses.csv","delimiter":"csv"}}]
|
This function will generate a configuration template to give a sense of how to create one
|
||||||
|
"""
|
||||||
|
_config = [
|
||||||
|
{
|
||||||
|
"source":{"provider":"http","url":"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv"},
|
||||||
|
"target":
|
||||||
|
[{"provider":"file","path":"addresses.csv","delimiter":"csv"},{"provider":"sqlite","database":"sample.db3","table":"addresses"}]
|
||||||
|
}
|
||||||
|
]
|
||||||
file = open(path,'w')
|
file = open(path,'w')
|
||||||
file.write(json.dumps(_config))
|
file.write(json.dumps(_config))
|
||||||
file.close()
|
file.close()
|
||||||
|
@app.command()
|
||||||
# if __name__ == '__main__' :
|
def usage():
|
||||||
|
print (__doc__)
|
||||||
|
if __name__ == '__main__' :
|
||||||
|
app()
|
||||||
# #
|
# #
|
||||||
# # Load information from the file ...
|
# # Load information from the file ...
|
||||||
# if 'help' in SYS_ARGS :
|
# if 'help' in SYS_ARGS :
|
||||||
|
|
|
@ -62,34 +62,25 @@ class DiskWriter(Writer):
|
||||||
"""
|
"""
|
||||||
THREAD_LOCK = Lock()
|
THREAD_LOCK = Lock()
|
||||||
def __init__(self,**params):
|
def __init__(self,**params):
|
||||||
Writer.__init__(self)
|
super().__init__()
|
||||||
self.cache['meta'] = {'cols':0,'rows':0,'delimiter':None}
|
self._path = params['path']
|
||||||
if 'path' in params:
|
self._delimiter = params['delimiter']
|
||||||
self.path = params['path']
|
|
||||||
else:
|
# def meta(self):
|
||||||
self.path = 'data-transport.log'
|
# return self.cache['meta']
|
||||||
self.delimiter = params['delimiter'] if 'delimiter' in params else None
|
# def isready(self):
|
||||||
# if 'name' in params:
|
# """
|
||||||
# self.name = params['name'];
|
# This function determines if the class is ready for execution or not
|
||||||
# else:
|
# i.e it determines if the preconditions of met prior execution
|
||||||
# self.name = 'data-transport.log'
|
# """
|
||||||
# if os.path.exists(self.path) == False:
|
# return True
|
||||||
# os.mkdir(self.path)
|
# # p = self.path is not None and os.path.exists(self.path)
|
||||||
def meta(self):
|
# # q = self.name is not None
|
||||||
return self.cache['meta']
|
# # return p and q
|
||||||
def isready(self):
|
# def format (self,row):
|
||||||
"""
|
# self.cache['meta']['cols'] += len(row) if isinstance(row,list) else len(row.keys())
|
||||||
This function determines if the class is ready for execution or not
|
# self.cache['meta']['rows'] += 1
|
||||||
i.e it determines if the preconditions of met prior execution
|
# return (self.delimiter.join(row) if self.delimiter else json.dumps(row))+"\n"
|
||||||
"""
|
|
||||||
return True
|
|
||||||
# p = self.path is not None and os.path.exists(self.path)
|
|
||||||
# q = self.name is not None
|
|
||||||
# return p and q
|
|
||||||
def format (self,row):
|
|
||||||
self.cache['meta']['cols'] += len(row) if isinstance(row,list) else len(row.keys())
|
|
||||||
self.cache['meta']['rows'] += 1
|
|
||||||
return (self.delimiter.join(row) if self.delimiter else json.dumps(row))+"\n"
|
|
||||||
def write(self,info,**_args):
|
def write(self,info,**_args):
|
||||||
"""
|
"""
|
||||||
This function writes a record to a designated file
|
This function writes a record to a designated file
|
||||||
|
@ -97,21 +88,30 @@ class DiskWriter(Writer):
|
||||||
@param row row to be written
|
@param row row to be written
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
|
||||||
_mode = 'a' if 'overwrite' not in _args else 'w'
|
_mode = 'a' if 'overwrite' not in _args else 'w'
|
||||||
DiskWriter.THREAD_LOCK.acquire()
|
DiskWriter.THREAD_LOCK.acquire()
|
||||||
f = open(self.path,_mode)
|
# # _path = _args['path'] if 'path' in _args else self.path
|
||||||
if self.delimiter :
|
# # _delim= _args['delimiter'] if 'delimiter' in _args else self._delimiter
|
||||||
if type(info) == list :
|
# # info.to_csv(_path,sep=_delim)
|
||||||
for row in info :
|
# info.to_csv(self.path)
|
||||||
f.write(self.format(row))
|
# f = open(self.path,_mode)
|
||||||
else:
|
# if self.delimiter :
|
||||||
f.write(self.format(info))
|
# if type(info) == list :
|
||||||
else:
|
# for row in info :
|
||||||
if not type(info) == str :
|
# f.write(self.format(row))
|
||||||
f.write(json.dumps(info)+"\n")
|
# else:
|
||||||
else:
|
# f.write(self.format(info))
|
||||||
f.write(info)
|
# else:
|
||||||
f.close()
|
# if not type(info) == str :
|
||||||
|
# f.write(json.dumps(info)+"\n")
|
||||||
|
# else:
|
||||||
|
# f.write(info)
|
||||||
|
# f.close()
|
||||||
|
_delim = self._delimiter if 'delimiter' not in _args else _args['delimiter']
|
||||||
|
_path = self.path if 'path' not in _args else _args['path']
|
||||||
|
info.to_csv(_path,index=False,sep=_delim)
|
||||||
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
#
|
#
|
||||||
# Not sure what should be done here ...
|
# Not sure what should be done here ...
|
||||||
|
@ -220,16 +220,19 @@ class SQLiteWriter(SQLite,DiskWriter) :
|
||||||
#
|
#
|
||||||
# If the table doesn't exist we should create it
|
# If the table doesn't exist we should create it
|
||||||
#
|
#
|
||||||
def write(self,info):
|
def write(self,info,**_args):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not self.fields :
|
if not self.fields :
|
||||||
|
if type(info) == pd.DataFrame :
|
||||||
|
_columns = list(info.columns)
|
||||||
self.init(list(info.keys()))
|
self.init(list(info.keys()))
|
||||||
|
|
||||||
if type(info) == dict :
|
if type(info) == dict :
|
||||||
info = [info]
|
info = [info]
|
||||||
elif type(info) == pd.DataFrame :
|
elif type(info) == pd.DataFrame :
|
||||||
|
info = info.fillna('')
|
||||||
info = info.to_dict(orient='records')
|
info = info.to_dict(orient='records')
|
||||||
|
|
||||||
SQLiteWriter.LOCK.acquire()
|
SQLiteWriter.LOCK.acquire()
|
||||||
|
|
|
@ -70,7 +70,7 @@ class Transporter(Process):
|
||||||
# self._onerror = _args['onError']
|
# self._onerror = _args['onError']
|
||||||
self._source = _args['source']
|
self._source = _args['source']
|
||||||
self._target = _args['target']
|
self._target = _args['target']
|
||||||
|
|
||||||
#
|
#
|
||||||
# Let's insure we can support multiple targets
|
# Let's insure we can support multiple targets
|
||||||
self._target = [self._target] if type(self._target) != list else self._target
|
self._target = [self._target] if type(self._target) != list else self._target
|
||||||
|
@ -90,16 +90,18 @@ class Transporter(Process):
|
||||||
This function will write a data-frame to a designated data-store, The function is built around a delegation design pattern
|
This function will write a data-frame to a designated data-store, The function is built around a delegation design pattern
|
||||||
:data data-frame or object to be written
|
:data data-frame or object to be written
|
||||||
"""
|
"""
|
||||||
for _target in self._target :
|
if _data.shape[0] > 0 :
|
||||||
if 'write' not in _target :
|
for _target in self._target :
|
||||||
_target['context'] = 'write'
|
if 'write' not in _target :
|
||||||
_target['lock'] = True
|
_target['context'] = 'write'
|
||||||
else:
|
# _target['lock'] = True
|
||||||
_target['write']['lock'] = True
|
else:
|
||||||
_writer = transport.factory.instance(**_target)
|
# _target['write']['lock'] = True
|
||||||
_writer.write(_data,**_args)
|
pass
|
||||||
if hasattr(_writer,'close') :
|
_writer = transport.factory.instance(**_target)
|
||||||
_writer.close()
|
_writer.write(_data.copy(),**_args)
|
||||||
|
if hasattr(_writer,'close') :
|
||||||
|
_writer.close()
|
||||||
|
|
||||||
def write(self,_df,**_args):
|
def write(self,_df,**_args):
|
||||||
"""
|
"""
|
||||||
|
@ -109,12 +111,12 @@ class Transporter(Process):
|
||||||
# _df = self.read()
|
# _df = self.read()
|
||||||
_segments = np.array_split(np.range(_df.shape[0]),SEGMENT_COUNT) if _df.shape[0] > MAX_ROWS else np.array( [np.arange(_df.shape[0])])
|
_segments = np.array_split(np.range(_df.shape[0]),SEGMENT_COUNT) if _df.shape[0] > MAX_ROWS else np.array( [np.arange(_df.shape[0])])
|
||||||
# _index = 0
|
# _index = 0
|
||||||
|
|
||||||
|
|
||||||
for _indexes in _segments :
|
for _indexes in _segments :
|
||||||
_fwd_args = {} if not _args else _args
|
_fwd_args = {} if not _args else _args
|
||||||
|
|
||||||
self._delegate_write(_df.iloc[_indexes],**_fwd_args)
|
self._delegate_write(_df.iloc[_indexes])
|
||||||
#
|
#
|
||||||
# @TODO: Perhaps consider writing up each segment in a thread/process (speeds things up?)
|
# @TODO: Perhaps consider writing up each segment in a thread/process (speeds things up?)
|
||||||
pass
|
pass
|
||||||
|
|
Loading…
Reference in New Issue