bug fixes; can now be used as a library

This commit is contained in:
Steve Nyemba 2022-05-16 11:11:33 -05:00
parent 4b4647d200
commit 42ccca5f8d
2 changed files with 29 additions and 15 deletions

View File

@ -82,7 +82,7 @@ class Learner(Process):
pass
def get_schema(self):
if self.store['source']['provider'] != 'bigquery' :
return [{'name':self._df.dtypes.index.tolist()[i],'type':self._df.dtypes.astype(str).tolist()[i]}for i in range(self._df.dtypes.shape[0])]
return [] #{'name':self._df.dtypes.index.tolist()[i],'type':self._df.dtypes.astype(str).tolist()[i]}for i in range(self._df.dtypes.shape[0])]
else:
reader = transport.factory.instance(**self.store['source'])
return reader.meta(table=self.info['from'])
@ -276,12 +276,15 @@ class Generator (Learner):
pass
def format(self,_df,_schema):
r = {}
for _item in _schema :
name = _item['name']
if _item['type'].upper() in ['DATE','DATETIME','TIMESTAMP'] :
FORMAT = '%Y-%m-%d'
try:
#
#-- Sometimes data isn't all it's meant to be
if 'format' in self.info and name in self.info['format'] :
FORMAT = self.info['format'][name]
elif _item['type'] in ['DATETIME','TIMESTAMP'] :
@ -290,10 +293,18 @@ class Generator (Learner):
r[name] = FORMAT
_df[name] = pd.to_datetime(_df[name], format=FORMAT) #.astype('datetime64[ns]')
if _item['type'] in ['DATETIME','TIMESTAMP']:
_df[name] = _df[name].astype('datetime64[ns]')
_df[name] = _df[name].fillna('').astype('datetime64[ns]')
else:
_df[name] = _df[name].astype(str)
except Exception as e:
pass
finally:
pass
else:
# print (_item)
pass
_df = _df.replace('NaT','')
if r :
self.log(**{'action':'format','input':r})
return _df
@ -391,4 +402,7 @@ class factory :
elif _args['apply'] == 'generate' :
return Generator(**_args)
else:
return Trainer(**_args)
pthread= Trainer(**_args)
if 'start' in _args and _args['start'] == True :
pthread.start()
return pthread

View File

@ -4,7 +4,7 @@ import sys
def read(fname):
    """Return the full text of *fname*, resolved relative to this file's directory.

    :param fname: file name (or relative path) located next to this script
    :return: the file's contents as a string
    """
    # A context manager closes the handle as soon as the text is read,
    # instead of leaving the open file for the garbage collector.
    with open(os.path.join(os.path.dirname(__file__), fname)) as handle:
        return handle.read()
args = {"name":"data-maker","version":"1.5.3",
args = {"name":"data-maker","version":"1.5.4",
"author":"Vanderbilt University Medical Center","author_email":"steve.l.nyemba@vumc.org","license":"MIT",
"packages":find_packages(),"keywords":["healthcare","data","transport","protocol"]}
args["install_requires"] = ['data-transport@git+https://dev.the-phi.com/git/steve/data-transport.git','tensorflow']