From 42ccca5f8dd1c707ba56567cb58d9863d348a9e8 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Mon, 16 May 2022 11:11:33 -0500 Subject: [PATCH] bug fixes can now be used as a library --- data/maker/__init__.py | 42 ++++++++++++++++++++++++++++-------------- setup.py | 2 +- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/data/maker/__init__.py b/data/maker/__init__.py index 3c4d45f..50abfd2 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -82,7 +82,7 @@ class Learner(Process): pass def get_schema(self): if self.store['source']['provider'] != 'bigquery' : - return [{'name':self._df.dtypes.index.tolist()[i],'type':self._df.dtypes.astype(str).tolist()[i]}for i in range(self._df.dtypes.shape[0])] + return [] #{'name':self._df.dtypes.index.tolist()[i],'type':self._df.dtypes.astype(str).tolist()[i]}for i in range(self._df.dtypes.shape[0])] else: reader = transport.factory.instance(**self.store['source']) return reader.meta(table=self.info['from']) @@ -276,24 +276,35 @@ class Generator (Learner): pass def format(self,_df,_schema): r = {} + for _item in _schema : name = _item['name'] if _item['type'].upper() in ['DATE','DATETIME','TIMESTAMP'] : FORMAT = '%Y-%m-%d' - - if 'format' in self.info and name in self.info['format'] : - FORMAT = self.info['format'][name] - elif _item['type'] in ['DATETIME','TIMESTAMP'] : - FORMAT = '%Y-%m-%d %H:%M:%S' - - r[name] = FORMAT - _df[name] = pd.to_datetime(_df[name], format=FORMAT) #.astype('datetime64[ns]') - if _item['type'] in ['DATETIME','TIMESTAMP']: - _df[name] = _df[name].astype('datetime64[ns]') - else: - _df[name] = _df[name].astype(str) + try: + # + #-- Sometimes data isn't all it's meant to be + if 'format' in self.info and name in self.info['format'] : + FORMAT = self.info['format'][name] + elif _item['type'] in ['DATETIME','TIMESTAMP'] : + FORMAT = '%Y-%m-%d %H:%M:%S' + + r[name] = FORMAT + _df[name] = pd.to_datetime(_df[name], format=FORMAT) #.astype('datetime64[ns]') + if _item['type'] in ['DATETIME','TIMESTAMP']: + _df[name] = _df[name].fillna('').astype('datetime64[ns]') + else: + _df[name] = _df[name].astype(str) + except Exception as e: + pass + finally: + pass + else: + # print (_item) + pass _df = _df.replace('NaT','') + if r : self.log(**{'action':'format','input':r}) return _df @@ -391,4 +402,7 @@ class factory : elif _args['apply'] == 'generate' : return Generator(**_args) else: - return Trainer(**_args) \ No newline at end of file + pthread= Trainer(**_args) + if 'start' in _args and _args['start'] == True : + pthread.start() + return pthread \ No newline at end of file diff --git a/setup.py b/setup.py index 801dc48..b5d3733 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import sys def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() -args = {"name":"data-maker","version":"1.5.3", +args = {"name":"data-maker","version":"1.5.4", "author":"Vanderbilt University Medical Center","author_email":"steve.l.nyemba@vumc.org","license":"MIT", "packages":find_packages(),"keywords":["healthcare","data","transport","protocol"]} args["install_requires"] = ['data-transport@git+https://dev.the-phi.com/git/steve/data-transport.git','tensorflow']