From 593251366614f3eaa1b9490ae3bf7ca627ae06bf Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Wed, 9 Aug 2023 10:04:26 -0500 Subject: [PATCH] bug fix: format, should be handled on post processing --- data/maker/__init__.py | 64 ++---------------------------------------- data/maker/version.py | 2 +- 2 files changed, 3 insertions(+), 63 deletions(-) diff --git a/data/maker/__init__.py b/data/maker/__init__.py index e3af9de..888bbc0 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -432,67 +432,7 @@ class Generator (Learner): return _date.strftime(FORMAT) pass - def format(self,_df,_schema): - r = {} - - for _item in _schema : - name = _item['name'] - - if _item['type'].upper() in ['DATE','DATETIME','TIMESTAMP'] : - FORMAT = '%Y-%m-%d' - - try: - # - #-- Sometimes data isn't all it's meant to be - SIZE = -1 - if 'format' in self.info and name in self.info['format'] : - FORMAT = self.info['format'][name] - SIZE = 10 - elif _item['type'] in ['DATETIME','TIMESTAMP'] : - FORMAT = '%Y-%m-%-d %H:%M:%S' - SIZE = 19 - - # if SIZE > 0 : - - # values = pd.to_datetime(_df[name], format=FORMAT).astype(np.datetime64) - # # _df[name] = [_date[:SIZE].strip() for _date in values] - - - # _df[name] = _df[name].astype(str) - r[name] = FORMAT - # _df[name] = pd.to_datetime(_df[name], format=FORMAT) #.astype('datetime64[ns]') - if _item['type'] in ['DATETIME','TIMESTAMP']: - pass #;_df[name] = _df[name].fillna('').astype('datetime64[ns]') - - except Exception as e: - print (e) - pass - finally: - pass - else: - - # - # Because types are inferred on the basis of the sample being processed they can sometimes be wrong - # To help disambiguate we add the schema information - _type = None - - if 'int' in _df[name].dtypes.name or 'int' in _item['type'].lower(): - _type = np.int - - elif 'float' in _df[name].dtypes.name or 'float' in _item['type'].lower(): - _type = np.float - if _type : - - _df[name] = _df[name].fillna(0).replace(' ',0).replace('',0).replace('NA',0).replace('nan',0).astype(_type) - # else: - # _df[name] = _df[name].astype(str) - # _df = _df.replace('NaT','').replace('NA','') - - if r : - self.log(**{'action':'format','input':r}) - return _df - - pass + def post(self,_candidates): if 'target' in self.store : @@ -540,7 +480,7 @@ class Generator (Learner): - _df = self.format(_df,_schema) + # _df = self.format(_df,_schema) # _log = [{"name":_schema[i]['name'],"dataframe":_df[_df.columns[i]].dtypes.name,"schema":_schema[i]['type']} for i in np.arange(len(_schema)) ] self.log(**{"action":"consolidate","input":{"rows":N,"candidate":_candidates.index(_iodf)}}) diff --git a/data/maker/version.py b/data/maker/version.py index 625b565..2dfabf1 100644 --- a/data/maker/version.py +++ b/data/maker/version.py @@ -1 +1 @@ -__version__='1.7.4' +__version__='1.7.5'