Bug fix: formatting should be handled during post-processing

2023-08-09 10:04:26 -05:00 · 2023-08-09 10:04:26 -05:00 · 5932513666
parent ca09ea0202
commit 5932513666
2 changed files with 3 additions and 63 deletions
--- a/data/maker/init.py
+++ b/data/maker/init.py
@ -432,67 +432,7 @@ class Generator (Learner):
            return _date.strftime(FORMAT)
        pass
    def format(self,_df,_schema):
        """
        Align the columns of a data-frame with the types declared in a schema.

        Date-like columns (DATE, DATETIME, TIMESTAMP; matched case-insensitively)
        are NOT converted here: only the strftime pattern that applies to each of
        them is collected and logged. Other columns whose pandas dtype or schema
        type contains 'int' or 'float' have their missing/placeholder values
        replaced by 0 and are cast to the matching Python numeric type.

        :param _df      data-frame to adjust (numeric columns are reassigned in place)
        :param _schema  iterable of dictionaries with at least 'name' and 'type' keys
        :return         the same data-frame object that was passed in
        """
        _DATE_TYPES = ('DATE','DATETIME','TIMESTAMP')
        _TIME_TYPES = ('DATETIME','TIMESTAMP')
        #
        # Values treated as "missing" in numeric columns (in addition to NaN/None)
        _PLACEHOLDERS = [' ','','NA','nan']
        r = {}
        for _item in _schema :
            name = _item['name']
            _kind = _item['type'].upper()
            if _kind in _DATE_TYPES :
                #
                # Default pattern depends on whether the column carries a time part;
                # zero-padded directives only, '%-d' is a platform specific extension
                FORMAT = '%Y-%m-%d %H:%M:%S' if _kind in _TIME_TYPES else '%Y-%m-%d'
                try:
                    #
                    #-- Sometimes data isn't all it's meant to be, a user supplied
                    # format takes precedence over the defaults above
                    if 'format' in self.info and name in self.info['format'] :
                        FORMAT = self.info['format'][name]
                    r[name] = FORMAT
                except (AttributeError,KeyError,TypeError) as e:
                    #
                    # Best effort: a malformed self.info must not abort the formatting
                    print (e)
            else:
                #
                # Because types are inferred on the basis of the sample being processed they can sometimes be wrong
                #   To help disambiguate we add the schema information
                _inferred = _df[name].dtype.name
                _declared = _item['type'].lower()
                _type = None
                #
                # builtin int/float are used because the np.int/np.float aliases
                # no longer exist in recent versions of numpy
                if 'int' in _inferred or 'int' in _declared :
                    _type = int
                elif 'float' in _inferred or 'float' in _declared :
                    _type = float
                if _type :
                    _df[name] = _df[name].fillna(0).replace(_PLACEHOLDERS,0).astype(_type)
        if r :
            self.log(**{'action':'format','input':r})
        return _df
    def post(self,_candidates):
        if 'target'  in self.store :
@ -540,7 +480,7 @@ class Generator (Learner):
-            _df = self.format(_df,_schema)
+            # _df = self.format(_df,_schema)
            # _log = [{"name":_schema[i]['name'],"dataframe":_df[_df.columns[i]].dtypes.name,"schema":_schema[i]['type']} for i in np.arange(len(_schema)) ]
            self.log(**{"action":"consolidate","input":{"rows":N,"candidate":_candidates.index(_iodf)}})
--- a/data/maker/version.py
+++ b/data/maker/version.py
@ -1 +1 @@
-__version__='1.7.4'
+__version__='1.7.5'