bug fix: formatting should be handled in post-processing

2023-08-09 10:04:26 -05:00 · 2023-08-09 10:04:26 -05:00 · 5932513666
parent ca09ea0202
commit 5932513666
2 changed files with 3 additions and 63 deletions
--- a/data/maker/init.py
+++ b/data/maker/init.py
@@ -432,67 +432,7 @@ class Generator (Learner):
            return _date.strftime(FORMAT)

        pass
-    def format(self,_df,_schema):
-        r = {}
    
-        for _item in _schema :
-            name = _item['name']
-            
-            if _item['type'].upper() in ['DATE','DATETIME','TIMESTAMP'] :
-                FORMAT = '%Y-%m-%d'
-                
-                try:
-                    #
-                    #-- Sometimes data isn't all it's meant to be
-                    SIZE = -1
-                    if 'format' in self.info and name in self.info['format'] :
-                        FORMAT = self.info['format'][name]
-                        SIZE = 10
-                    elif _item['type'] in ['DATETIME','TIMESTAMP'] :
-                            FORMAT = '%Y-%m-%-d %H:%M:%S'
-                            SIZE = 19
-                    
-                    # if SIZE > 0 :
-                        
-                    #     values = pd.to_datetime(_df[name], format=FORMAT).astype(np.datetime64)
-                    #     # _df[name] = [_date[:SIZE].strip() for _date in values]
-                        
-                       
-                    # _df[name] = _df[name].astype(str)
-                    r[name] = FORMAT
-                    # _df[name] = pd.to_datetime(_df[name], format=FORMAT) #.astype('datetime64[ns]')
-                    if _item['type'] in ['DATETIME','TIMESTAMP']:                   
-                        pass #;_df[name] = _df[name].fillna('').astype('datetime64[ns]')
-                    
-                except Exception as e:
-                    print (e)
-                    pass
-                finally:
-                    pass
-            else:
-                
-                #
-                # Because types are inferred on the basis of the sample being processed they can sometimes be wrong
-                #   To help disambiguate we add the schema information
-                _type = None
-                
-                if 'int' in _df[name].dtypes.name or 'int' in _item['type'].lower():                    
-                    _type = np.int
-                    
-                elif 'float' in _df[name].dtypes.name or 'float' in _item['type'].lower():
-                    _type = np.float
-                if _type :
-                    
-                    _df[name] = _df[name].fillna(0).replace(' ',0).replace('',0).replace('NA',0).replace('nan',0).astype(_type)
-                # else:
-                #     _df[name] = _df[name].astype(str)
-        # _df = _df.replace('NaT','').replace('NA','')
-        
-        if r :
-            self.log(**{'action':'format','input':r})
-        return _df
-
-        pass
    def post(self,_candidates):
        
        if 'target'  in self.store :
@@ -540,7 +480,7 @@ class Generator (Learner):
           
            
            
-            _df = self.format(_df,_schema)
+            # _df = self.format(_df,_schema)
            # _log = [{"name":_schema[i]['name'],"dataframe":_df[_df.columns[i]].dtypes.name,"schema":_schema[i]['type']} for i in np.arange(len(_schema)) ]
            self.log(**{"action":"consolidate","input":{"rows":N,"candidate":_candidates.index(_iodf)}})

--- a/data/maker/version.py
+++ b/data/maker/version.py
@@ -1 +1 @@
-__version__='1.7.4'
+__version__='1.7.5'