bug fix: format, should be handled on post processing
This commit is contained in:
parent
ca09ea0202
commit
5932513666
|
@ -432,67 +432,7 @@ class Generator (Learner):
|
||||||
return _date.strftime(FORMAT)
|
return _date.strftime(FORMAT)
|
||||||
|
|
||||||
pass
|
pass
|
||||||
def format(self,_df,_schema):
    """
    Coerce numeric columns of a data-frame to the types declared in a schema
    and log the date format associated with each date-like column.

    :param _df      data-frame whose columns are named after the schema entries
    :param _schema  list of dictionaries, each with a 'name' and a 'type' key
    :return         the same data-frame, numeric columns cast in place

    Date-like columns (DATE, DATETIME, TIMESTAMP, any letter case) are not
    modified here: only their format string is collected and passed to
    self.log. A format found in self.info['format'][<column name>] takes
    precedence over the built-in defaults.
    """
    _DATE_FORMAT = '%Y-%m-%d'
    # '%d' rather than the glibc-only '%-d': the latter is not portable
    _DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
    # Values treated as "missing" in numeric columns, replaced by 0 before casting
    _BLANKS = [' ', '', 'NA', 'nan']

    # Best effort: a missing/empty 'info' or 'format' entry means "no overrides"
    _info = getattr(self, 'info', None) or {}
    _custom = (_info['format'] if 'format' in _info else None) or {}

    r = {}
    for _item in _schema:
        name = _item['name']
        # Normalise once so that every comparison below is case-insensitive
        _kind = _item['type'].upper()

        if _kind in ('DATE', 'DATETIME', 'TIMESTAMP'):
            if name in _custom:
                r[name] = _custom[name]
            elif _kind in ('DATETIME', 'TIMESTAMP'):
                r[name] = _DATETIME_FORMAT
            else:
                r[name] = _DATE_FORMAT
            continue

        #
        # Because types are inferred on the basis of the sample being processed
        # they can sometimes be wrong; the schema is used to disambiguate.
        # An integer hint (from the frame or the schema) wins over a float hint.
        _dtype = _df[name].dtypes.name
        _hint = _item['type'].lower()
        if 'int' in _dtype or 'int' in _hint:
            _type = int     # np.int was an alias of the builtin, removed in NumPy 1.24
        elif 'float' in _dtype or 'float' in _hint:
            _type = float   # np.float was an alias of the builtin, removed in NumPy 1.24
        else:
            continue

        _df[name] = _df[name].fillna(0).replace(_BLANKS, 0).astype(_type)

    if r:
        self.log(**{'action':'format','input':r})
    return _df
|
|
||||||
def post(self,_candidates):
|
def post(self,_candidates):
|
||||||
|
|
||||||
if 'target' in self.store :
|
if 'target' in self.store :
|
||||||
|
@ -540,7 +480,7 @@ class Generator (Learner):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
_df = self.format(_df,_schema)
|
# _df = self.format(_df,_schema)
|
||||||
# _log = [{"name":_schema[i]['name'],"dataframe":_df[_df.columns[i]].dtypes.name,"schema":_schema[i]['type']} for i in np.arange(len(_schema)) ]
|
# _log = [{"name":_schema[i]['name'],"dataframe":_df[_df.columns[i]].dtypes.name,"schema":_schema[i]['type']} for i in np.arange(len(_schema)) ]
|
||||||
self.log(**{"action":"consolidate","input":{"rows":N,"candidate":_candidates.index(_iodf)}})
|
self.log(**{"action":"consolidate","input":{"rows":N,"candidate":_candidates.index(_iodf)}})
|
||||||
|
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
__version__='1.7.4'
|
__version__='1.7.5'
|
||||||
|
|
Loading…
Reference in New Issue