bugfix: date type casting bug

This commit is contained in:
Steve Nyemba 2022-04-14 18:07:17 -05:00
parent 0e4148d4e7
commit 93ebe8ee1b
1 changed file with 27 additions and 16 deletions

View File

@@ -235,6 +235,7 @@ class Generator (Learner):
if _args['year'] in ['',None,np.nan] :
return None
year = int(_args['year'])
offset = _args['offset'] if 'offset' in _args else 0
month = np.random.randint(1,13)
if month == 2:
@@ -244,11 +245,11 @@ class Generator (Learner):
day = np.random.randint(1,_end)
#-- synthetic date
_date = datetime(year=year,month=month,day=day) #,minute=0,hour=0,second=0)
FORMAT = '%Y-%d-%m'
_date = datetime(year=year,month=month,day=day,minute=0,hour=0,second=0)
FORMAT = '%Y-%m-%d'
_name = _args['field'] if 'field' in _args else None
if 'format' in self.info and _name in self.info['format']:
_name = _args['field']
# _name = _args['field']
FORMAT = self.info['format'][_name]
@@ -258,7 +259,7 @@ class Generator (Learner):
r = [_date.strftime(FORMAT)]
for _delta in offset :
_date = _date + timedelta(_delta)
r.append(_date.strftime(FORMAT))
r.append(_date.strptime(FORMAT))
return r
else:
return _date.strftime(FORMAT)
@@ -270,16 +271,19 @@ class Generator (Learner):
name = _item['name']
if _item['type'].upper() in ['DATE','DATETIME','TIMESTAMP'] :
FORMAT = '%Y-%d-%m'
FORMAT = '%Y-%m-%d'
if 'format' in self.info and name in self.info['format'] :
FORMAT = self.info['format'][name]
else:
if _item['type'] == ['DATETIME','TIMESTAMP'] :
FORMAT = '%Y-%d-%m %H:%M:%S'
elif _item['type'] in ['DATETIME','TIMESTAMP'] :
FORMAT = '%Y-%m-%d %H:%M:%S'
r[name] = FORMAT
_df[name] = pd.to_datetime(_df[name], format=FORMAT).astype('datetime64[ns]')
_df[name] = pd.to_datetime(_df[name], format=FORMAT) #.astype('datetime64[ns]')
if _item['type'] in ['DATETIME','TIMESTAMP']:
_df[name] = _df[name].astype('datetime64[ns]')
else:
_df[name] = _df[name].astype(str)
if r :
self.log(**{'action':'format','input':r})
return _df
@@ -309,10 +313,12 @@ class Generator (Learner):
iname = self.info['make_date'][name]
years = _df[iname]
_dates = [self.make_date(year=year,field=name) for year in years]
_dates = [self.make_date(year=_year,field=name) for _year in years]
if _dates :
_df[name] = _dates
_schema = self.get_schema()
_schema = [{'name':_item.name,'type':_item.field_type} for _item in _schema]
_df = self.format(_df,_schema)
@@ -341,7 +347,12 @@ class Shuffle(Generator):
_log = {'action':'io-data','input':{'candidates':1,'rows':int(self._df.shape[0])}}
self.log(**_log)
try:
self.post([self._df])
self.log(**{'action':'completed','input':{'candidates':1,'rows':int(self._df.shape[0])}})
except Exception as e :
# print (e)
self.log(**{'action':'failed','input':{'msg':e,'info':self.info}})
class factory :
_infocache = {}
@staticmethod