From 93ebe8ee1b2b20f29c80281799b48eec65bf90eb Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Thu, 14 Apr 2022 18:07:17 -0500 Subject: [PATCH] bugfix: date type casting bug --- data/maker/__init__.py | 43 ++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/data/maker/__init__.py b/data/maker/__init__.py index 184bca4..6c2a463 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -235,6 +235,7 @@ class Generator (Learner): if _args['year'] in ['',None,np.nan] : return None year = int(_args['year']) + offset = _args['offset'] if 'offset' in _args else 0 month = np.random.randint(1,13) if month == 2: @@ -244,13 +245,13 @@ class Generator (Learner): day = np.random.randint(1,_end) #-- synthetic date - _date = datetime(year=year,month=month,day=day) #,minute=0,hour=0,second=0) - FORMAT = '%Y-%d-%m' + _date = datetime(year=year,month=month,day=day,minute=0,hour=0,second=0) + FORMAT = '%Y-%m-%d' _name = _args['field'] if 'field' in _args else None if 'format' in self.info and _name in self.info['format']: - _name = _args['field'] + # _name = _args['field'] FORMAT = self.info['format'][_name] - + # print ([_name,FORMAT, _date.strftime(FORMAT)]) r = [] @@ -258,7 +259,7 @@ class Generator (Learner): r = [_date.strftime(FORMAT)] for _delta in offset : _date = _date + timedelta(_delta) - r.append(_date.strftime(FORMAT)) + r.append(_date.strptime(FORMAT)) return r else: return _date.strftime(FORMAT) @@ -270,16 +271,19 @@ class Generator (Learner): name = _item['name'] if _item['type'].upper() in ['DATE','DATETIME','TIMESTAMP'] : - FORMAT = '%Y-%d-%m' + FORMAT = '%Y-%m-%d' + if 'format' in self.info and name in self.info['format'] : FORMAT = self.info['format'][name] - else: - if _item['type'] == ['DATETIME','TIMESTAMP'] : - FORMAT = '%Y-%d-%m %H:%M:%S' + elif _item['type'] in ['DATETIME','TIMESTAMP'] : + FORMAT = '%Y-%m-%d %H:%M:%S' + r[name] = FORMAT - - - _df[name] = pd.to_datetime(_df[name], format=FORMAT).astype('datetime64[ns]') + _df[name] = pd.to_datetime(_df[name], format=FORMAT) #.astype('datetime64[ns]') + if _item['type'] in ['DATETIME','TIMESTAMP']: + _df[name] = _df[name].astype('datetime64[ns]') + else: + _df[name] = _df[name].astype(str) if r : self.log(**{'action':'format','input':r}) return _df @@ -309,10 +313,12 @@ class Generator (Learner): iname = self.info['make_date'][name] years = _df[iname] - _dates = [self.make_date(year=year,field=name) for year in years] - if _dates : + _dates = [self.make_date(year=_year,field=name) for _year in years] + if _dates : _df[name] = _dates - + + + _schema = self.get_schema() _schema = [{'name':_item.name,'type':_item.field_type} for _item in _schema] _df = self.format(_df,_schema) @@ -341,7 +347,12 @@ class Shuffle(Generator): _log = {'action':'io-data','input':{'candidates':1,'rows':int(self._df.shape[0])}} self.log(**_log) - self.post([self._df]) + try: + self.post([self._df]) + self.log(**{'action':'completed','input':{'candidates':1,'rows':int(self._df.shape[0])}}) + except Exception as e : + # print (e) + self.log(**{'action':'failed','input':{'msg':e,'info':self.info}}) class factory : _infocache = {} @staticmethod