bugfix: date/timestamp conversions

This commit is contained in:
Steve Nyemba 2022-04-14 13:37:47 -05:00
parent febcaa5883
commit 0e4148d4e7
1 changed file with 13 additions and 10 deletions

View File

@ -64,7 +64,7 @@ class Learner(Process):
# self.logpath= _args['logpath'] if 'logpath' in _args else 'logs' # self.logpath= _args['logpath'] if 'logpath' in _args else 'logs'
# sel.max_epoc # sel.max_epoc
def log(self,**_args): def log(self,**_args):
self.lock.acquire() # self.lock.acquire()
try: try:
logger = transport.factory.instance(**self.store['logger']) if 'logger' in self.store else transport.factory.instance(provider='console',context='write',lock=True) logger = transport.factory.instance(**self.store['logger']) if 'logger' in self.store else transport.factory.instance(provider='console',context='write',lock=True)
_args = dict({'ndx':self.ndx,'module':self.name,'table':self.info['from'],'info':self.info['context'],**_args}) _args = dict({'ndx':self.ndx,'module':self.name,'table':self.info['from'],'info':self.info['context'],**_args})
@ -78,7 +78,8 @@ class Learner(Process):
print (e) print (e)
pass pass
finally: finally:
self.lock.release() # self.lock.release()
pass
def get_schema(self): def get_schema(self):
if self.store['source']['provider'] != 'bigquery' : if self.store['source']['provider'] != 'bigquery' :
return [{'name':self._df.dtypes.index.tolist()[i],'type':self._df.dtypes.astype(str).tolist()[i]}for i in range(self._df.dtypes.shape[0])] return [{'name':self._df.dtypes.index.tolist()[i],'type':self._df.dtypes.astype(str).tolist()[i]}for i in range(self._df.dtypes.shape[0])]
@ -222,7 +223,7 @@ class Generator (Learner):
values[index] = values[index].astype(_type) values[index] = values[index].astype(_type)
x += values.tolist() x += values.tolist()
if x : if x :
_log['input']['diff_pct'] = 100 * (1 - np.divide( (_df[name].dropna() == x).sum(),_df[name].dropna().size)) _log['input']['identical_percentage'] = 100 * (1 - np.divide( (_df[name].dropna() == x).sum(),_df[name].dropna().size))
_df[name] = x #np.array(x,dtype=np.int64) if 'int' in _type else np.arry(x,dtype=np.float64) _df[name] = x #np.array(x,dtype=np.int64) if 'int' in _type else np.arry(x,dtype=np.float64)
self.log(**_log) self.log(**_log)
@ -243,14 +244,15 @@ class Generator (Learner):
day = np.random.randint(1,_end) day = np.random.randint(1,_end)
#-- synthetic date #-- synthetic date
_date = datetime(year=year,month=month,day=day) _date = datetime(year=year,month=month,day=day) #,minute=0,hour=0,second=0)
FORMAT = '%Y-%m-%d' FORMAT = '%Y-%d-%m'
if 'format' in self.info and 'field' in _args and _args['field'] in self.info['format']: _name = _args['field'] if 'field' in _args else None
if 'format' in self.info and _name in self.info['format']:
_name = _args['field'] _name = _args['field']
FORMAT = self.info['format'][_name] FORMAT = self.info['format'][_name]
# print ([_name,FORMAT, _date.strftime(FORMAT)])
r = [] r = []
if offset : if offset :
r = [_date.strftime(FORMAT)] r = [_date.strftime(FORMAT)]
@ -277,7 +279,7 @@ class Generator (Learner):
r[name] = FORMAT r[name] = FORMAT
_df[name] = pd.to_datetime(_df[name], format=FORMAT).astype(str) #.astype('datetime64[ns]') _df[name] = pd.to_datetime(_df[name], format=FORMAT).astype('datetime64[ns]')
if r : if r :
self.log(**{'action':'format','input':r}) self.log(**{'action':'format','input':r})
return _df return _df
@ -308,12 +310,13 @@ class Generator (Learner):
years = _df[iname] years = _df[iname]
_dates = [self.make_date(year=year,field=name) for year in years] _dates = [self.make_date(year=year,field=name) for year in years]
if _dates : if _dates :
_df[name] = _dates _df[name] = _dates
_schema = self.get_schema() _schema = self.get_schema()
_schema = [{'name':_item.name,'type':_item.field_type} for _item in _schema] _schema = [{'name':_item.name,'type':_item.field_type} for _item in _schema]
_df = self.format(_df,_schema) _df = self.format(_df,_schema)
writer.write(_df,schema=_schema) writer.write(_df,schema=_schema)
self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}}) self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}})