diff --git a/data/maker/__init__.py b/data/maker/__init__.py
index 21e38c5..77effb3 100644
--- a/data/maker/__init__.py
+++ b/data/maker/__init__.py
@@ -121,7 +121,7 @@ class Trainer(Learner):
         #
         # At this point we have the binary matrix, we can initiate training
         #
-        beg = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        beg = datetime.now() #.strftime('%Y-%m-%d %H:%M:%S')
         gTrain = gan.Train(**_args)
         gTrain.apply()
 
@@ -138,8 +138,9 @@ class Trainer(Learner):
 
         g = Generator(**_args)
         # g.run()
-        end = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        _logs = {'action':'train','input':{'start':beg,'end':end,"unique_counts":self._encoder._io[0]}}
+        end = datetime.now() #.strftime('%Y-%m-%d %H:%M:%S')
+        _min = float((end - beg).total_seconds() / 60)
+        _logs = {'action':'train','input':{'start':beg.strftime('%Y-%m-%d %H:%M:%S'),'minutes':_min,"unique_counts":self._encoder._io[0]}}
         self.log(**_logs)
         self.generate = g
         if self.autopilot :
@@ -158,6 +159,7 @@ class Generator (Learner):
         #
         self.network_args['candidates'] = int(_args['candidates']) if 'candidates' in _args else 1
         filename = os.sep.join([self.network_args['logs'],'output',self.network_args['context'],'map.json'])
+        self.log(**{'action':'init-map','input':{'filename':filename,'exists':os.path.exists(filename)}})
         file = open(filename)
         self._map = json.loads(file.read())
         file.close()
@@ -291,6 +293,12 @@ class Generator (Learner):
             writer.write(_df,schema=_schema)
         self.log(**{'action':'write','input':{'rows':N,'candidates':len(_candidates)}})
 
+class Shuffle(Trainer):
+    """
+    This class will yield data with low utility
+    """
+    def __init__(self,**_args):
+        super().__init__(**_args)
 class factory :
     _infocache = {}
     @staticmethod
diff --git a/data/maker/prepare/__init__.py b/data/maker/prepare/__init__.py
index 3ef494e..1bf4872 100644
--- a/data/maker/prepare/__init__.py
+++ b/data/maker/prepare/__init__.py
@@ -96,7 +96,7 @@ class Input :
             # self._columns = cols if cols else _df.apply(lambda col:None if col[0] == row_count or col[0] < MIN_SPACE_SIZE else col.name).dropna().tolist()
             # self._io = _df.to_dict(orient='records')
             _df = self.df.nunique().T / self.df.shape[0]
-            self._io = pd.DataFrame(_df).to_dict(orient='records')
+            self._io = pd.DataFrame(_df).astype(float).to_dict(orient='records')
         except Exception as e:
             print (e)
             self._io = []
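
For context on the last hunk: dividing `df.nunique()` by the row count yields per-column uniqueness ratios, and the added `.astype(float)` is presumably a defensive cast so the logged records hold plain numeric ratios. A minimal standalone sketch of that pattern, with a made-up sample frame and column names purely for illustration:

```python
import pandas as pd

# Toy frame standing in for self.df; the real columns come from the caller.
df = pd.DataFrame({'sex': ['M', 'F', 'M', 'F'],
                   'zip': ['10001', '10002', '10003', '10001']})

# Ratio of distinct values to total rows, per column.
ratios = df.nunique() / df.shape[0]

# Casting to float before to_dict() mirrors the change in the diff,
# keeping the logged ratios as numeric values.
records = pd.DataFrame(ratios).astype(float).to_dict(orient='records')
print(records)   # e.g. [{0: 0.5}, {0: 0.75}]
```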