bug fix: epochs, process control (generator)

2020-03-26 23:39:59 -05:00 · 2020-03-26 23:39:59 -05:00 · 6e0f89cd3c
parent a1ac97fbca
commit 6e0f89cd3c
2 changed files with 15 additions and 14 deletions
--- a/data/gan.py
+++ b/data/gan.py
@@ -508,7 +508,7 @@ class Train (GNet):
                                        logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) })
                                        # if epoch % self.MAX_EPOCHS == 0:
-                                        if epoch in [5,10,50, self.MAX_EPOCHS] :
+                                        if epoch in [5,10,20,50,75, self.MAX_EPOCHS] :
                                                # suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
                                                suffix = self.get.suffix()
                                                _name  = os.sep.join([self.train_dir,suffix])
--- a/pipeline.py
+++ b/pipeline.py
@@ -178,13 +178,14 @@ class Components :
 		#
 		info = {"module":"generate","action":"io.metrics","input":{"rows":data_comp.shape[0],"partition":partition,"logs":[]}}
 		x = {}
-		for name in args['columns'] :
+		# for name in args['columns'] :
-			ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
+		# 	ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
-			count = data_comp[name].unique().size
+		# 	count = data_comp[name].unique().size
-			_ident= data_comp.shape[1] - ident
+		# 	_ident= data_comp.shape[1] - ident
-			_count= data_comp[name+'_io'].unique().size
+		# 	_count= data_comp[name+'_io'].unique().size
 		# 	_count= len(set(data_comp[name+'_io'].values.tolist()))
-			info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
+		# 	info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
 		# for name in data_comp.columns.tolist() :
 			# g = pd.DataFrame(data_comp.groupby([name]).size())						
 			# g.columns = ['counts']
@@ -192,17 +193,17 @@ class Components :
 			# g.index = np.arange(g.shape[0])
 			# logs.append({"name":name,"counts": g.to_dict(orient='records')})
 		# info['input']['logs'] = logs
-		logger.write(info)
+		# logger.write(info)
 		base_cols = list(set(_args['data'].columns) - set(args['columns']))	#-- rebuilt the dataset (and store it)
 		cols = _dc.columns.tolist()
-		for name in cols :
+		# for name in cols :
-			_args['data'][name] = _dc[name]
+		# 	_args['data'][name] = _dc[name]
-			info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
+		# 	info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
-			if partition != '' :
+		# 	if partition != '' :
-				info['partition'] = int(partition)
+		# 		info['partition'] = int(partition)
-			logger.write(info)
+		# 	logger.write(info)
 			# filename = os.sep.join([log_folder,'output',name+'.csv'])
 			# data_comp[[name]].to_csv(filename,index=False)