bug fix: epochs, process control (generator)
This commit is contained in:
parent
a1ac97fbca
commit
6e0f89cd3c
|
@ -508,7 +508,7 @@ class Train (GNet):
|
||||||
logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) })
|
logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) })
|
||||||
|
|
||||||
# if epoch % self.MAX_EPOCHS == 0:
|
# if epoch % self.MAX_EPOCHS == 0:
|
||||||
if epoch in [5,10,50, self.MAX_EPOCHS] :
|
if epoch in [5,10,20,50,75, self.MAX_EPOCHS] :
|
||||||
# suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
|
# suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
|
||||||
suffix = self.get.suffix()
|
suffix = self.get.suffix()
|
||||||
_name = os.sep.join([self.train_dir,suffix])
|
_name = os.sep.join([self.train_dir,suffix])
|
||||||
|
|
27
pipeline.py
27
pipeline.py
|
@ -178,13 +178,14 @@ class Components :
|
||||||
#
|
#
|
||||||
info = {"module":"generate","action":"io.metrics","input":{"rows":data_comp.shape[0],"partition":partition,"logs":[]}}
|
info = {"module":"generate","action":"io.metrics","input":{"rows":data_comp.shape[0],"partition":partition,"logs":[]}}
|
||||||
x = {}
|
x = {}
|
||||||
for name in args['columns'] :
|
# for name in args['columns'] :
|
||||||
ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
|
# ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
|
||||||
count = data_comp[name].unique().size
|
# count = data_comp[name].unique().size
|
||||||
_ident= data_comp.shape[1] - ident
|
# _ident= data_comp.shape[1] - ident
|
||||||
_count= data_comp[name+'_io'].unique().size
|
# _count= data_comp[name+'_io'].unique().size
|
||||||
|
# _count= len(set(data_comp[name+'_io'].values.tolist()))
|
||||||
|
|
||||||
info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
|
# info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
|
||||||
# for name in data_comp.columns.tolist() :
|
# for name in data_comp.columns.tolist() :
|
||||||
# g = pd.DataFrame(data_comp.groupby([name]).size())
|
# g = pd.DataFrame(data_comp.groupby([name]).size())
|
||||||
# g.columns = ['counts']
|
# g.columns = ['counts']
|
||||||
|
@ -192,17 +193,17 @@ class Components :
|
||||||
# g.index = np.arange(g.shape[0])
|
# g.index = np.arange(g.shape[0])
|
||||||
# logs.append({"name":name,"counts": g.to_dict(orient='records')})
|
# logs.append({"name":name,"counts": g.to_dict(orient='records')})
|
||||||
# info['input']['logs'] = logs
|
# info['input']['logs'] = logs
|
||||||
logger.write(info)
|
# logger.write(info)
|
||||||
|
|
||||||
|
|
||||||
base_cols = list(set(_args['data'].columns) - set(args['columns'])) #-- rebuild the dataset (and store it)
|
base_cols = list(set(_args['data'].columns) - set(args['columns'])) #-- rebuild the dataset (and store it)
|
||||||
cols = _dc.columns.tolist()
|
cols = _dc.columns.tolist()
|
||||||
for name in cols :
|
# for name in cols :
|
||||||
_args['data'][name] = _dc[name]
|
# _args['data'][name] = _dc[name]
|
||||||
info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
|
# info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
|
||||||
if partition != '' :
|
# if partition != '' :
|
||||||
info['partition'] = int(partition)
|
# info['partition'] = int(partition)
|
||||||
logger.write(info)
|
# logger.write(info)
|
||||||
|
|
||||||
# filename = os.sep.join([log_folder,'output',name+'.csv'])
|
# filename = os.sep.join([log_folder,'output',name+'.csv'])
|
||||||
# data_comp[[name]].to_csv(filename,index=False)
|
# data_comp[[name]].to_csv(filename,index=False)
|
||||||
|
|
Loading…
Reference in New Issue