bug fix: epochs, process control (generator)
This commit is contained in:
parent
a1ac97fbca
commit
6e0f89cd3c
|
@ -508,7 +508,7 @@ class Train (GNet):
|
|||
logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) })
|
||||
|
||||
# if epoch % self.MAX_EPOCHS == 0:
|
||||
if epoch in [5,10,50, self.MAX_EPOCHS] :
|
||||
if epoch in [5,10,20,50,75, self.MAX_EPOCHS] :
|
||||
# suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
|
||||
suffix = self.get.suffix()
|
||||
_name = os.sep.join([self.train_dir,suffix])
|
||||
|
|
27
pipeline.py
27
pipeline.py
|
@ -178,13 +178,14 @@ class Components :
|
|||
#
|
||||
info = {"module":"generate","action":"io.metrics","input":{"rows":data_comp.shape[0],"partition":partition,"logs":[]}}
|
||||
x = {}
|
||||
for name in args['columns'] :
|
||||
ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
|
||||
count = data_comp[name].unique().size
|
||||
_ident= data_comp.shape[1] - ident
|
||||
_count= data_comp[name+'_io'].unique().size
|
||||
# for name in args['columns'] :
|
||||
# ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
|
||||
# count = data_comp[name].unique().size
|
||||
# _ident= data_comp.shape[1] - ident
|
||||
# _count= data_comp[name+'_io'].unique().size
|
||||
# _count= len(set(data_comp[name+'_io'].values.tolist()))
|
||||
|
||||
info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
|
||||
# info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
|
||||
# for name in data_comp.columns.tolist() :
|
||||
# g = pd.DataFrame(data_comp.groupby([name]).size())
|
||||
# g.columns = ['counts']
|
||||
|
@ -192,17 +193,17 @@ class Components :
|
|||
# g.index = np.arange(g.shape[0])
|
||||
# logs.append({"name":name,"counts": g.to_dict(orient='records')})
|
||||
# info['input']['logs'] = logs
|
||||
logger.write(info)
|
||||
# logger.write(info)
|
||||
|
||||
|
||||
base_cols = list(set(_args['data'].columns) - set(args['columns'])) #-- rebuild the dataset (and store it)
|
||||
cols = _dc.columns.tolist()
|
||||
for name in cols :
|
||||
_args['data'][name] = _dc[name]
|
||||
info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
|
||||
if partition != '' :
|
||||
info['partition'] = int(partition)
|
||||
logger.write(info)
|
||||
# for name in cols :
|
||||
# _args['data'][name] = _dc[name]
|
||||
# info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
|
||||
# if partition != '' :
|
||||
# info['partition'] = int(partition)
|
||||
# logger.write(info)
|
||||
|
||||
# filename = os.sep.join([log_folder,'output',name+'.csv'])
|
||||
# data_comp[[name]].to_csv(filename,index=False)
|
||||
|
|
Loading…
Reference in New Issue