diff --git a/data/gan.py b/data/gan.py
index 41daa3d..c54f5bd 100644
--- a/data/gan.py
+++ b/data/gan.py
@@ -508,7 +508,7 @@ class Train (GNet):
                 logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) })
 
                 # if epoch % self.MAX_EPOCHS == 0:
-                if epoch in [5,10,50, self.MAX_EPOCHS] :
+                if epoch in [5,10,20,50,75, self.MAX_EPOCHS] :
                     # suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
                     suffix = self.get.suffix()
                     _name = os.sep.join([self.train_dir,suffix])
diff --git a/pipeline.py b/pipeline.py
index 0d19e60..884609f 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -178,13 +178,14 @@ class Components :
 
             # info = {"module":"generate","action":"io.metrics","input":{"rows":data_comp.shape[0],"partition":partition,"logs":[]}}
             x = {}
-            for name in args['columns'] :
-                ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
-                count = data_comp[name].unique().size
-                _ident= data_comp.shape[1] - ident
-                _count= data_comp[name+'_io'].unique().size
+            # for name in args['columns'] :
+            #     ident = data_comp.apply(lambda row: 1*(row[name]==row[name+'_io']),axis=1).sum()
+            #     count = data_comp[name].unique().size
+            #     _ident= data_comp.shape[1] - ident
+            #     _count= data_comp[name+'_io'].unique().size
+            #     _count= len(set(data_comp[name+'_io'].values.tolist()))
 
-                info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
+            #     info['input']['logs'] += [{"name":name,"identical":int(ident),"no_identical":int(_ident),"original_count":count,"synthetic_count":_count}]
             # for name in data_comp.columns.tolist() :
             #     g = pd.DataFrame(data_comp.groupby([name]).size())
             #     g.columns = ['counts']
@@ -192,17 +193,17 @@ class Components :
             #     g.index = np.arange(g.shape[0])
             #     logs.append({"name":name,"counts": g.to_dict(orient='records')})
             #     info['input']['logs'] = logs
-            logger.write(info)
+            # logger.write(info)
             base_cols = list(set(_args['data'].columns) - set(args['columns'])) #-- rebuilt the dataset (and store it)
             cols = _dc.columns.tolist()
-            for name in cols :
-                _args['data'][name] = _dc[name]
-                info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
-                if partition != '' :
-                    info['partition'] = int(partition)
-                logger.write(info)
+            # for name in cols :
+            #     _args['data'][name] = _dc[name]
+            #     info = {"module":"generate","action":"io","input":{"rows":_dc[name].shape[0],"name":name}}
+            #     if partition != '' :
+            #         info['partition'] = int(partition)
+            #     logger.write(info)
                 # filename = os.sep.join([log_folder,'output',name+'.csv'])
                 # data_comp[[name]].to_csv(filename,index=False)
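
For context on the pipeline.py hunks: the loop being commented out computed per-column agreement between original values and their synthetic `*_io` counterparts before this logging was disabled. Below is a minimal standalone sketch of that metric, assuming `data_comp` really does pair each column `name` with a `name + '_io'` synthetic column as the disabled loop implies; it is not part of the patch. It replaces the row-wise `apply` with a vectorized comparison and uses `shape[0]` (row count) where the disabled code used `shape[1]` (column count) to derive the non-identical tally.

```python
# Hypothetical sketch of the per-column identity metric this diff disables;
# names and structure are inferred from the commented-out code above.
import pandas as pd

def column_identity_metrics(data_comp: pd.DataFrame, columns: list) -> list:
    logs = []
    for name in columns:
        # Rows where the original and synthetic value coincide
        # (NaN == NaN is False, matching the original apply-based check)
        ident = int((data_comp[name] == data_comp[name + '_io']).sum())
        logs.append({
            "name": name,
            "identical": ident,
            # Complement over rows (shape[0]); the disabled code subtracted
            # from shape[1], which counts columns rather than rows
            "no_identical": int(data_comp.shape[0] - ident),
            "original_count": int(data_comp[name].nunique()),
            "synthetic_count": int(data_comp[name + '_io'].nunique()),
        })
    return logs
```

Such a helper could feed `info['input']['logs']` directly if the `io.metrics` logging were re-enabled, keeping the metric computation separate from the logger plumbing.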