bug fix, with logs and partitioning
This commit is contained in:
parent 266bdc8bd2
commit e07c355388
11	data/gan.py
@@ -59,6 +59,7 @@ class GNet :
         self.logs = {}

         self.NUM_GPUS = 1 if 'num_gpu' not in args else args['num_gpu']
+        self.PARTITION = args['partition']
         # if self.NUM_GPUS > 1 :
         #    os.environ['CUDA_VISIBLE_DEVICES'] = "4"

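The new PARTITION attribute is a hard args['partition'] lookup, unlike NUM_GPUS on the line above it, which falls back to a default. A minimal sketch of the same read done defensively (the -1 sentinel is an assumption, not part of the commit):

    # Hypothetical variant: mirror the NUM_GPUS fallback pattern so a missing
    # 'partition' key yields a sentinel instead of raising KeyError.
    def read_partition(args):
        return -1 if 'partition' not in args else int(args['partition'])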
@@ -356,7 +357,7 @@ class Train (GNet):
         self.meta = self.log_meta()
         if(self.logger):
-            self.logger.write({"module":"gan-train","action":"start","input":self.meta} )
+            self.logger.write({"module":"gan-train","action":"start","input":{"partition":self.PARTITION,"meta":self.meta} } )

         # self.log (real_shape=list(self._REAL.shape),label_shape = self._LABEL.shape,meta_data=self.meta)
     def load_meta(self, column):
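Each logger.write touched by this commit moves its old payload under an "input" envelope that also carries the partition id. A hypothetical helper (not in the commit) that captures the record shape once:

    # Hypothetical helper: build the {module, action, input} record and tag
    # the payload with the partition that produced it.
    def make_record(module, action, partition, **payload):
        payload['partition'] = partition
        return {"module": module, "action": action, "input": payload}

    # usage sketch: self.logger.write(make_record("gan-train", "start",
    #                                             self.PARTITION, meta=self.meta))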
@@ -408,7 +409,7 @@ class Train (GNet):
         # losses = tf.compat.v1.get_collection(flag, scope)

         total_loss = tf.add_n(losses, name='total_loss')

         print (total_loss)
         return total_loss, w
     def input_fn(self):
         """
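For context on the hunk above: tf.add_n sums a list of tensors elementwise, which is how the collected per-scope losses become the single scalar that is printed and returned. A runnable miniature against the pinned tensorflow==1.15:

    # tf.add_n sums its inputs elementwise; with scalars it is a plain sum.
    import tensorflow as tf
    losses = [tf.constant(0.5), tf.constant(1.5)]
    total_loss = tf.add_n(losses, name='total_loss')
    with tf.compat.v1.Session() as sess:   # graph mode, as in TF 1.15
        print(sess.run(total_loss))        # 2.0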
@@ -514,7 +515,7 @@ class Train (GNet):
                 #
                 #
                 if self.logger :
-                    row = {"module":"gan-train","action":"logs","input":logs} #,"model":pickle.dump(sess)}
+                    row = {"module":"gan-train","action":"logs","input":{"partition":self.PARTITION,"logs":logs}} #,"model":pickle.dump(sess)}
                     self.logger.write(row)
                 #
                 # @TODO:
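Nesting the partition inside "input" makes it a queryable field in the Mongo store configured later in this commit (dbname 'aou'). A read-side sketch; the collection name 'logs' is an assumption, since the writer's target collection comes from args['context']:

    # Hypothetical query: pull the per-epoch training logs for one partition.
    from pymongo import MongoClient
    db = MongoClient()['aou']
    for row in db['logs'].find({"module": "gan-train", "action": "logs",
                                "input.partition": 0}):
        print(row['input']['logs'])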
|
@ -623,6 +624,7 @@ class Predict(GNet):
|
|||
|
||||
# r = np.zeros((self.ROW_COUNT,len(columns)))
|
||||
# r = np.zeros(self.ROW_COUNT)
|
||||
|
||||
if self.logger :
|
||||
info = {"found":len(found),"rows":df.shape[0],"cols":df.shape[1],"expected":len(self.values)}
|
||||
if INDEX > 0 :
|
||||
@@ -631,6 +633,7 @@ class Predict(GNet):
                 info['selected'] = -1
             info['ratio'] = __ratio
+            info['partition'] = self.PARTITION
             self.logger.write({"module":"gan-generate","action":"generate","input":info})
         df.columns = self.values
         if len(found) or df.columns.size == len(self.values):
@@ -658,7 +661,7 @@ class Predict(GNet):
             df = df[columns[0]].append(pd.Series(missing))
         if self.logger :

-            info= {"missing": i.size,"rows":df.shape[0],"cols":1}
+            info= {"missing": i.size,"rows":df.shape[0],"cols":1,'partition':self.PARTITION}
             self.logger.write({"module":"gan-generate","action":"compile.io","input":info})

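An aside on the context line above: Series.append was deprecated in pandas 1.4 and removed in 2.0. It is fine under this project's pandas era, but the equivalent under newer versions is pd.concat (the values below are placeholders):

    import pandas as pd
    series = pd.DataFrame({"a": [1.0, 2.0]})["a"]   # stands in for df[columns[0]]
    missing = [0.0]                                 # stands in for the missing values
    out = pd.concat([series, pd.Series(missing)])   # same result as series.append(...)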
@@ -111,7 +111,7 @@ def train (**args) :
             BIN_SIZE = 4 if 'bin_size' not in args else int(args['bin_size'])
             args['real'] = ContinuousToDiscrete.binary(df[col],BIN_SIZE).astype(np.float32)
         else:
-            df.to_csv('tmp-'+args['logs'].replace('/','_')+'-'+col+'.csv',index=False)
+            # df.to_csv('tmp-'+args['logs'].replace('/','_')+'-'+col+'.csv',index=False)
             # print (df[col].dtypes)
             # print (df[col].dropna/(axis=1).unique())
             args['real'] = pd.get_dummies(df[col].dropna()).astype(np.float32).values
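The categorical branch above one-hot encodes a column with get_dummies after dropping nulls, so args['real'] becomes a dense float32 matrix with one column per category. A worked miniature:

    import numpy as np
    import pandas as pd
    col = pd.Series(['a', 'b', None, 'a'])
    real = pd.get_dummies(col.dropna()).astype(np.float32).values
    # real.shape == (3, 2): three non-null rows, two category columns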
@@ -124,7 +124,7 @@ def train (**args) :
         args['store']['args']['doc'] = context
         logger = factory.instance(**args['store'])
         args['logger'] = logger
-        info = {"rows":args['real'].shape[0],"cols":args['real'].shape[1],"name":col}
+        info = {"rows":args['real'].shape[0],"cols":args['real'].shape[1],"name":col,"partition":args['partition']}
         logger.write({"module":"gan-train","action":"data-prep","input":info})

     else:
@@ -89,7 +89,8 @@ class Components :
                 _args['gpu'] = 0
                 _args['num_gpu'] = 1
             os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu'])

+        _args['partition'] = int(partition)
         _args['continuous']= args['continuous'] if 'continuous' in args else []
         _args['store'] = {'type':'mongo.MongoWriter','args':{'dbname':'aou','doc':args['context']}}
         _args['data'] = args['data']
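Setting CUDA_VISIBLE_DEVICES from args['gpu'] pins the process to one device, and it has to happen before TensorFlow initializes its GPU context. A sketch of the partition-to-GPU pairing this enables (the modulo mapping is an assumption, not the commit's code):

    import os
    def pin_gpu(partition, num_gpu=1):
        # one process per partition, each seeing exactly one device
        os.environ['CUDA_VISIBLE_DEVICES'] = str(int(partition) % max(num_gpu, 1))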
@@ -144,7 +145,8 @@ class Components :
             # df = pd.DataFrame(df[ int (partition) ],columns = columns)
             info = {"parition":int(partition),"gpu":_args["gpu"],"rows":df.shape[0],"cols":df.shape[1],"part_size":PART_SIZE}
             logger.write({"module":"generate","action":"partition","input":info})
+            _args['partition'] = int(partition)
             _args['continuous']= args['continuous'] if 'continuous' in args else []
             _args['data'] = df
             # _args['data'] = reader()
             #_args['data'] = _args['data'].astype(object)
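Note that the record above is written with the literal key "parition" (sic), so any read-side filter has to match that spelling. A hypothetical query (collection name assumed, as before):

    from pymongo import MongoClient
    db = MongoClient()['aou']
    rows = db['logs'].find({"module": "generate", "action": "partition",
                            "input.parition": 0})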
@@ -194,7 +196,7 @@ class Components :
             data_comp.to_gbq(if_exists='append',destination_table=partial,credentials=credentials,chunksize=50000)
             data_comp.to_csv(_pname,index=False)
             INSERT_FLAG = 'replace' if 'partition' not in args or 'segment' not in args else 'append'
-            _args['data'].to_gbq(if_exists=INSERT_FLAG,destination_table=complete,credentials=credentials,chunksize=50000)
+            _args['data'].to_gbq(if_exists='append',destination_table=complete,credentials=credentials,chunksize=50000)
             _id = 'dataset'
             info = {"full":{_id:_fname,"rows":_args['data'].shape[0]},"partial":{"path":_pname,"rows":data_comp.shape[0]} }
             if partition :
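The relevant pandas-gbq semantics for the hunk above: if_exists='replace' drops and recreates the destination table, while 'append' only adds rows, which matters when several partition processes write the same complete table. A minimal sketch (the table name is hypothetical; a real call needs GCP credentials, as in the code above):

    import pandas as pd
    df = pd.DataFrame({"x": [1, 2]})
    df.to_gbq(destination_table='aou.complete_example',  # hypothetical table
              if_exists='append', chunksize=50000)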
2	setup.py
@@ -4,7 +4,7 @@ import sys

 def read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()
-args = {"name":"data-maker","version":"1.2.0","author":"Vanderbilt University Medical Center","author_email":"steve.l.nyemba@vanderbilt.edu","license":"MIT",
+args = {"name":"data-maker","version":"1.2.1","author":"Vanderbilt University Medical Center","author_email":"steve.l.nyemba@vanderbilt.edu","license":"MIT",
 "packages":find_packages(),"keywords":["healthcare","data","transport","protocol"]}
 args["install_requires"] = ['data-transport@git+https://dev.the-phi.com/git/steve/data-transport.git','tensorflow==1.15','pandas','pandas-gbq','pymongo']
 args['url'] = 'https://hiplab.mc.vanderbilt.edu/git/aou/data-maker.git'