Bug fix: add the partition number to training/generation log entries and pass it through the pipeline arguments

This commit is contained in:
Steve Nyemba 2020-03-08 19:33:08 -05:00
parent 266bdc8bd2
commit e07c355388
4 changed files with 15 additions and 10 deletions

View File

@ -59,6 +59,7 @@ class GNet :
self.logs = {} self.logs = {}
self.NUM_GPUS = 1 if 'num_gpu' not in args else args['num_gpu'] self.NUM_GPUS = 1 if 'num_gpu' not in args else args['num_gpu']
self.PARTITION = args['partition']
# if self.NUM_GPUS > 1 : # if self.NUM_GPUS > 1 :
# os.environ['CUDA_VISIBLE_DEVICES'] = "4" # os.environ['CUDA_VISIBLE_DEVICES'] = "4"
@ -356,7 +357,7 @@ class Train (GNet):
self.meta = self.log_meta() self.meta = self.log_meta()
if(self.logger): if(self.logger):
self.logger.write({"module":"gan-train","action":"start","input":self.meta} ) self.logger.write({"module":"gan-train","action":"start","input":{"partition":self.PARTITION,"meta":self.meta} } )
# self.log (real_shape=list(self._REAL.shape),label_shape = self._LABEL.shape,meta_data=self.meta) # self.log (real_shape=list(self._REAL.shape),label_shape = self._LABEL.shape,meta_data=self.meta)
def load_meta(self, column): def load_meta(self, column):
@ -408,7 +409,7 @@ class Train (GNet):
# losses = tf.compat.v1.get_collection(flag, scope) # losses = tf.compat.v1.get_collection(flag, scope)
total_loss = tf.add_n(losses, name='total_loss') total_loss = tf.add_n(losses, name='total_loss')
print (total_loss)
return total_loss, w return total_loss, w
def input_fn(self): def input_fn(self):
""" """
@ -514,7 +515,7 @@ class Train (GNet):
# #
# #
if self.logger : if self.logger :
row = {"module":"gan-train","action":"logs","input":logs} #,"model":pickle.dump(sess)} row = {"module":"gan-train","action":"logs","input":{"partition":self.PARTITION,"logs":logs}} #,"model":pickle.dump(sess)}
self.logger.write(row) self.logger.write(row)
# #
# @TODO: # @TODO:
@ -623,6 +624,7 @@ class Predict(GNet):
# r = np.zeros((self.ROW_COUNT,len(columns))) # r = np.zeros((self.ROW_COUNT,len(columns)))
# r = np.zeros(self.ROW_COUNT) # r = np.zeros(self.ROW_COUNT)
if self.logger : if self.logger :
info = {"found":len(found),"rows":df.shape[0],"cols":df.shape[1],"expected":len(self.values)} info = {"found":len(found),"rows":df.shape[0],"cols":df.shape[1],"expected":len(self.values)}
if INDEX > 0 : if INDEX > 0 :
@ -631,6 +633,7 @@ class Predict(GNet):
info['selected'] = -1 info['selected'] = -1
info['ratio'] = __ratio info['ratio'] = __ratio
info['partition'] = self.PARTITION
self.logger.write({"module":"gan-generate","action":"generate","input":info}) self.logger.write({"module":"gan-generate","action":"generate","input":info})
df.columns = self.values df.columns = self.values
if len(found) or df.columns.size == len(self.values): if len(found) or df.columns.size == len(self.values):
@ -658,7 +661,7 @@ class Predict(GNet):
df = df[columns[0]].append(pd.Series(missing)) df = df[columns[0]].append(pd.Series(missing))
if self.logger : if self.logger :
info= {"missing": i.size,"rows":df.shape[0],"cols":1} info= {"missing": i.size,"rows":df.shape[0],"cols":1,'partition':self.PARTITION}
self.logger.write({"module":"gan-generate","action":"compile.io","input":info}) self.logger.write({"module":"gan-generate","action":"compile.io","input":info})

View File

@ -111,7 +111,7 @@ def train (**args) :
BIN_SIZE = 4 if 'bin_size' not in args else int(args['bin_size']) BIN_SIZE = 4 if 'bin_size' not in args else int(args['bin_size'])
args['real'] = ContinuousToDiscrete.binary(df[col],BIN_SIZE).astype(np.float32) args['real'] = ContinuousToDiscrete.binary(df[col],BIN_SIZE).astype(np.float32)
else: else:
df.to_csv('tmp-'+args['logs'].replace('/','_')+'-'+col+'.csv',index=False) # df.to_csv('tmp-'+args['logs'].replace('/','_')+'-'+col+'.csv',index=False)
# print (df[col].dtypes) # print (df[col].dtypes)
# print (df[col].dropna/(axis=1).unique()) # print (df[col].dropna/(axis=1).unique())
args['real'] = pd.get_dummies(df[col].dropna()).astype(np.float32).values args['real'] = pd.get_dummies(df[col].dropna()).astype(np.float32).values
@ -124,7 +124,7 @@ def train (**args) :
args['store']['args']['doc'] = context args['store']['args']['doc'] = context
logger = factory.instance(**args['store']) logger = factory.instance(**args['store'])
args['logger'] = logger args['logger'] = logger
info = {"rows":args['real'].shape[0],"cols":args['real'].shape[1],"name":col} info = {"rows":args['real'].shape[0],"cols":args['real'].shape[1],"name":col,"partition":args['partition']}
logger.write({"module":"gan-train","action":"data-prep","input":info}) logger.write({"module":"gan-train","action":"data-prep","input":info})
else: else:

View File

@ -89,7 +89,8 @@ class Components :
_args['gpu'] = 0 _args['gpu'] = 0
_args['num_gpu'] = 1 _args['num_gpu'] = 1
os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu']) os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu'])
_args['partition'] = int(partition)
_args['continuous']= args['continuous'] if 'continuous' in args else []
_args['store'] = {'type':'mongo.MongoWriter','args':{'dbname':'aou','doc':args['context']}} _args['store'] = {'type':'mongo.MongoWriter','args':{'dbname':'aou','doc':args['context']}}
_args['data'] = args['data'] _args['data'] = args['data']
@ -144,7 +145,8 @@ class Components :
# df = pd.DataFrame(df[ int (partition) ],columns = columns) # df = pd.DataFrame(df[ int (partition) ],columns = columns)
info = {"parition":int(partition),"gpu":_args["gpu"],"rows":df.shape[0],"cols":df.shape[1],"part_size":PART_SIZE} info = {"parition":int(partition),"gpu":_args["gpu"],"rows":df.shape[0],"cols":df.shape[1],"part_size":PART_SIZE}
logger.write({"module":"generate","action":"partition","input":info}) logger.write({"module":"generate","action":"partition","input":info})
_args['partition'] = int(partition)
_args['continuous']= args['continuous'] if 'continuous' in args else []
_args['data'] = df _args['data'] = df
# _args['data'] = reader() # _args['data'] = reader()
#_args['data'] = _args['data'].astype(object) #_args['data'] = _args['data'].astype(object)
@ -194,7 +196,7 @@ class Components :
data_comp.to_gbq(if_exists='append',destination_table=partial,credentials=credentials,chunksize=50000) data_comp.to_gbq(if_exists='append',destination_table=partial,credentials=credentials,chunksize=50000)
data_comp.to_csv(_pname,index=False) data_comp.to_csv(_pname,index=False)
INSERT_FLAG = 'replace' if 'partition' not in args or 'segment' not in args else 'append' INSERT_FLAG = 'replace' if 'partition' not in args or 'segment' not in args else 'append'
_args['data'].to_gbq(if_exists=INSERT_FLAG,destination_table=complete,credentials=credentials,chunksize=50000) _args['data'].to_gbq(if_exists='append',destination_table=complete,credentials=credentials,chunksize=50000)
_id = 'dataset' _id = 'dataset'
info = {"full":{_id:_fname,"rows":_args['data'].shape[0]},"partial":{"path":_pname,"rows":data_comp.shape[0]} } info = {"full":{_id:_fname,"rows":_args['data'].shape[0]},"partial":{"path":_pname,"rows":data_comp.shape[0]} }
if partition : if partition :

View File

@ -4,7 +4,7 @@ import sys
def read(fname): def read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read() return open(os.path.join(os.path.dirname(__file__), fname)).read()
args = {"name":"data-maker","version":"1.2.0","author":"Vanderbilt University Medical Center","author_email":"steve.l.nyemba@vanderbilt.edu","license":"MIT", args = {"name":"data-maker","version":"1.2.1","author":"Vanderbilt University Medical Center","author_email":"steve.l.nyemba@vanderbilt.edu","license":"MIT",
"packages":find_packages(),"keywords":["healthcare","data","transport","protocol"]} "packages":find_packages(),"keywords":["healthcare","data","transport","protocol"]}
args["install_requires"] = ['data-transport@git+https://dev.the-phi.com/git/steve/data-transport.git','tensorflow==1.15','pandas','pandas-gbq','pymongo'] args["install_requires"] = ['data-transport@git+https://dev.the-phi.com/git/steve/data-transport.git','tensorflow==1.15','pandas','pandas-gbq','pymongo']
args['url'] = 'https://hiplab.mc.vanderbilt.edu/git/aou/data-maker.git' args['url'] = 'https://hiplab.mc.vanderbilt.edu/git/aou/data-maker.git'