feature: bootstrap-like with candidates
This commit is contained in:
parent
12d7573ba8
commit
f26795387e
26
data/gan.py
26
data/gan.py
|
@ -67,8 +67,9 @@ class GNet :
|
||||||
self.NUM_GPUS = 0
|
self.NUM_GPUS = 0
|
||||||
else:
|
else:
|
||||||
self.NUM_GPUS = len(self.GPU_CHIPS)
|
self.NUM_GPUS = len(self.GPU_CHIPS)
|
||||||
|
# os.environ['CUDA_VISIBLE_DEVICES'] = str(self.GPU_CHIPS[0])
|
||||||
|
|
||||||
self.PARTITION = args['partition']
|
self.PARTITION = args['partition'] if 'partition' in args else None
|
||||||
# if self.NUM_GPUS > 1 :
|
# if self.NUM_GPUS > 1 :
|
||||||
# os.environ['CUDA_VISIBLE_DEVICES'] = "4"
|
# os.environ['CUDA_VISIBLE_DEVICES'] = "4"
|
||||||
|
|
||||||
|
@ -117,9 +118,14 @@ class GNet :
|
||||||
for key in ['train','output'] :
|
for key in ['train','output'] :
|
||||||
self.mkdir(os.sep.join([self.log_dir,key]))
|
self.mkdir(os.sep.join([self.log_dir,key]))
|
||||||
self.mkdir (os.sep.join([self.log_dir,key,self.CONTEXT]))
|
self.mkdir (os.sep.join([self.log_dir,key,self.CONTEXT]))
|
||||||
|
if 'partition' in args :
|
||||||
|
self.mkdir (os.sep.join([self.log_dir,key,self.CONTEXT,str(args['partition'])]))
|
||||||
|
|
||||||
self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
|
self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
|
||||||
self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
|
self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
|
||||||
|
if 'partition' in args :
|
||||||
|
self.train_dir = os.sep.join([self.train_dir,str(args['partition'])])
|
||||||
|
self.out_dir = os.sep.join([self.out_dir,str(args['partition'])])
|
||||||
# if self.logger :
|
# if self.logger :
|
||||||
|
|
||||||
# We will clear the logs from the data-store
|
# We will clear the logs from the data-store
|
||||||
|
@ -130,7 +136,7 @@ class GNet :
|
||||||
# db.backup.insert({'name':column,'logs':list(db[column].find()) })
|
# db.backup.insert({'name':column,'logs':list(db[column].find()) })
|
||||||
# db[column].drop()
|
# db[column].drop()
|
||||||
|
|
||||||
def load_meta(self,column):
|
def load_meta(self,**args):
|
||||||
"""
|
"""
|
||||||
This function is designed to accommodate the uses of the sub-classes outside of a strict dependency model.
|
This function is designed to accommodate the uses of the sub-classes outside of a strict dependency model.
|
||||||
Because prediction and training can happen independently
|
Because prediction and training can happen independently
|
||||||
|
@ -145,6 +151,9 @@ class GNet :
|
||||||
setattr(self,key,value)
|
setattr(self,key,value)
|
||||||
self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
|
self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
|
||||||
self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
|
self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
|
||||||
|
if 'partition' in args :
|
||||||
|
self.train_dir = os.sep.join([self.train_dir,str(args['partition'])])
|
||||||
|
self.out_dir = os.sep.join([self.out_dir,str(args['partition'])])
|
||||||
|
|
||||||
|
|
||||||
def log_meta(self,**args) :
|
def log_meta(self,**args) :
|
||||||
|
@ -265,9 +274,9 @@ class Generator (GNet):
|
||||||
#tf.add_to_collection('glosses', loss)
|
#tf.add_to_collection('glosses', loss)
|
||||||
tf.compat.v1.add_to_collection('glosses', loss)
|
tf.compat.v1.add_to_collection('glosses', loss)
|
||||||
return loss, loss
|
return loss, loss
|
||||||
def load_meta(self, column):
|
def load_meta(self, **args):
|
||||||
super().load_meta(column)
|
super().load_meta(**args)
|
||||||
self.discriminator.load_meta(column)
|
self.discriminator.load_meta(**args)
|
||||||
def network(self,**args) :
|
def network(self,**args) :
|
||||||
"""
|
"""
|
||||||
This function will build the network that will generate the synthetic candidates
|
This function will build the network that will generate the synthetic candidates
|
||||||
|
@ -454,6 +463,7 @@ class Train (GNet):
|
||||||
# - determine if the GPU/CPU are busy
|
# - determine if the GPU/CPU are busy
|
||||||
#
|
#
|
||||||
for i in self.GPU_CHIPS : #range(self.NUM_GPUS):
|
for i in self.GPU_CHIPS : #range(self.NUM_GPUS):
|
||||||
|
|
||||||
with tf.device('/gpu:%d' % i):
|
with tf.device('/gpu:%d' % i):
|
||||||
with tf.name_scope('%s_%d' % ('TOWER', i)) as scope:
|
with tf.name_scope('%s_%d' % ('TOWER', i)) as scope:
|
||||||
if self._LABEL is not None :
|
if self._LABEL is not None :
|
||||||
|
@ -559,9 +569,9 @@ class Predict(GNet):
|
||||||
|
|
||||||
# self.MISSING_VALUES = args['no_value']
|
# self.MISSING_VALUES = args['no_value']
|
||||||
# self.MISSING_VALUES = int(args['no_value']) if args['no_value'].isnumeric() else np.na if args['no_value'] in ['na','NA','N/A'] else args['no_value']
|
# self.MISSING_VALUES = int(args['no_value']) if args['no_value'].isnumeric() else np.na if args['no_value'] in ['na','NA','N/A'] else args['no_value']
|
||||||
def load_meta(self, column):
|
def load_meta(self, **args):
|
||||||
super().load_meta(column)
|
super().load_meta(**args)
|
||||||
self.generator.load_meta(column)
|
self.generator.load_meta(**args)
|
||||||
self.ROW_COUNT = self.oROW_COUNT
|
self.ROW_COUNT = self.oROW_COUNT
|
||||||
def apply(self,**args):
|
def apply(self,**args):
|
||||||
suffix = self.CONTEXT #self.get.suffix()
|
suffix = self.CONTEXT #self.get.suffix()
|
||||||
|
|
|
@ -112,7 +112,8 @@ def train (**_args):
|
||||||
args ['max_epochs'] = _args['max_epochs']
|
args ['max_epochs'] = _args['max_epochs']
|
||||||
args['matrix_size'] = _matrix.shape[0]
|
args['matrix_size'] = _matrix.shape[0]
|
||||||
args['batch_size'] = 2000
|
args['batch_size'] = 2000
|
||||||
args['partition'] = 0 if 'partition' not in _args else _args['partition']
|
if 'partition' in _args :
|
||||||
|
args['partition'] = _args['partition']
|
||||||
if 'gpu' in _args :
|
if 'gpu' in _args :
|
||||||
args['gpu'] = _args['gpu']
|
args['gpu'] = _args['gpu']
|
||||||
# os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu']) if 'gpu' in args else '0'
|
# os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu']) if 'gpu' in args else '0'
|
||||||
|
@ -121,7 +122,8 @@ def train (**_args):
|
||||||
#
|
#
|
||||||
# @TODO: Write the map.json in the output directory for the logs
|
# @TODO: Write the map.json in the output directory for the logs
|
||||||
#
|
#
|
||||||
f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']),'w')
|
# f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']),'w')
|
||||||
|
f = open(os.sep.join([trainer.out_dir,'map.json']),'w')
|
||||||
f.write(json.dumps(_map))
|
f.write(json.dumps(_map))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
@ -140,7 +142,11 @@ def generate(**_args):
|
||||||
:param context
|
:param context
|
||||||
:param logs
|
:param logs
|
||||||
"""
|
"""
|
||||||
f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']))
|
partition = _args['partition'] if 'partition' in _args else None
|
||||||
|
if not partition :
|
||||||
|
f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']))
|
||||||
|
else:
|
||||||
|
f = open(os.sep.join([_args['logs'],'output',_args['context'],str(partition),'map.json']))
|
||||||
_map = json.loads(f.read())
|
_map = json.loads(f.read())
|
||||||
f.close()
|
f.close()
|
||||||
# if 'file' in _args :
|
# if 'file' in _args :
|
||||||
|
@ -165,7 +171,7 @@ def generate(**_args):
|
||||||
args['gpu'] = _args['gpu']
|
args['gpu'] = _args['gpu']
|
||||||
|
|
||||||
handler = gan.Predict (**args)
|
handler = gan.Predict (**args)
|
||||||
handler.load_meta(None)
|
handler.load_meta(column=None)
|
||||||
#
|
#
|
||||||
# Let us now format the matrices by reverting them to a data-frame with values
|
# Let us now format the matrices by reverting them to a data-frame with values
|
||||||
#
|
#
|
||||||
|
|
|
@ -237,7 +237,7 @@ class Input :
|
||||||
#
|
#
|
||||||
# @NOTE: For some reason, there is an out of memory error created here, this seems to fix it (go figure)
|
# @NOTE: For some reason, there is an out of memory error created here, this seems to fix it (go figure)
|
||||||
#
|
#
|
||||||
_matrix = np.array([np.repeat(0,cols.size) for i in range(row_count)])
|
_matrix = np.array([np.repeat(0,cols.size) for i in range(0,row_count)])
|
||||||
[np.put(_matrix[i], np.where(cols == rows[i]) ,1)for i in np.arange(row_count) if np.where(cols == rows[i])[0].size > 0]
|
[np.put(_matrix[i], np.where(cols == rows[i]) ,1)for i in np.arange(row_count) if np.where(cols == rows[i])[0].size > 0]
|
||||||
# else:
|
# else:
|
||||||
# _matrix = cp.zeros([row_count,cols.size])
|
# _matrix = cp.zeros([row_count,cols.size])
|
||||||
|
|
71
pipeline.py
71
pipeline.py
|
@ -146,6 +146,8 @@ class Components :
|
||||||
_args['data'] = _args['data'][list(set(_args['data'].columns) - set(x_cols))]
|
_args['data'] = _args['data'][list(set(_args['data'].columns) - set(x_cols))]
|
||||||
if 'gpu' in args :
|
if 'gpu' in args :
|
||||||
_args['gpu'] = self.set_gpu(gpu=args['gpu'])
|
_args['gpu'] = self.set_gpu(gpu=args['gpu'])
|
||||||
|
if 'partition' in args :
|
||||||
|
_args['partition'] = args['partition']
|
||||||
if df.shape[0] and df.shape[0] :
|
if df.shape[0] and df.shape[0] :
|
||||||
#
|
#
|
||||||
# We have a full blown matrix to be processed
|
# We have a full blown matrix to be processed
|
||||||
|
@ -154,7 +156,7 @@ class Components :
|
||||||
print ("... skipping training !!")
|
print ("... skipping training !!")
|
||||||
|
|
||||||
if 'autopilot' in ( list(args.keys())) :
|
if 'autopilot' in ( list(args.keys())) :
|
||||||
|
|
||||||
args['data'] = df
|
args['data'] = df
|
||||||
print (['autopilot mode enabled ....',args['context']])
|
print (['autopilot mode enabled ....',args['context']])
|
||||||
self.generate(args)
|
self.generate(args)
|
||||||
|
@ -171,6 +173,7 @@ class Components :
|
||||||
r = np.random.dirichlet(values+.001) #-- dirichlet doesn't work on values with zeros
|
r = np.random.dirichlet(values+.001) #-- dirichlet doesn't work on values with zeros
|
||||||
_sd = values[values > 0].std()
|
_sd = values[values > 0].std()
|
||||||
_me = values[values > 0].mean()
|
_me = values[values > 0].mean()
|
||||||
|
_mi = values.min()
|
||||||
x = []
|
x = []
|
||||||
_type = values.dtype
|
_type = values.dtype
|
||||||
for index in np.arange(values.size) :
|
for index in np.arange(values.size) :
|
||||||
|
@ -182,7 +185,7 @@ class Components :
|
||||||
value = values[index] - (values[index] * r[index])
|
value = values[index] - (values[index] * r[index])
|
||||||
#
|
#
|
||||||
# randomly shifting the measurements
|
# randomly shifting the measurements
|
||||||
if np.random.choice([0,1],1)[0] and _me > _sd:
|
if np.random.choice([0,1],1)[0] and _me > _sd :
|
||||||
if np.random.choice([0,1],1)[0] :
|
if np.random.choice([0,1],1)[0] :
|
||||||
value = value * np.divide(_me,_sd)
|
value = value * np.divide(_me,_sd)
|
||||||
else:
|
else:
|
||||||
|
@ -273,6 +276,9 @@ class Components :
|
||||||
args['candidates'] = 1 if 'candidates' not in args else int(args['candidates'])
|
args['candidates'] = 1 if 'candidates' not in args else int(args['candidates'])
|
||||||
if 'gpu' in args :
|
if 'gpu' in args :
|
||||||
args['gpu'] = self.set_gpu(gpu=args['gpu'])
|
args['gpu'] = self.set_gpu(gpu=args['gpu'])
|
||||||
|
# if 'partition' in args :
|
||||||
|
# args['logs'] = os.sep.join([args['logs'],str(args['partition'])])
|
||||||
|
|
||||||
_info = {"module":"gan-prep","action":"prune","shape":{"rows":args['data'].shape[0],"columns":args['data'].shape[1]}}
|
_info = {"module":"gan-prep","action":"prune","shape":{"rows":args['data'].shape[0],"columns":args['data'].shape[1]}}
|
||||||
logger.write(_info)
|
logger.write(_info)
|
||||||
if args['data'].shape[0] > 0 and args['data'].shape[1] > 0 :
|
if args['data'].shape[0] > 0 and args['data'].shape[1] > 0 :
|
||||||
|
@ -459,12 +465,18 @@ if __name__ == '__main__' :
|
||||||
# COLUMNS = DATA.columns
|
# COLUMNS = DATA.columns
|
||||||
# DATA = np.array_split(DATA,PART_SIZE)
|
# DATA = np.array_split(DATA,PART_SIZE)
|
||||||
# args['schema'] = schema
|
# args['schema'] = schema
|
||||||
|
GPU_CHIPS = SYS_ARGS['gpu'] if 'gpu' in SYS_ARGS else None
|
||||||
|
if GPU_CHIPS and type(GPU_CHIPS) != list :
|
||||||
|
GPU_CHIPS = [int(_id.strip()) for _id in GPU_CHIPS.split(',')] if type(GPU_CHIPS) == str else [GPU_CHIPS]
|
||||||
|
if 'gpu' in SYS_ARGS :
|
||||||
|
args['gpu'] = GPU_CHIPS
|
||||||
|
jobs = []
|
||||||
if 'generate' in SYS_ARGS :
|
if 'generate' in SYS_ARGS :
|
||||||
#
|
#
|
||||||
# Let us see if we have partitions given the log folder
|
# Let us see if we have partitions given the log folder
|
||||||
|
|
||||||
content = os.listdir( os.sep.join([args['logs'],'train',args['context']]))
|
content = os.listdir( os.sep.join([args['logs'],'train',args['context']]))
|
||||||
generator = Components()
|
|
||||||
|
|
||||||
# if ''.join(content).isnumeric() :
|
# if ''.join(content).isnumeric() :
|
||||||
# #
|
# #
|
||||||
|
@ -508,13 +520,60 @@ if __name__ == '__main__' :
|
||||||
# else:
|
# else:
|
||||||
# generator.generate(args)
|
# generator.generate(args)
|
||||||
# Components.generate(args)
|
# Components.generate(args)
|
||||||
generator.generate(args)
|
if '--all-chips' in SYS_ARGS and GPU_CHIPS:
|
||||||
|
index = 0
|
||||||
|
jobs = []
|
||||||
|
for _id in GPU_CHIPS :
|
||||||
|
_args = copy.deepcopy(args)
|
||||||
|
_args['gpu'] = [int(_gpu)]
|
||||||
|
_args['partition'] = index
|
||||||
|
index += 1
|
||||||
|
make = lambda _params: (Components()).generate(_params)
|
||||||
|
job = Process(target=make,args=( dict(_args),))
|
||||||
|
job.name = 'Trainer # ' + str(index)
|
||||||
|
job.start()
|
||||||
|
jobs.append(job)
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
generator = Components()
|
||||||
|
generator.generate(args)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
# DATA = np.array_split(DATA,PART_SIZE)
|
# DATA = np.array_split(DATA,PART_SIZE)
|
||||||
agent = Components()
|
#
|
||||||
agent.train(**args)
|
# Let us create n-jobs across n-gpus. The assumption here is that the data that is produced will be a partition
|
||||||
|
# @TODO: Find better name for partition
|
||||||
|
#
|
||||||
|
if GPU_CHIPS and '--all-chips' in SYS_ARGS:
|
||||||
|
index = 0
|
||||||
|
|
||||||
|
for _gpu in GPU_CHIPS :
|
||||||
|
_args = copy.deepcopy(args)
|
||||||
|
_args['gpu'] = [int(_gpu)]
|
||||||
|
_args['partition'] = index
|
||||||
|
index += 1
|
||||||
|
make = lambda _params: (Components()).train(**_params)
|
||||||
|
job = Process(target=make,args=( dict(_args),))
|
||||||
|
job.name = 'Trainer # ' + str(index)
|
||||||
|
job.start()
|
||||||
|
jobs.append(job)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
else:
|
||||||
|
#
|
||||||
|
# The choice of the chip will be made internally
|
||||||
|
agent = Components()
|
||||||
|
agent.train(**args)
|
||||||
|
#
|
||||||
|
# If we have any jobs we should wait until they finish
|
||||||
|
#
|
||||||
|
while len(jobs)> 0 :
|
||||||
|
jobs = [job for job in jobs if job.is_alive()]
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
# jobs = []
|
# jobs = []
|
||||||
# for index in range(0,PART_SIZE) :
|
# for index in range(0,PART_SIZE) :
|
||||||
# if 'focus' in args and int(args['focus']) != index :
|
# if 'focus' in args and int(args['focus']) != index :
|
||||||
|
|
Loading…
Reference in New Issue