feature: bootstrap-like with candidates

parent 12d7573ba8
commit f26795387e
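
In short: `partition` becomes an optional argument threaded through `GNet`, which now nests per-partition `train`/`output` directories; `load_meta` moves from a positional `column` to keyword arguments across `GNet`, `Generator` and `Predict`; `train()`/`generate()` read and write `map.json` per partition; and `pipeline.py` can fan one job out per GPU chip when `--all-chips` is passed.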
data/gan.py (26 changes)

@@ -67,8 +67,9 @@ class GNet :
             self.NUM_GPUS = 0
         else:
             self.NUM_GPUS = len(self.GPU_CHIPS)
+            # os.environ['CUDA_VISIBLE_DEVICES'] = str(self.GPU_CHIPS[0])

-        self.PARTITION = args['partition']
+        self.PARTITION = args['partition'] if 'partition' in args else None
         # if self.NUM_GPUS > 1 :
         #     os.environ['CUDA_VISIBLE_DEVICES'] = "4"

@@ -117,9 +118,14 @@ class GNet :
         for key in ['train','output'] :
             self.mkdir(os.sep.join([self.log_dir,key]))
             self.mkdir (os.sep.join([self.log_dir,key,self.CONTEXT]))
+            if 'partition' in args :
+                self.mkdir (os.sep.join([self.log_dir,key,self.CONTEXT,str(args['partition'])]))

         self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
         self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
+        if 'partition' in args :
+            self.train_dir = os.sep.join([self.train_dir,str(args['partition'])])
+            self.out_dir = os.sep.join([self.out_dir,str(args['partition'])])
         # if self.logger :

         # We will clear the logs from the data-store
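
Note: the partition id simply nests each partition's artifacts one directory level deeper. A minimal sketch of the resulting layout, with hypothetical values:

    import os

    log_dir, CONTEXT, partition = 'logs', 'demo', 0           # hypothetical values
    for key in ['train','output']:
        base = os.sep.join([log_dir, key, CONTEXT])           # logs/train/demo, logs/output/demo
        # with a partition, artifacts nest one level deeper per partition id
        print(os.sep.join([base, str(partition)]))            # logs/train/demo/0, logs/output/demo/0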
@@ -130,7 +136,7 @@ class GNet :
         #     db.backup.insert({'name':column,'logs':list(db[column].find()) })
         #     db[column].drop()

-    def load_meta(self,column):
+    def load_meta(self,**args):
         """
         This function is designed to accommodate the use of the sub-classes outside of a strict dependency model.
         Because prediction and training can happen independently
@@ -145,6 +151,9 @@ class GNet :
             setattr(self,key,value)
         self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
         self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
+        if 'partition' in args :
+            self.train_dir = os.sep.join([self.train_dir,str(args['partition'])])
+            self.out_dir = os.sep.join([self.out_dir,str(args['partition'])])


     def log_meta(self,**args) :
@@ -265,9 +274,9 @@ class Generator (GNet):
         #tf.add_to_collection('glosses', loss)
         tf.compat.v1.add_to_collection('glosses', loss)
         return loss, loss
-    def load_meta(self, column):
-        super().load_meta(column)
-        self.discriminator.load_meta(column)
+    def load_meta(self, **args):
+        super().load_meta(**args)
+        self.discriminator.load_meta(**args)
     def network(self,**args) :
         """
         This function will build the network that will generate the synthetic candidates
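
The same positional-to-keyword migration of `load_meta` is applied here and to `Predict` below. A minimal sketch of the pattern, with illustrative class names:

    class Base:
        def load_meta(self, **args):
            # keys are optional, so callers without a column simply omit it
            self.column = args.get('column')

    class Child(Base):
        def load_meta(self, **args):
            super().load_meta(**args)       # forward keywords unchanged

    Child().load_meta(column=None)          # mirrors the new handler.load_meta(column=None) call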
@@ -454,6 +463,7 @@ class Train (GNet):
         # - determine if the GPU/CPU are busy
         #
         for i in self.GPU_CHIPS : #range(self.NUM_GPUS):
+
             with tf.device('/gpu:%d' % i):
                 with tf.name_scope('%s_%d' % ('TOWER', i)) as scope:
                     if self._LABEL is not None :
@@ -559,9 +569,9 @@ class Predict(GNet):

         # self.MISSING_VALUES = args['no_value']
         # self.MISSING_VALUES = int(args['no_value']) if args['no_value'].isnumeric() else np.na if args['no_value'] in ['na','NA','N/A'] else args['no_value']
-    def load_meta(self, column):
-        super().load_meta(column)
-        self.generator.load_meta(column)
+    def load_meta(self, **args):
+        super().load_meta(**args)
+        self.generator.load_meta(**args)
+        self.ROW_COUNT = self.oROW_COUNT
     def apply(self,**args):
         suffix = self.CONTEXT #self.get.suffix()

@@ -112,7 +112,8 @@ def train (**_args):
     args ['max_epochs'] = _args['max_epochs']
     args['matrix_size'] = _matrix.shape[0]
     args['batch_size'] = 2000
-    args['partition'] = 0 if 'partition' not in _args else _args['partition']
+    if 'partition' in _args :
+        args['partition'] = _args['partition']
     if 'gpu' in _args :
         args['gpu'] = _args['gpu']
     # os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu']) if 'gpu' in args else '0'
@@ -121,7 +122,8 @@ def train (**_args):
     #
     # @TODO: Write the map.json in the output directory for the logs
     #
-    f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']),'w')
+    # f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']),'w')
+    f = open(os.sep.join([trainer.out_dir,'map.json']),'w')
     f.write(json.dumps(_map))
     f.close()

@@ -140,7 +142,11 @@ def generate(**_args):
     :param context
     :param logs
     """
-    f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']))
+    partition = _args['partition'] if 'partition' in _args else None
+    if partition is None :
+        f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']))
+    else:
+        f = open(os.sep.join([_args['logs'],'output',_args['context'],str(partition),'map.json']))
     _map = json.loads(f.read())
     f.close()
     # if 'file' in _args :
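
This lookup mirrors where `train()` now writes `map.json`. A minimal sketch of the path resolution, with a hypothetical helper name; partition 0 is a valid id, hence the test against None rather than truthiness:

    import os

    def map_path(logs, context, partition=None):              # hypothetical helper
        # partition 0 is a valid id, so test against None rather than truthiness
        parts = [logs, 'output', context] + ([str(partition)] if partition is not None else [])
        return os.sep.join(parts + ['map.json'])

    print(map_path('logs','demo'))      # logs/output/demo/map.json
    print(map_path('logs','demo', 0))   # logs/output/demo/0/map.json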
@@ -165,7 +171,7 @@ def generate(**_args):
         args['gpu'] = _args['gpu']

     handler = gan.Predict (**args)
-    handler.load_meta(None)
+    handler.load_meta(column=None)
     #
     # Let us now format the matrices by reverting them to a data-frame with values
     #
@@ -237,7 +237,7 @@ class Input :
         #
         # @NOTE: For some reason, there is an out of memory error created here, this seems to fix it (go figure)
         #
-        _matrix = np.array([np.repeat(0,cols.size) for i in range(row_count)])
+        _matrix = np.array([np.repeat(0,cols.size) for i in range(0,row_count)])
         [np.put(_matrix[i], np.where(cols == rows[i]) ,1)for i in np.arange(row_count) if np.where(cols == rows[i])[0].size > 0]
         # else:
         #     _matrix = cp.zeros([row_count,cols.size])
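
For reference, these two lines build a row-wise one-hot encoding of `rows` against the vocabulary `cols`. A minimal self-contained sketch with hypothetical data:

    import numpy as np

    cols = np.array(['a','b','c'])          # vocabulary (hypothetical)
    rows = np.array(['b','c','b','z'])      # observed values; 'z' is out of vocabulary
    _matrix = np.array([np.repeat(0,cols.size) for i in range(0,rows.size)])
    # flip the matching column to 1; out-of-vocabulary values keep an all-zero row
    [np.put(_matrix[i], np.where(cols == rows[i]), 1) for i in np.arange(rows.size) if np.where(cols == rows[i])[0].size > 0]
    print(_matrix)   # [[0 1 0] [0 0 1] [0 1 0] [0 0 0]]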
pipeline.py (63 changes)

@@ -146,6 +146,8 @@ class Components :
             _args['data'] = _args['data'][list(set(_args['data'].columns) - set(x_cols))]
         if 'gpu' in args :
             _args['gpu'] = self.set_gpu(gpu=args['gpu'])
+        if 'partition' in args :
+            _args['partition'] = args['partition']
         if df.shape[0] and df.shape[0] :
             #
             # We have a full blown matrix to be processed
@@ -171,6 +173,7 @@ class Components :
                 r = np.random.dirichlet(values+.001) #-- dirichlet doesn't work on values with zeros
                 _sd = values[values > 0].std()
                 _me = values[values > 0].mean()
                 _mi = values.min()
+                x = []
                 _type = values.dtype
                 for index in np.arange(values.size) :
@@ -182,7 +185,7 @@ class Components :
                     value = values[index] - (values[index] * r[index])
                     #
                     # randomly shifting the measurements
-                    if np.random.choice([0,1],1)[0] and _me > _sd:
+                    if np.random.choice([0,1],1)[0] and _me > _sd :
                         if np.random.choice([0,1],1)[0] :
                             value = value * np.divide(_me,_sd)
                         else:
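
For context, the surrounding loop perturbs each measurement by a Dirichlet-weighted fraction and occasionally rescales it by the mean/std ratio. A minimal sketch of the idea; the input values are hypothetical and the final `else` branch, truncated in the hunk above, is assumed to be the division counterpart:

    import numpy as np

    values = np.array([10., 20., 0., 30.])               # hypothetical measurements
    r = np.random.dirichlet(values + .001)               # +.001: dirichlet rejects zero weights
    _sd = values[values > 0].std()
    _me = values[values > 0].mean()
    x = []
    for index in np.arange(values.size):
        value = values[index] - (values[index] * r[index])   # shrink by a random fraction
        if np.random.choice([0, 1], 1)[0] and _me > _sd:
            if np.random.choice([0, 1], 1)[0]:
                value = value * np.divide(_me, _sd)      # occasionally scale up by mean/std
            else:
                value = value / np.divide(_me, _sd)      # assumed mirror of the truncated branch
        x.append(value)
    print(np.array(x))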
@@ -273,6 +276,9 @@ class Components :
         args['candidates'] = 1 if 'candidates' not in args else int(args['candidates'])
+        if 'gpu' in args :
+            args['gpu'] = self.set_gpu(gpu=args['gpu'])
+        # if 'partition' in args :
+        #     args['logs'] = os.sep.join([args['logs'],str(args['partition'])])

         _info = {"module":"gan-prep","action":"prune","shape":{"rows":args['data'].shape[0],"columns":args['data'].shape[1]}}
         logger.write(_info)
         if args['data'].shape[0] > 0 and args['data'].shape[1] > 0 :
@@ -459,12 +465,18 @@ if __name__ == '__main__' :
     # COLUMNS = DATA.columns
     # DATA = np.array_split(DATA,PART_SIZE)
     # args['schema'] = schema
+    GPU_CHIPS = SYS_ARGS['gpu'] if 'gpu' in SYS_ARGS else None
+    if GPU_CHIPS and type(GPU_CHIPS) != list :
+        GPU_CHIPS = [int(_id.strip()) for _id in GPU_CHIPS.split(',')] if type(GPU_CHIPS) == str else [GPU_CHIPS]
+    if 'gpu' in SYS_ARGS :
+        args['gpu'] = GPU_CHIPS
+    jobs = []
     if 'generate' in SYS_ARGS :
         #
         # Let us see if we have partitions given the log folder

         content = os.listdir( os.sep.join([args['logs'],'train',args['context']]))
         generator = Components()


         # if ''.join(content).isnumeric() :
         #     #
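
The normalization above accepts either a single chip id or a comma-separated list. A minimal sketch of the behavior, with a hypothetical helper name:

    def normalize_chips(gpu):                                 # hypothetical helper
        # '0, 2' -> [0, 2]; 1 -> [1]; an existing list passes through unchanged
        if gpu and type(gpu) != list:
            gpu = [int(_id.strip()) for _id in gpu.split(',')] if type(gpu) == str else [gpu]
        return gpu

    print(normalize_chips('0, 2'))   # [0, 2]
    print(normalize_chips(1))        # [1]
    print(normalize_chips(None))     # None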
@@ -508,13 +520,60 @@ if __name__ == '__main__' :
         # else:
         #     generator.generate(args)
         # Components.generate(args)
+        if '--all-chips' in SYS_ARGS and GPU_CHIPS:
+            index = 0
+            jobs = []
+            for _gpu in GPU_CHIPS :
+                _args = copy.deepcopy(args)
+                _args['gpu'] = [int(_gpu)]
+                _args['partition'] = index
+                index += 1
+                make = lambda _params: (Components()).generate(_params)
+                job = Process(target=make,args=( dict(_args),))
+                job.name = 'Trainer # ' + str(index)
+                job.start()
+                jobs.append(job)
+            pass
+        else:
+            generator = Components()
+            generator.generate(args)

     else:

         # DATA = np.array_split(DATA,PART_SIZE)
+        #
+        # Let us create n jobs across n GPUs; the assumption here is that the data produced will be a partition
+        # @TODO: Find better name for partition
+        #
+        if GPU_CHIPS and '--all-chips' in SYS_ARGS:
+            index = 0
+            for _gpu in GPU_CHIPS :
+                _args = copy.deepcopy(args)
+                _args['gpu'] = [int(_gpu)]
+                _args['partition'] = index
+                index += 1
+                make = lambda _params: (Components()).train(**_params)
+                job = Process(target=make,args=( dict(_args),))
+                job.name = 'Trainer # ' + str(index)
+                job.start()
+                jobs.append(job)
+        else:
+            #
+            # The choice of the chip will be made internally
+            agent = Components()
+            agent.train(**args)
+        #
+        # If we have any jobs, we should wait till they finish
+        #
+        while len(jobs) > 0 :
+            jobs = [job for job in jobs if job.is_alive()]
+            time.sleep(2)

         # jobs = []
         # for index in range(0,PART_SIZE) :
         #     if 'focus' in args and int(args['focus']) != index :
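
Both branches follow the same per-chip fan-out pattern: one `Process` per GPU, each with its own partition id, then a polling loop until every worker exits. A minimal self-contained sketch, with a hypothetical worker body; a module-level function instead of the diff's lambda keeps the target picklable under the spawn start method:

    import time
    from multiprocessing import Process

    def make(_params):
        # stand-in for (Components()).train(**_params) / .generate(_params)
        print('partition', _params['partition'], 'gpu', _params['gpu'])

    if __name__ == '__main__':
        jobs = []
        for index, _gpu in enumerate([0, 1]):                # one process per GPU chip
            _args = {'gpu': [int(_gpu)], 'partition': index}
            job = Process(target=make, args=(dict(_args),))
            job.name = 'Trainer # ' + str(index + 1)
            job.start()
            jobs.append(job)
        while len(jobs) > 0:                                 # poll until every worker exits
            jobs = [job for job in jobs if job.is_alive()]
            time.sleep(2)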