feature: bootstrap-like with candidates

2021-04-07 15:30:59 -05:00 · 2021-04-07 15:30:59 -05:00 · f26795387e
parent 12d7573ba8
commit f26795387e
4 changed files with 94 additions and 19 deletions
--- a/data/gan.py
+++ b/data/gan.py
@ -67,8 +67,9 @@ class GNet :
                        self.NUM_GPUS = 0
                else:
                        self.NUM_GPUS = len(self.GPU_CHIPS)
+                        # os.environ['CUDA_VISIBLE_DEVICES'] = str(self.GPU_CHIPS[0])
                
-                self.PARTITION = args['partition']
+                self.PARTITION = args['partition'] if 'partition' in args else None
                # if self.NUM_GPUS > 1 :
                #     os.environ['CUDA_VISIBLE_DEVICES'] = "4"

@ -117,9 +118,14 @@ class GNet :
                for key in ['train','output'] :
                        self.mkdir(os.sep.join([self.log_dir,key]))
                        self.mkdir (os.sep.join([self.log_dir,key,self.CONTEXT]))
+                        if 'partition' in args :
+                                self.mkdir (os.sep.join([self.log_dir,key,self.CONTEXT,str(args['partition'])]))
                        
                self.train_dir  = os.sep.join([self.log_dir,'train',self.CONTEXT])                
                self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
+                if 'partition' in args :
+                        self.train_dir = os.sep.join([self.train_dir,str(args['partition'])])
+                        self.out_dir = os.sep.join([self.out_dir,str(args['partition'])])
                # if self.logger :
                        
                #         We will clear the logs from the data-store 
@ -130,7 +136,7 @@ class GNet :
                #                 db.backup.insert({'name':column,'logs':list(db[column].find()) })
                #                 db[column].drop()
                
-        def load_meta(self,column):
+        def load_meta(self,**args):
                """
                This function is designed to accomodate the uses of the sub-classes outside of a strict dependency model.
                Because prediction and training can happen independently
@ -145,6 +151,9 @@ class GNet :
                                setattr(self,key,value)
                self.train_dir  = os.sep.join([self.log_dir,'train',self.CONTEXT])                
                self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
+                if 'partition' in args :
+                        self.train_dir = os.sep.join([self.train_dir,str(args['partition'])])
+                        self.out_dir = os.sep.join([self.out_dir,str(args['partition'])])
                                
                        
        def log_meta(self,**args) :
@ -265,9 +274,9 @@ class Generator (GNet):
                #tf.add_to_collection('glosses', loss)
                tf.compat.v1.add_to_collection('glosses', loss)
                return loss, loss                
-        def load_meta(self, column):
-                super().load_meta(column)
-                self.discriminator.load_meta(column)
+        def load_meta(self, **args):
+                super().load_meta(**args)
+                self.discriminator.load_meta(**args)
        def network(self,**args) :
                """
                This function will build the network that will generate the synthetic candidates
@ -454,6 +463,7 @@ class Train (GNet):
                        #       - determine if the GPU/CPU are busy
                        #
                        for i in self.GPU_CHIPS : #range(self.NUM_GPUS):
+                                
                                with tf.device('/gpu:%d' % i):
                                        with tf.name_scope('%s_%d' % ('TOWER', i)) as scope:
                                                if self._LABEL is not None :
@ -559,9 +569,9 @@ class Predict(GNet):
                        
                # self.MISSING_VALUES = args['no_value']
                # self.MISSING_VALUES = int(args['no_value']) if args['no_value'].isnumeric() else  np.na if args['no_value'] in ['na','NA','N/A'] else args['no_value']
-        def load_meta(self, column):
-                super().load_meta(column)
-                self.generator.load_meta(column)
+        def load_meta(self, **args):
+                super().load_meta(**args)
+                self.generator.load_meta(**args)
                self.ROW_COUNT = self.oROW_COUNT
        def apply(self,**args):
                suffix = self.CONTEXT #self.get.suffix()
--- a/data/maker/init.py
+++ b/data/maker/init.py
@ -112,7 +112,8 @@ def train (**_args):
    args ['max_epochs'] = _args['max_epochs']
    args['matrix_size'] = _matrix.shape[0]
    args['batch_size'] = 2000
-    args['partition'] = 0 if 'partition' not in _args else _args['partition']
+    if 'partition' in _args :
+        args['partition'] = _args['partition']
    if 'gpu' in _args :
        args['gpu'] = _args['gpu']
    # os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu']) if 'gpu' in args else '0'
@ -121,7 +122,8 @@ def train (**_args):
    #
    # @TODO: Write the map.json in the output directory for the logs
    # 
-    f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']),'w')
+    # f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']),'w')
+    f = open(os.sep.join([trainer.out_dir,'map.json']),'w')
    f.write(json.dumps(_map))
    f.close()

@ -140,7 +142,11 @@ def generate(**_args):
    :param context
    :param logs
    """
+    partition = _args['partition'] if 'partition' in _args else None
+    if not partition :
        f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']))
+    else:
+        f = open(os.sep.join([_args['logs'],'output',_args['context'],str(partition),'map.json']))
    _map = json.loads(f.read())
    f.close()
    # if 'file' in _args :
@ -165,7 +171,7 @@ def generate(**_args):
        args['gpu'] = _args['gpu']
       
    handler     = gan.Predict (**args)
-    handler.load_meta(None)
+    handler.load_meta(column=None)
    #
    # Let us now format the matrices by reverting them to a data-frame with values
    #
--- a/data/maker/prepare/init.py
+++ b/data/maker/prepare/init.py
@ -237,7 +237,7 @@ class Input :
        #
        # @NOTE: For some reason, there is an out of memory error created here, this seems to fix it (go figure)
        #
-        _matrix = np.array([np.repeat(0,cols.size) for i in range(row_count)])
+        _matrix = np.array([np.repeat(0,cols.size) for i in range(0,row_count)])
        [np.put(_matrix[i], np.where(cols ==  rows[i])  ,1)for i in np.arange(row_count) if np.where(cols == rows[i])[0].size > 0]
        # else:
        #     _matrix = cp.zeros([row_count,cols.size])
--- a/pipeline.py
+++ b/pipeline.py
@ -146,6 +146,8 @@ class Components :
 			_args['data'] = _args['data'][list(set(_args['data'].columns) - set(x_cols))]
 		if 'gpu' in args :
 			_args['gpu'] = self.set_gpu(gpu=args['gpu'])
+		if 'partition' in args :
+			_args['partition'] = args['partition']
 		if df.shape[0] and df.shape[0] :
 			#
 			# We have a full blown matrix to be processed 
@ -171,6 +173,7 @@ class Components :
 			r = np.random.dirichlet(values+.001) #-- dirichlet doesn't work on values with zeros
 			_sd = values[values > 0].std()
 			_me = values[values > 0].mean()
+			_mi = values.min()
 			x = []
 			_type = values.dtype
 			for index in np.arange(values.size) :
@ -182,7 +185,7 @@ class Components :
 					value = values[index] - (values[index] * r[index])
 				#
 				# randomly shifting the measurements 
-				if np.random.choice([0,1],1)[0] and _me > _sd:
+				if np.random.choice([0,1],1)[0] and _me > _sd :
 					if np.random.choice([0,1],1)[0] :
 						value = value * np.divide(_me,_sd)
 					else:
@ -273,6 +276,9 @@ class Components :
 		args['candidates']	= 1 if 'candidates' not in args else int(args['candidates'])
 		if 'gpu' in args :
 			args['gpu'] = self.set_gpu(gpu=args['gpu'])
+		# if 'partition' in args :
+		# 	args['logs'] = os.sep.join([args['logs'],str(args['partition'])])
+		
 		_info = {"module":"gan-prep","action":"prune","shape":{"rows":args['data'].shape[0],"columns":args['data'].shape[1]}}
 		logger.write(_info)
 		if args['data'].shape[0] > 0 and args['data'].shape[1] > 0 :
@ -459,12 +465,18 @@ if __name__ == '__main__' :
 	# COLUMNS = DATA.columns
 	# DATA = np.array_split(DATA,PART_SIZE)
 	# args['schema'] = schema
+	GPU_CHIPS = SYS_ARGS['gpu'] if 'gpu' in SYS_ARGS else None
+	if GPU_CHIPS and type(GPU_CHIPS) != list :				
+		GPU_CHIPS = [int(_id.strip()) for _id in GPU_CHIPS.split(',')] if type(GPU_CHIPS) == str else [GPU_CHIPS]
+	if 'gpu' in SYS_ARGS :
+		args['gpu'] = GPU_CHIPS
+	jobs = []
 	if 'generate' in SYS_ARGS :
 		#
 		# Let us see if we have partitions given the log folder
 		
 		content = os.listdir( os.sep.join([args['logs'],'train',args['context']]))
-		generator = Components()
+		
 		
 		# if ''.join(content).isnumeric() :
 		# 	#
@ -508,13 +520,60 @@ if __name__ == '__main__' :
 		# else:
 		# 	generator.generate(args)
 		# Components.generate(args)
+		if '--all-chips' in SYS_ARGS and GPU_CHIPS:
+			index = 0
+			jobs = []
+			for _id in GPU_CHIPS :
+				_args = copy.deepcopy(args)
+				_args['gpu'] = [int(_gpu)]
+				_args['partition'] = index
+				index += 1
+				make = lambda _params: (Components()).generate(_params)
+				job = Process(target=make,args=( dict(_args),))
+				job.name = 'Trainer # ' + str(index)
+				job.start()
+				jobs.append(job)
+			pass
+		else:
+			generator = Components()
 			generator.generate(args)
 	
 	else:
 		
 		# DATA  = np.array_split(DATA,PART_SIZE)
+		#
+		# Let us create n-jobs across n-gpus, The assumption here is the data that is produced will be a partition
+		# @TODO: Find better name for partition
+		#
+		if GPU_CHIPS and '--all-chips' in SYS_ARGS:
+			index = 0
+			
+			for _gpu in GPU_CHIPS :
+				_args = copy.deepcopy(args)
+				_args['gpu'] = [int(_gpu)]
+				_args['partition'] = index
+				index += 1
+				make = lambda _params: (Components()).train(**_params)
+				job = Process(target=make,args=( dict(_args),))
+				job.name = 'Trainer # ' + str(index)
+				job.start()
+				jobs.append(job)
+			
+
+
+
+		else:
+			#
+			# The choice of the chip will be made internally
 			agent = Components()
 			agent.train(**args)
+		#
+		# If we have any obs we should wait till they finish
+		#
+		while len(jobs)> 0 :
+			jobs = [job for job in jobs if job.is_alive()]
+			time.sleep(2)
+
 		# jobs = []
 		# for index in range(0,PART_SIZE) :
 		# 	if 'focus' in args and int(args['focus']) != index :