gpu indexing

2021-04-01 13:09:06 -05:00 · 2021-04-01 13:09:06 -05:00 · 5a16e325ac
parent 732ccb42e5
commit 5a16e325ac
2 changed files with 11 additions and 79 deletions
--- a/data/maker/init.py
+++ b/data/maker/init.py
@ -81,7 +81,6 @@ class ContinuousToDiscrete :
        return values
            

-
 def train (**_args):
    """
    :params sql
@ -126,7 +125,7 @@ def train (**_args):
    args['matrix_size'] = _matrix.shape[0]
    args['batch_size'] = 2000
    args['partition'] = 0 if 'partition' not in _args else _args['partition']
-    os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu']) if 'gpu' in args else '0'
+    # os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu']) if 'gpu' in args else '0'
    
    trainer = gan.Train(**args)   
    #
@ -215,8 +214,7 @@ def generate(**_args):
    _inputhandler = prepare.Input(**_args)
    values,_matrix = _inputhandler.convert()    
    args['values'] = np.array(values)
-    if 'gpu' in _args :
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(_args['gpu'])
+       
    handler     = gan.Predict (**args)
    handler.load_meta(None)
    #
@ -226,76 +224,3 @@ def generate(**_args):
    candidates = handler.apply(candidates=args['candidates'])       
    return [_inputhandler.revert(matrix=_matrix) for _matrix in candidates]
    
-
-
-def _generate(**args):
-    """
-    This function will generate a synthetic dataset on the basis of a model that has been learnt for the dataset
-    @return pandas.DataFrame
-
-    :data   data-frame to be synthesized
-    :column   columns that need to be synthesized (discrete)
-    :id     column identifying an entity
-    :logs   location on disk where the learnt knowledge of the dataset is
-    """
-    # df      = args['data']
-    df      = args['data'] if not isinstance(args['data'],str) else pd.read_csv(args['data'])
-    
-    CONTINUOUS = args['continuous'] if 'continuous' in args else []
-    column      = args['column'] if (isinstance(args['column'],list)) else [args['column']]
-    # column_id   = args['id']
-    #
-    #@TODO:
-    #   If the identifier is not present, we should fine a way to determine or make one
-    #
-    BIN_SIZE    = 4 if 'bin_size' not in args else int(args['bin_size'])
-    # NO_VALUE = dict(args['no_value']) if type(args['no_value']) == dict else args['no_value']
-    bhandler = Binary()    
-    _df     = df.copy()
-    for col in column :
-        args['context'] = col
-        args['column']  = col
-        
-        msize = args['matrix_size'] if 'matrix_size' in args else -1        
-        values = bhandler.get_column(df[col],msize)
-        MISSING= bhandler.get_missing(df[col],msize)
-        
-        
-        
-        args['values']      = values    
-        args['row_count']   = df.shape[0]
-        # if col in NO_VALUE :
-        #     args['no_value'] = NO_VALUE[col] 
-        # else:
-        #     args['no_value'] = NO_VALUE
-        # novalue = NO_VALUE[col] if NO_VALUE[col] in ['na',''] else NO_VALUE[col]
-        # MISSING += [NO_VALUE[col]]
-        args['missing'] = MISSING
-        #
-        # we can determine the cardinalities here so we know what to allow or disallow
-        handler     = gan.Predict (**args)
-        handler.load_meta(col)
-        r           =  handler.apply()                
-        if col in CONTINUOUS :
-            r[col] = np.array(r[col])            
-            _approx = ContinuousToDiscrete.continuous(r[col],BIN_SIZE)  #-- approximating based on arbitrary bins                                
-            r[col] = _approx
-            
-            
-            
-        _df[col]    = r[col]
-        #
-        # Let's cast the type to the original type (it makes the data more usable)
-        #
-        # print (values)
-        # print ([col,df[col].dtype,_df[col].tolist()])
-        otype       = df[col].dtype
-        _df[col]    = _df[col].astype(otype)
-        
-        #
-        # @TODO: log basic stats about the synthetic attribute
-        #
-        # print (r)s
-        # break
-        
-    return _df
--- a/pipeline.py
+++ b/pipeline.py
@ -81,7 +81,12 @@ class Components :
 			terms = _args['columns']
 			return [name for name in _df.columns if  np.sum( [int(field in  name )for field in terms ]) > 0 ]
 		return []
-
+	def set_gpu(self,**_args) : # exports the first requested GPU index through CUDA_VISIBLE_DEVICES; returns the gpu value as a sequence, or None when no 'gpu' key is given
+		if 'gpu' in _args :
+			gpu = [_args['gpu']] if isinstance(_args['gpu'],(str,int)) else _args['gpu'] # a bare index (e.g. 0 or '0') is wrapped so it can be subscripted; sequences pass through untouched
+			_index = str(gpu[0]) # only the first entry is exported, any further entries are ignored here
+			os.environ['CUDA_VISIBLE_DEVICES'] = _index
+			return gpu
 	def train(self,**args):
 		"""
 		This function will perform training on the basis of a given pointer that reads data
@ -137,7 +142,7 @@ class Components :
 		if x_cols :
 			_args['data'] = df[list(set(df.columns) - set(x_cols))]
 		if 'gpu' in args :
-			_args['gpu'] = args['gpu']
+			_args['gpu'] = self.set_gpu(gpu=args['gpu'])
 		data.maker.train(**_args)
 		
 		if 'autopilot' in ( list(args.keys())) :
@ -228,6 +233,8 @@ class Components :
 			args['data'] = df[list(set(df.columns) - set(x_cols))]
 		
 		args['candidates']	= 1 if 'candidates' not in args else int(args['candidates'])
+		if 'gpu' in args :
+			args['gpu'] = self.set_gpu(gpu=args['gpu'])
 		
 		candidates = (data.maker.generate(**args))
 		if 'sql.BQWriter' in ostore['type'] :