bug fix: matrix space restriction
This commit is contained in:
parent
1cf9c6e47a
commit
8f390931f3
|
@ -173,7 +173,7 @@ class Binary :
|
|||
# N =
|
||||
i = np.random.choice(col_count,size)
|
||||
values = values[-i]
|
||||
col_count = N
|
||||
col_count = size
|
||||
|
||||
|
||||
|
||||
|
@ -209,7 +209,7 @@ class Binary :
|
|||
# N =
|
||||
i = np.random.choice(col_count,size)
|
||||
values = values[-i]
|
||||
col_count = N
|
||||
col_count = size
|
||||
return values
|
||||
|
||||
def _Export(self,df) :
|
||||
|
@ -271,7 +271,7 @@ if __name__ == '__main__' :
|
|||
"""
|
||||
df = pd.read_csv('sample.csv')
|
||||
print ( pd.get_dummies(df.race))
|
||||
print ( (Binary()).apply(df.race, 30))
|
||||
print ( (Binary()).apply(df.race, 2))
|
||||
|
||||
# has_basic = 'dataset' in SYS_ARGS.keys() and 'table' in SYS_ARGS.keys() and 'key' in SYS_ARGS.keys()
|
||||
# has_action= 'export' in SYS_ARGS.keys() or 'pseudo' in SYS_ARGS.keys()
|
||||
|
|
|
@ -136,7 +136,7 @@ def train (**args) :
|
|||
# print (df[col].dtypes)
|
||||
# print (df[col].dropna/(axis=1).unique())
|
||||
# args['real'] = pd.get_dummies(df[col].dropna()).astype(np.float32).values
|
||||
msize = args['matrix_size'] if 'matrix_size' in args else -1
|
||||
msize = args['matrix_size'] if 'matrix_size' in args else 128
|
||||
args['real'] = (Binary()).apply(df[col],msize)
|
||||
|
||||
|
||||
|
@ -210,7 +210,7 @@ def generate(**args):
|
|||
|
||||
# else:
|
||||
# values = df[col].dropna().unique().tolist()
|
||||
msize = args['matrix_size'] if 'matrix_size' in args else -1
|
||||
msize = args['matrix_size'] if 'matrix_size' in args else 128
|
||||
values = bhandler.get_column_values(df[col])
|
||||
|
||||
|
||||
|
|
24
pipeline.py
24
pipeline.py
|
@ -73,21 +73,7 @@ class Components :
|
|||
# @TODO: we need to log something here about the parameters being passed
|
||||
# pointer = args['reader'] if 'reader' in args else lambda: Components.get(**args)
|
||||
df = args['data']
|
||||
|
||||
if 'slice' in args and 'max_rows' in args['slice']:
|
||||
max_rows = args['slice']['max_rows']
|
||||
if df.shape[0] > max_rows :
|
||||
print (".. slicing ")
|
||||
i = np.random.choice(df.shape[0],max_rows,replace=False)
|
||||
df = df.iloc[i]
|
||||
|
||||
|
||||
#
|
||||
# Certain columns need to be removed because they yield too large a matrix
|
||||
#
|
||||
# if df.shape[0] == 0 :
|
||||
# print ("CAN NOT TRAIN EMPTY DATASET ")
|
||||
# return
|
||||
|
||||
#
|
||||
# Now we can parse the arguments and submit the entire thing to training
|
||||
#
|
||||
|
@ -102,8 +88,8 @@ class Components :
|
|||
_args['max_epochs'] = 150 if 'max_epochs' not in args else int(args['max_epochs'])
|
||||
if 'batch_size' in args :
|
||||
_args['batch_size'] = int(args['batch_size'])
|
||||
|
||||
#
|
||||
|
||||
_args['matrix_size'] = args['matrix_size'] if 'matrix_size' in args else 128 #
|
||||
# We ask the process to assume 1 GPU, given the number of GPUs on the system and that these tasks can run in parallel
|
||||
#
|
||||
if int(args['num_gpu']) > 1 :
|
||||
|
@ -157,6 +143,8 @@ class Components :
|
|||
_args['num_gpu'] = 1
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu'])
|
||||
_args['no_value']= args['no_value']
|
||||
_args['matrix_size'] = args['matrix_size'] if 'matrix_size' in args else 128
|
||||
|
||||
|
||||
# MAX_ROWS = args['max_rows'] if 'max_rows' in args else 0
|
||||
PART_SIZE = int(args['part_size']) if 'part_size' in args else 8
|
||||
|
@ -298,6 +286,8 @@ if __name__ == '__main__' :
|
|||
args[key] = _config[key]
|
||||
|
||||
args = dict(args,**SYS_ARGS)
|
||||
if 'matrix_size' in args :
|
||||
args['matrix_size'] = int(args['matrix_size'])
|
||||
if 'batch_size' not in args :
|
||||
args['batch_size'] = 2000 #if 'batch_size' not in args else int(args['batch_size'])
|
||||
if 'dataset' not in args :
|
||||
|
|
Loading…
Reference in New Issue