From 60cbf2dd3fd32ae8f5712d22dcceb367945a24a1 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Thu, 12 Mar 2020 09:55:29 -0500 Subject: [PATCH] bug fix: continuous values --- data/maker/__init__.py | 1 + pipeline.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/data/maker/__init__.py b/data/maker/__init__.py index 97cc3dd..2b51670 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -112,6 +112,7 @@ def train (**args) : if col in CONTINUOUS: BIN_SIZE = 4 if 'bin_size' not in args else int(args['bin_size']) args['real'] = ContinuousToDiscrete.binary(df[col],BIN_SIZE).astype(np.float32) + # print ( pd.DataFrame(args['real']).head() ) else: # df.to_csv('tmp-'+args['logs'].replace('/','_')+'-'+col+'.csv',index=False) # print (df[col].dtypes) diff --git a/pipeline.py b/pipeline.py index 7a2cf3a..9eee8c5 100644 --- a/pipeline.py +++ b/pipeline.py @@ -143,7 +143,7 @@ class Components : # columns = args['columns'] # df = np.array_split(df[columns].values,PART_SIZE) # df = pd.DataFrame(df[ int (partition) ],columns = columns) - info = {"parition":int(partition),"gpu":_args["gpu"],"rows":df.shape[0],"cols":df.shape[1],"part_size":PART_SIZE} + info = {"parition":int(partition),"gpu":_args["gpu"],"rows":str(df.shape[0]),"cols":str(df.shape[1]),"part_size":int(PART_SIZE)} logger.write({"module":"generate","action":"partition","input":info}) _args['partition'] = int(partition) _args['continuous']= args['continuous'] if 'continuous' in args else [] @@ -352,7 +352,7 @@ if __name__ == '__main__' : args['partition'] = index # _df = pd.DataFrame(DATA[index],columns=args['columns']) args['data'] = DATA[index] - args['data'].to_csv('aou-'+str(index)+'csv',index=False) + # args['data'].to_csv('aou-'+str(index)+'csv',index=False) # args['reader'] = lambda: _df if int(args['num_gpu']) > 1 : args['gpu'] = index