From abed87db22ad47c1d8e9c717967692078248fb36 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Mon, 12 Apr 2021 15:11:41 -0500 Subject: [PATCH] bug fix: column specification for shuffle --- pipeline.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pipeline.py b/pipeline.py index 9d33873..dcf649c 100644 --- a/pipeline.py +++ b/pipeline.py @@ -224,12 +224,13 @@ class Components : if 'ignore' in args and 'columns' in args['ignore'] : _cols = self.get_ignore(data=df,columns=args['ignore']['columns']) - - for name in list (set(df.columns) - set(_cols)) : + columns = args['columns'] if 'columns' in args else df.columns + columns = list(set(columns) - set(_cols)) + for name in columns : i = np.arange(df.shape[0]) np.random.shuffle(i) if name in x_cols : - df[name] = self.approximate(df[name].values) + df[name] = self.approximate(df.iloc[i][name].values) df[name] = df.iloc[i][name] self.post(data=df,schema=schema,store=args['store']['target']) def post(self,**_args) :