diff --git a/pipeline.py b/pipeline.py index 9f57d59..72dea06 100644 --- a/pipeline.py +++ b/pipeline.py @@ -236,8 +236,10 @@ class Components : # We need to remove the continuous columns from the data-frame # @TODO: Abstract this !! # + real_df = pd.DataFrame() if x_cols : args['data'] = args['data'][list(set(args['data'].columns) - set(x_cols))] + real_df = args[x_cols].copy() args['candidates'] = 1 if 'candidates' not in args else int(args['candidates']) if 'gpu' in args : @@ -276,7 +278,7 @@ class Components : _df = _df[list(set(_df.columns) - set(skip_columns))] if x_cols : for _col in x_cols : - if df[_col].unique().size > 0 : + if real_df[_col].unique().size > 0 : _df[_col] = self.approximate(df[_col].fillna(-1)) else: _df[_col] = -1 @@ -289,6 +291,7 @@ class Components : # Let us merge the dataset here and and have a comprehensive dataset _df = pd.DataFrame.join(df,_df) + if _schema : for _item in _schema : if _item['type'] in ['DATE','TIMESTAMP','DATETIME'] :