diff --git a/pipeline.py b/pipeline.py index 8d35cd8..00bb80c 100644 --- a/pipeline.py +++ b/pipeline.py @@ -166,7 +166,9 @@ class Components : :param values array of values to be approximated """ if values.dtype in [int,float] : - r = np.random.dirichlet(values) + # + # @TODO: create bins? + r = np.random.dirichlet(values+.001) #-- dirichlet doesn't work on values with zeros x = [] _type = values.dtype for index in np.arange(values.size) : @@ -222,7 +224,7 @@ class Components : dtype = str name = _item['name'] novalue = -1 - if _item['type'] == 'INTEGER' : + if _item['type'] in ['INTEGER','NUMERIC']: dtype = np.int64 elif _item['type'] == 'FLOAT' : @@ -296,11 +298,11 @@ class Components : # - The original dataset has all the fields except those that need to be synthesized # - _df = _df[list(set(_df.columns) - set(skip_columns))] + _df = _df[list(set(_df.columns) - set(skip_columns))].copy() if x_cols : for _col in x_cols : if real_df[_col].unique().size > 0 : - _df[_col] = self.approximate(real_df[_col].fillna(-1)) + _df[_col] = self.approximate(real_df[_col]) else: _df[_col] = -1