bug fix : approximation
This commit is contained in:
parent
fc7b694d02
commit
12d7573ba8
12
pipeline.py
12
pipeline.py
|
@@ -169,14 +169,24 @@ class Components :
|
||||||
#
|
#
|
||||||
# @TODO: create bins?
|
# @TODO: create bins?
|
||||||
r = np.random.dirichlet(values+.001) #-- dirichlet doesn't work on values with zeros
|
r = np.random.dirichlet(values+.001) #-- dirichlet doesn't work on values with zeros
|
||||||
|
_sd = values[values > 0].std()
|
||||||
|
_me = values[values > 0].mean()
|
||||||
x = []
|
x = []
|
||||||
_type = values.dtype
|
_type = values.dtype
|
||||||
for index in np.arange(values.size) :
|
for index in np.arange(values.size) :
|
||||||
|
|
||||||
if np.random.choice([0,1],1)[0] :
|
if np.random.choice([0,1],1)[0] :
|
||||||
value = values[index] + (values[index] * r[index])
|
value = values[index] + (values[index] * r[index])
|
||||||
|
|
||||||
else :
|
else :
|
||||||
value = values[index] - (values[index] * r[index])
|
value = values[index] - (values[index] * r[index])
|
||||||
|
#
|
||||||
|
# randomly shifting the measurements
|
||||||
|
if np.random.choice([0,1],1)[0] and _me > _sd:
|
||||||
|
if np.random.choice([0,1],1)[0] :
|
||||||
|
value = value * np.divide(_me,_sd)
|
||||||
|
else:
|
||||||
|
value = value + (np.divide(_me,_sd))
|
||||||
value = int(value) if _type == int else np.round(value,2)
|
value = int(value) if _type == int else np.round(value,2)
|
||||||
x.append( value)
|
x.append( value)
|
||||||
np.random.shuffle(x)
|
np.random.shuffle(x)
|
||||||
|
@@ -305,7 +315,7 @@ class Components :
|
||||||
if real_df[_col].unique().size > 0 :
|
if real_df[_col].unique().size > 0 :
|
||||||
|
|
||||||
|
|
||||||
_df[_col] = self.approximate(real_df[_col])
|
_df[_col] = self.approximate(real_df[_col].values)
|
||||||
_approx[_col] = {
|
_approx[_col] = {
|
||||||
"io":{"min":_df[_col].min().astype(float),"max":_df[_col].max().astype(float),"mean":_df[_col].mean().astype(float),"sd":_df[_col].values.std().astype(float),"missing": _df[_col].where(_df[_col] == -1).dropna().count().astype(float),"zeros":_df[_col].where(_df[_col] == 0).dropna().count().astype(float)},
|
"io":{"min":_df[_col].min().astype(float),"max":_df[_col].max().astype(float),"mean":_df[_col].mean().astype(float),"sd":_df[_col].values.std().astype(float),"missing": _df[_col].where(_df[_col] == -1).dropna().count().astype(float),"zeros":_df[_col].where(_df[_col] == 0).dropna().count().astype(float)},
|
||||||
"real":{"min":real_df[_col].min().astype(float),"max":real_df[_col].max().astype(float),"mean":real_df[_col].mean().astype(float),"sd":real_df[_col].values.std().astype(float),"missing": real_df[_col].where(_df[_col] == -1).dropna().count().astype(float),"zeros":real_df[_col].where(_df[_col] == 0).dropna().count().astype(float)}
|
"real":{"min":real_df[_col].min().astype(float),"max":real_df[_col].max().astype(float),"mean":real_df[_col].mean().astype(float),"sd":real_df[_col].values.std().astype(float),"missing": real_df[_col].where(_df[_col] == -1).dropna().count().astype(float),"zeros":real_df[_col].where(_df[_col] == 0).dropna().count().astype(float)}
|
||||||
|
|
Loading…
Reference in New Issue