import pandas as pd
import data.maker
from data.params import SYS_ARGS
import json
from scipy.stats import wasserstein_distance as wd
import risk
import numpy as np
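# Depending on the flags collected in SYS_ARGS, this script either trains the
# data.maker model on the dataset referenced by the JSON config, or generates
# a synthetic frame and prints it alongside the original data.
# Typical invocation (an assumption, based on SYS_ARGS reading 'config' and
# 'generate' keys from the command line, e.g. --key value style arguments):
#   python <this script> --config <path-to-config>              # train
#   python <this script> --config <path-to-config> --generate   # generate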
if 'config' in SYS_ARGS :
    ARGS = json.loads(open(SYS_ARGS['config']).read())
    if 'generate' not in SYS_ARGS :
        # train the model with the parameters provided in the config
        data.maker.train(**ARGS)
    else:
        #
        # generate a synthetic frame and print it alongside the original data
        #
        ARGS['no_value'] = ''
        _df = data.maker.generate(**ARGS)
        odf = pd.read_csv(ARGS['data'])
        odf.columns = [name.lower() for name in odf.columns]
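        # normalize the target column(s) into a list so that _df[column] and
        # odf[column] always evaluate to DataFrames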
        column = ARGS['column'] if isinstance(ARGS['column'], list) else [ARGS['column']]
        # print (odf.head())
        # print (_df.head())
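        # show the original frame side by side with the synthetic column(s);
        # overlapping names from the synthetic frame get the '_io' suffix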
        print (odf.join(_df[column], rsuffix='_io'))
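        #
        # earlier evaluation code, kept for reference: risk scores via the
        # imported risk module and the mean Wasserstein distance between the
        # one-hot encodings of the synthetic and original column(s)
        #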
        # print (_df[column].risk.evaluate(flag='synth'))
        # print (odf[column].risk.evaluate(flag='original'))
        # _x = pd.get_dummies(_df[column]).values
        # y = pd.get_dummies(odf[column]).values
        # N = _df.shape[0]
        # print (np.mean([wd(_x[i], y[i]) for i in range(0, N)]))
        # print (wd(_x[0], y[0]))
# column = SYS_ARGS['column']
# odf = open(SYS_ARGS['data'])