"""Command-line driver around ``data.maker``.

Behavior is controlled by ``SYS_ARGS`` (imported from ``data.params``):

* ``config``   -- path to a JSON file; its decoded content is passed as
  keyword arguments to ``data.maker.train`` or ``data.maker.generate``.
  When this key is absent the script does nothing.
* ``generate`` -- when present, ``data.maker.generate`` is called and the
  original CSV (``ARGS['data']``) is printed joined with the generated
  column(s); otherwise ``data.maker.train`` is called.
"""
import json

import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance as wd

import data.maker
from data.params import SYS_ARGS
# NOTE(review): `risk` is only referenced through the `.risk` accessor in the
# commented-out evaluation code below; presumably importing it registers that
# accessor on DataFrames -- confirm before removing.
import risk

if 'config' in SYS_ARGS:
    # Use a context manager so the configuration file handle is always closed.
    with open(SYS_ARGS['config']) as config_file:
        ARGS = json.load(config_file)

    if 'generate' not in SYS_ARGS:
        data.maker.train(**ARGS)
    else:
        # NOTE(review): the source formatting left it ambiguous whether this
        # assignment was active or commented out -- confirm against history.
        ARGS['no_value'] = ''
        _df = data.maker.generate(**ARGS)

        odf = pd.read_csv(ARGS['data'])
        # Lower-case the original column names before selecting/joining.
        odf.columns = [name.lower() for name in odf.columns]

        # ARGS['column'] may be a single name or a list of names; normalize
        # to a list so `_df[column]` always yields a DataFrame.
        column = ARGS['column'] if isinstance(ARGS['column'], list) else [ARGS['column']]

        # print (odf.head())
        # print (_df.head())

        # Show original rows side by side with the generated column(s);
        # overlapping generated column names receive the '_io' suffix.
        print(odf.join(_df[column], rsuffix='_io'))

        # Disabled evaluation experiments (risk scores and Wasserstein
        # distance between one-hot encodings), kept for reference:
        # print (_df[column].risk.evaluate(flag='synth'))
        # print (odf[column].risk.evaluate(flag='original'))
        # _x = pd.get_dummies(_df[column]).values
        # y = pd.get_dummies(odf[column]).values
        # N = _df.shape[0]
        # print (np.mean([ wd(_x[i],y[i])for i in range(0,N)]))
        # print (wd(_x[0],y[0]) )
        # column = SYS_ARGS['column']
        # odf = open(SYS_ARGS['data'])