"""
(c) 2019 Data Maker, hiplab.mc.vanderbilt.edu
version 1.0.0

This package serves as a proxy to the overall usage of the framework.
This package is designed to generate synthetic data from an original dataset using deep learning techniques

@TODO:
    - Make configurable GPU, EPOCHS
"""
import pandas as pd
import numpy as np
import data.gan as gan
from transport import factory
import threading as thread
def train(**args):
    """
    Train one GAN per requested column so the distribution of each feature can be learnt.

    Every keyword argument is forwarded to gan.Train, together with the keys
    this function adds ('label', 'real', 'logger') or overwrites ('column', 'context').

    :column     column name, or list of column names, to be synthesized (discrete)
    :id         name of the column identifying an entity (must match the lower-cased column names)
    :data       data-frame to learn from, or the path of a CSV file to load
    :context    overwritten with the current column name before each training run
    :store      (optional) settings passed to transport.factory.instance to build a logger
    :logs       not read here, only forwarded to gan.Train (presumably the on-disk output location; confirm against gan.Train)

    NOTE: when :data is a data-frame its column names are lower-cased in place,
    i.e. the caller's object is modified.
    """
    targets = args['column']
    if not isinstance(targets, list):
        targets = [targets]

    id_column = args['id']

    frame = args['data']
    if isinstance(frame, str):
        frame = pd.read_csv(frame)
    # Column names are normalised to lower case (this edits the frame itself, not a copy)
    frame.columns = [name.lower() for name in frame.columns]

    # The identifier column is one-hot encoded once and shared by every training run
    args['label'] = pd.get_dummies(frame[id_column]).astype(np.float32).values

    #
    # Several columns are handled one at a time, each with its own trainer
    # @TODO : Consider performing this task on several threads/GPUs simultaneously
    #
    for target in targets:
        args['real'] = pd.get_dummies(frame[target]).astype(np.float32).values
        args['column'] = target
        args['context'] = target
        if 'store' in args:
            # The store is tagged with the column being learnt before the logger is created
            args['store']['args']['doc'] = target
            args['logger'] = factory.instance(**args['store'])
        gan.Train(**args).apply()
def post(**args):
    """
    Placeholder (not implemented yet): intended to upload the tensorflow checkpoint
    to a data-store (mongodb, bigquery, s3).

    Accepts any keyword arguments, does nothing and returns None.
    """
    return None
def get(**args):
    """
    Placeholder (not implemented yet): intended to restore a checkpoint from
    persistent storage onto disk.

    Accepts any keyword arguments, does nothing and returns None.
    """
    return None
def generate(**args):
    """
    Build a synthetic dataset from models previously learnt for the dataset.
    @return pandas.DataFrame    copy of the input in which every requested column is replaced

    Every keyword argument is forwarded to gan.Predict, together with the keys
    this function adds ('label', 'values') or overwrites ('column', 'context').

    :data   data-frame to be synthesized, or the path of a CSV file to load
    :column column name, or list of column names, to be synthesized (discrete)
    :id     column identifying an entity
    :logs   not read here, only forwarded to gan.Predict (presumably the on-disk location of the learnt model; confirm against gan.Predict)

    NOTE: unlike train, column names are used exactly as given (no lower-casing).
    """
    source = args['data']
    if isinstance(source, str):
        source = pd.read_csv(source)

    targets = args['column']
    if not isinstance(targets, list):
        targets = [targets]
    id_column = args['id']
    #
    #@TODO:
    #   If the identifier is not present, we should find a way to determine or make one
    #
    args['label'] = pd.get_dummies(source[id_column]).astype(np.float32).values

    # The input frame is left untouched; synthetic columns are written to a copy
    synthetic = source.copy()
    for target in targets:
        args['context'] = target
        args['column'] = target
        # Distinct observed values, in order of first appearance (deliberately not sorted)
        args['values'] = source[target].unique().tolist()
        #
        # we can determine the cardinalities here so we know what to allow or disallow
        predictor = gan.Predict(**args)
        predictor.load_meta(target)
        synthetic[target] = predictor.apply()[target]
    return synthetic