bug fix: misc. improvements
parent 157df9334c
commit f99af3655d

data/gan.py (21 lines changed)
@@ -20,7 +20,9 @@ EMBEDDED IN CODE :
 """
 import tensorflow as tf
-from tensorflow.contrib.layers import l2_regularizer
+# from tensorflow.contrib.layers import l2_regularizer
+from tensorflow.keras import layers
+from tensorflow.keras.regularizers import L2 as l2_regularizer
 import numpy as np
 import pandas as pd
 import time
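The import change above ports the module off tf.contrib, which was removed in TensorFlow 2. A minimal sketch of the swap, assuming TF 2.x. One caveat worth flagging: the two APIs differ by a factor of two, since tf.contrib.layers.l2_regularizer(scale) penalized scale * tf.nn.l2_loss(w) = scale * sum(w**2) / 2, while the Keras L2 class penalizes l2 * sum(w**2), so the effective regularization strength doubles unless the coefficient is halved.

    import tensorflow as tf
    from tensorflow.keras.regularizers import L2 as l2_regularizer

    # Both objects are callables on a weight tensor, so aliasing L2 as
    # l2_regularizer keeps the existing call sites unchanged.
    reg = l2_regularizer(0.01)      # ~ tf.contrib.layers.l2_regularizer(0.01)
    w = tf.ones([3, 3])
    penalty = reg(w)                # 0.01 * sum(w**2) = 0.09 here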
@@ -34,7 +36,7 @@ import pickle
 os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
 os.environ['CUDA_VISIBLE_DEVICES'] = "0"
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
-
+tf.compat.v1.disable_eager_execution()
 # STEPS_PER_EPOCH = int(SYS_ARGS['epoch']) if 'epoch' in SYS_ARGS else 256
 # NUM_GPUS = 1 if 'num_gpu' not in SYS_ARGS else int(SYS_ARGS['num_gpu'])
 # BATCHSIZE_PER_GPU = 2000
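The new tf.compat.v1.disable_eager_execution() call is what lets this TF1-era module keep running on TensorFlow 2: with eager mode off, ops build a graph that only executes inside a session, which the placeholder/Session/Saver code later in the file depends on. A small self-contained illustration (the tensor shapes are invented):

    import tensorflow as tf
    tf.compat.v1.disable_eager_execution()   # restore TF1 graph semantics

    x = tf.compat.v1.placeholder(tf.float32, shape=[None, 3])
    y = x * 2.0                              # graph node, no value yet
    with tf.compat.v1.Session() as sess:
        print(sess.run(y, feed_dict={x: [[1.0, 2.0, 3.0]]}))   # [[2. 4. 6.]]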
@@ -211,13 +213,14 @@ class GNet :
         labels = None if 'labels' not in args else args['labels']
         n_labels= None if 'n_labels' not in args else args['n_labels']
         shift = [0] if self.__class__.__name__.lower() == 'generator' else [1] #-- not sure what this is doing
-        mean, var = tf.nn.moments(inputs, shift, keep_dims=True)
-        shape = inputs.shape[1].value
+        # mean, var = tf.nn.moments(inputs, shift, keep_dims=True)
+        mean, var = tf.nn.moments(inputs, shift,keepdims=True)
+        # shape = inputs.shape[1].value
+        shape = inputs.shape[1]

         if labels is not None:
-            offset_m = self.get.variables(shape=[1,shape], name='offset'+name,
-                initializer=tf.zeros_initializer)
-            scale_m = self.get.variables(shape=[n_labels,shape], name='scale'+name,
-                initializer=tf.ones_initializer)
+            offset_m = self.get.variables(shape=[1,shape], name='offset'+name,initializer=tf.zeros_initializer)
+            scale_m = self.get.variables(shape=[n_labels,shape], name='scale'+name,initializer=tf.ones_initializer)
             offset = tf.nn.embedding_lookup(offset_m, labels)
             scale = tf.nn.embedding_lookup(scale_m, labels)
+
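This hunk ports two TF1 idioms in the conditional-normalization path: tf.nn.moments renamed its keep_dims keyword to keepdims, and Dimension objects were dropped so inputs.shape[1] is already a plain int (no .value). The per-label offset/scale rows are then fetched with tf.nn.embedding_lookup. A hedged sketch with invented shapes; note the diff keeps offset_m at shape [1, shape], so the lookup there can only resolve label 0, whereas the sketch uses n_labels rows for both:

    import tensorflow as tf   # eager TF2 is fine for this illustration

    inputs = tf.random.normal([32, 8])
    mean, var = tf.nn.moments(inputs, [0], keepdims=True)  # keep_dims -> keepdims
    shape = inputs.shape[1]     # plain int in TF2; .value no longer exists

    n_labels = 4
    scale_m  = tf.ones([n_labels, shape])
    offset_m = tf.zeros([n_labels, shape])
    labels   = tf.constant([0, 2, 1])
    scale  = tf.nn.embedding_lookup(scale_m, labels)    # [3, 8], one row per label
    offset = tf.nn.embedding_lookup(offset_m, labels)   # [3, 8]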
@@ -595,7 +598,7 @@ class Predict(GNet):
         df = pd.DataFrame()
         CANDIDATE_COUNT = args['candidates'] if 'candidates' in args else 1 #0 if self.ROW_COUNT < 1000 else 100
         candidates = []

         with tf.compat.v1.Session() as sess:
             saver.restore(sess, model_dir)
             if self._LABEL is not None :
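For context, the Session/saver.restore pair this hunk surrounds is the standard compat.v1 checkpoint pattern, which only works once eager execution is disabled as above. A minimal save-then-restore round trip; the variable and path are placeholders, not the repo's real model_dir:

    import os
    import tensorflow as tf
    tf.compat.v1.disable_eager_execution()

    w = tf.compat.v1.get_variable('w', shape=[2, 2])
    saver = tf.compat.v1.train.Saver()
    os.makedirs('/tmp/demo', exist_ok=True)
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        path = saver.save(sess, '/tmp/demo/model')   # hypothetical model_dir
    with tf.compat.v1.Session() as sess:
        saver.restore(sess, path)                    # mirrors Predict's restore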
@@ -106,6 +106,8 @@ def train (**_args):
        values = _inputhandler._map[key]['values'].tolist()
        _map[key] = {"beg":beg,"end":end,"values":np.array(values).astype(str).tolist()}
    info = {"rows":_matrix.shape[0],"cols":_matrix.shape[1],"map":_map}
+   print()
+   # print ([_args['context'],_inputhandler._io])
    logger.write({"module":"gan-train","action":"data-prep","context":_args['context'],"input":_inputhandler._io})

    args['logs'] = _args['logs'] if 'logs' in _args else 'logs'
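A side note on the unchanged map line: the astype(str).tolist() conversion presumably exists because numpy scalars such as np.int64 are not JSON-serializable, so stringifying the values keeps logger.write from choking on the payload. A tiny sketch with invented values:

    import json
    import numpy as np

    values = np.array([10, 20, 30])
    _map = {"age": {"beg": 0, "end": 3,
                    "values": np.array(values).astype(str).tolist()}}
    json.dumps(_map)    # fine; raw np.int64 values would raise TypeError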
@@ -142,9 +144,10 @@ def generate(**_args):
     :param context
     :param logs
     """
+    _args['logs'] = _args['logs'] if 'logs' in _args else 'logs'
     partition = _args['partition'] if 'partition' in _args else None
     if not partition :
-        MAP_FLDER = os.sep.join([_args['logs'],'output',_args['context']])
+        MAP_FOLDER = os.sep.join([_args['logs'],'output',_args['context']])
         # f = open(os.sep.join([_args['logs'],'output',_args['context'],'map.json']))
     else:
         MAP_FOLDER = os.sep.join([_args['logs'],'output',_args['context'],str(partition)])
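The MAP_FLDER → MAP_FOLDER rename looks like the actual bug fix in this function: the misspelled name was only ever assigned, presumably leaving MAP_FOLDER undefined (a NameError) whenever generate() ran without a partition. The corrected path logic, isolated with hypothetical arguments:

    import os

    _args = {'logs': 'logs', 'context': 'demo'}
    partition = _args['partition'] if 'partition' in _args else None
    if not partition:
        MAP_FOLDER = os.sep.join([_args['logs'], 'output', _args['context']])
    else:
        MAP_FOLDER = os.sep.join([_args['logs'], 'output', _args['context'], str(partition)])
    print(MAP_FOLDER)   # logs/output/demo on POSIX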
pipeline.py (18 lines changed)
@@ -151,6 +151,7 @@ class Components :
         if df.shape[0] and df.shape[0] :
             #
             # We have a full blown matrix to be processed
+            print ('-- Training --')
             data.maker.train(**_args)
         else:
             print ("... skipping training !!")
@@ -259,16 +260,23 @@ class Components :
                     _df[name] = _df[name].apply(lambda value: '' if str(value) == 'NaT' else str(value)[:10])
                     #_df[name] = _df[name].dt.date
                     # _df[name] = pd.to_datetime(_df[name].fillna(''),errors='coerce')
+                else:
+                    pass
+                    _df[name] = pd.to_datetime(_df[name])
             else:
+                value = 0
                 if _item['type'] == 'INTEGER' :
                     _type = np.int64
                 elif _item['type'] in ['FLOAT','NUMERIC']:
                     _type = np.float64
                 else:

                     _value = ''
-                _df[name] = _df[name].fillna(_value).astype(_type)
+                _df[name] = _df[name].fillna(_value) #.astype(_type)
             columns.append(name)
-            writer.write(_df,schema=_schema,table=args['from'])
+            print ()
+            print (_df)
+            writer.write(_df.astype(object),schema=_schema,table=args['from'])
         else:
             writer.write(_df,table=args['from'])
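Two behavior changes land here: the eager .astype(_type) cast is commented out, and the frame is cast to object right before the write (writer is this repo's transport layer; the frame below is invented). Deferring the cast keeps integer conversion from blowing up on missing values, and an all-object frame leaves final type coercion to the writer and its schema:

    import numpy as np
    import pandas as pd

    _df = pd.DataFrame({'id': [1.0, 2.0, np.nan]})
    # _df['id'].fillna('').astype(np.int64) would raise on the blank value;
    # the commit keeps fillna and drops the cast:
    _df['id'] = _df['id'].fillna('')
    payload = _df.astype(object)     # every column: dtype object
    print(payload.dtypes)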
@@ -350,7 +358,7 @@ class Components :
         for _item in schema :
             dtype = str
             name = _item['name']
-            novalue = -1
+            novalue = 0
             if _item['type'] in ['INTEGER','NUMERIC']:
                 dtype = np.int64

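The novalue sentinel moving from -1 to 0 changes what gets imputed into numeric columns downstream, presumably to stop a negative placeholder from leaking into regenerated values. A sketch of the schema-driven dtype/default loop with a hypothetical schema:

    import numpy as np

    schema = [{'name': 'age', 'type': 'INTEGER'}, {'name': 'name', 'type': 'STRING'}]
    for _item in schema:
        dtype   = str
        novalue = 0       # sentinel changed from -1 to 0 in this commit
        if _item['type'] in ['INTEGER', 'NUMERIC']:
            dtype = np.int64
        print(_item['name'], dtype, novalue)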
@@ -550,7 +558,7 @@ if __name__ == '__main__' :
     index = f[0] if f else 0
     #

-    print ("..::: ",PIPELINE[index]['context'])
+    print ("..::: ",PIPELINE[index]['context'],':::..')
     args = (PIPELINE[index])
     for key in _config :
         if key == 'pipeline' or key in args:
@@ -567,6 +575,7 @@ if __name__ == '__main__' :
     args['batch_size'] = 2000 #if 'batch_size' not in args else int(args['batch_size'])
     if 'dataset' not in args :
         args['dataset'] = 'combined20191004v2_deid'
+    args['logs'] = args['logs'] if 'logs' in args else 'logs'
     PART_SIZE = int(args['part_size']) if 'part_size' in args else 8
     #
     # @TODO:
@@ -599,6 +608,7 @@ if __name__ == '__main__' :
             jobs.append(job)
         pass
     else:
+
         generator = Components()
         generator.generate(args)
 elif 'shuffle' in SYS_ARGS :