removing conditions, it blows up computational space
This commit is contained in:
parent dab3ab7bf7
commit 4a25af6b13
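In effect, the commit makes the label argument optional end to end: GNet falls back to NUM_LABELS = None, Train and Predict skip the label pipeline when no label is supplied, and batch normalization degrades gracefully to its unconditional form. As a rough illustration of why per-label conditioning inflates the model (numbers below are hypothetical, not taken from this commit): conditional batch normalization keeps one offset row and one scale row per label value in every normalized layer.

    n_labels = 256       # hypothetical label cardinality
    layer_width = 128    # matches self.Z_DIM in data/gan.py
    conditional = 2 * n_labels * layer_width   # offset_m + scale_m entries
    unconditional = 2 * layer_width            # a single offset/scale pair
    print(conditional, unconditional)          # 65536 vs 256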
85 data/gan.py
@@ -72,7 +72,7 @@ class GNet :
         elif 'label' in args and len(args['label']) == 1 :
             self.NUM_LABELS = args['label'].shape[0]
         else:
-            self.NUM_LABELS = 8
+            self.NUM_LABELS = None
         # self.Z_DIM = 128 #self.X_SPACE_SIZE
         self.Z_DIM = 128 #-- used as rows down stream
         self.G_STRUCTURE = [self.Z_DIM,self.Z_DIM]
@@ -180,14 +180,19 @@ class GNet :
         shift = [0] if self.__class__.__name__.lower() == 'generator' else [1] #-- not sure what this is doing
         mean, var = tf.nn.moments(inputs, shift, keep_dims=True)
         shape = inputs.shape[1].value
-        offset_m = self.get.variables(shape=[n_labels,shape], name='offset'+name,
-                                      initializer=tf.zeros_initializer)
-        scale_m = self.get.variables(shape=[n_labels,shape], name='scale'+name,
-                                     initializer=tf.ones_initializer)
-
-        offset = tf.nn.embedding_lookup(offset_m, labels)
-        scale = tf.nn.embedding_lookup(scale_m, labels)
-        result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8)
+        if labels is not None:
+            offset_m = self.get.variables(shape=[1,shape], name='offset'+name,
+                                          initializer=tf.zeros_initializer)
+            scale_m = self.get.variables(shape=[n_labels,shape], name='scale'+name,
+                                         initializer=tf.ones_initializer)
+            offset = tf.nn.embedding_lookup(offset_m, labels)
+            scale = tf.nn.embedding_lookup(scale_m, labels)
+
+        else:
+            offset = None
+            scale = None
+
+        result = tf.nn.batch_normalization(inputs, mean, var,offset,scale, 1e-8)
         return result

     def _variable_on_cpu(self,**args):
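The hunk above guards conditional batch normalization: when labels are present, each label id selects its own offset and scale row through tf.nn.embedding_lookup; with no labels, offset and scale stay None and tf.nn.batch_normalization applies plain normalization. A minimal numpy sketch of the lookup semantics, with hypothetical shapes:

    import numpy as np

    n_labels, width = 4, 3
    offset_m = np.zeros((n_labels, width))  # one offset row per label id
    scale_m = np.ones((n_labels, width))    # one scale row per label id
    labels = np.array([2, 0, 2])            # one label id per batch row

    # tf.nn.embedding_lookup(params, ids) reduces to row indexing:
    offset = offset_m[labels]               # shape (3, 3)
    scale = scale_m[labels]
    # batch_normalization then applies scale * (x - mean) / sqrt(var + eps) + offset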
@@ -248,7 +253,7 @@ class Generator (GNet):
         x = args['inputs']
         tmp_dim = self.Z_DIM if 'dim' not in args else args['dim']
         label = args['label']
-
+        print (self.NUM_LABELS)
         with tf.compat.v1.variable_scope('G', reuse=tf.compat.v1.AUTO_REUSE , regularizer=l2_regularizer(0.00001)):
             for i, dim in enumerate(self.G_STRUCTURE[:-1]):
                 kernel = self.get.variables(name='W_' + str(i), shape=[tmp_dim, dim])
@@ -331,7 +336,7 @@ class Train (GNet):
         self.generator = Generator(**args)
         self.discriminator = Discriminator(**args)
         self._REAL = args['real']
-        self._LABEL= args['label']
+        self._LABEL= args['label'] if 'label' in args else None
         self.column = args['column']
         # print ([" *** ",self.BATCHSIZE_PER_GPU])

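The guarded lookup is the spelled-out form of dict.get, which returns None for a missing key; an equivalent one-liner, for reference:

    label = args.get('label')   # returns None when 'label' is absent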
@@ -340,7 +345,7 @@ class Train (GNet):

             self.logger.write( self.meta )

-        self.log (real_shape=list(self._REAL.shape),label_shape = list(self._LABEL.shape),meta_data=self.meta)
+        # self.log (real_shape=list(self._REAL.shape),label_shape = self._LABEL.shape,meta_data=self.meta)
     def load_meta(self, column):
         """
         This function will delegate the calls to load meta data to it's dependents
@@ -363,13 +368,16 @@ class Train (GNet):
         stage = args['stage']
         real = args['real']
         label = args['label']
-        label = tf.cast(label, tf.int32)
-        #
-        # @TODO: Ziqi needs to explain what's going on here
-        m = [[i] for i in np.arange(self._LABEL.shape[1]-2)]
-        label = label[:, 1] * len(m) + tf.squeeze(
-            tf.matmul(label[:, 2:], tf.constant(m, dtype=tf.int32))
-        )
+
+
+        if label is not None :
+            label = tf.cast(label, tf.int32)
+            #
+            # @TODO: Ziqi needs to explain what's going on here
+            m = [[i] for i in np.arange(self._LABEL.shape[1]-2)]
+            label = label[:, 1] * len(m) + tf.squeeze(
+                tf.matmul(label[:, 2:], tf.constant(m, dtype=tf.int32))
+            )
         # label = label[:,1] * 4 + tf.squeeze( label[:,2]*[[0],[1],[2],[3]] )
         z = tf.random.normal(shape=[self.BATCHSIZE_PER_GPU, self.Z_DIM])

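The guarded block above folds two categorical fields into one index: column 1 is a binary field, columns 2: are a one-hot block, the matmul with m recovers the one-hot position, and the sum is field * len(m) + position, a mixed-radix encoding. A small numpy check with made-up rows:

    import numpy as np

    label = np.array([
        [1, 0, 0, 1, 0, 0],   # field 0, one-hot position 1
        [1, 1, 1, 0, 0, 0],   # field 1, one-hot position 0
    ])
    m = [[i] for i in np.arange(label.shape[1] - 2)]   # [[0],[1],[2],[3]]
    flat = label[:, 1] * len(m) + (label[:, 2:] @ np.array(m)).squeeze()
    print(flat)   # [1 4] -> one categorical id per row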
@@ -394,8 +402,13 @@ class Train (GNet):
         This function seems to produce
         """
         features_placeholder = tf.compat.v1.placeholder(shape=self._REAL.shape, dtype=tf.float32)
-        labels_placeholder = tf.compat.v1.placeholder(shape=self._LABEL.shape, dtype=tf.float32)
-        dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
+        LABEL_SHAPE = [None,None] if self._LABEL is None else self._LABEL.shape
+        labels_placeholder = tf.compat.v1.placeholder(shape=LABEL_SHAPE, dtype=tf.float32)
+        if self._LABEL is not None :
+            dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
+        else :
+            dataset = tf.data.Dataset.from_tensor_slices(features_placeholder)
+        # labels_placeholder = None
         dataset = dataset.repeat(10000)
         dataset = dataset.batch(batch_size=3000)
         dataset = dataset.prefetch(1)
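Note the asymmetry introduced here: from_tensor_slices over a tuple yields (real, label) pairs, while a single tensor yields bare feature batches, which is why get_next() is unpacked differently in the GPU tower loop further down. A minimal sketch of the two dataset shapes (assumes tensorflow 1.15, as pinned in setup.py; placeholder shapes are hypothetical):

    import tensorflow as tf

    features = tf.compat.v1.placeholder(shape=[None, 128], dtype=tf.float32)
    labels = tf.compat.v1.placeholder(shape=[None, None], dtype=tf.float32)

    ds_pair = tf.data.Dataset.from_tensor_slices((features, labels))  # iterator yields tuples
    ds_only = tf.data.Dataset.from_tensor_slices(features)            # iterator yields tensors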
@@ -413,7 +426,10 @@ class Train (GNet):
         for i in range(self.NUM_GPUS):
             with tf.device('/gpu:%d' % i):
                 with tf.name_scope('%s_%d' % ('TOWER', i)) as scope:
-                    (real, label) = iterator.get_next()
+                    if self._LABEL is not None :
+                        (real, label) = iterator.get_next()
+                    else:
+                        real = iterator.get_next()
                     loss, w = self.loss(scope=scope, stage=stage, real=self._REAL, label=self._LABEL)
                     #tf.get_variable_scope().reuse_variables()
                     tf.compat.v1.get_variable_scope().reuse_variables()
@@ -450,10 +466,11 @@ class Train (GNet):
         #with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
         with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
             sess.run(init)
+
             sess.run(iterator_d.initializer,
-                     feed_dict={features_placeholder_d: REAL, labels_placeholder_d: LABEL})
+                     feed_dict={features_placeholder_d: REAL})
             sess.run(iterator_g.initializer,
-                     feed_dict={features_placeholder_g: REAL, labels_placeholder_g: LABEL})
+                     feed_dict={features_placeholder_g: REAL})

             for epoch in range(1, self.MAX_EPOCHS + 1):
                 start_time = time.time()
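Dropping labels_placeholder_d and labels_placeholder_g from the feeds works because an initializable iterator only needs values for the placeholders its dataset actually references. For context, a self-contained sketch of that TF 1.x pattern (data and shapes hypothetical):

    import numpy as np
    import tensorflow as tf

    REAL = np.random.rand(10, 4).astype(np.float32)
    features = tf.compat.v1.placeholder(shape=[None, 4], dtype=tf.float32)
    dataset = tf.data.Dataset.from_tensor_slices(features).batch(5)
    iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
    batch = iterator.get_next()

    with tf.compat.v1.Session() as sess:
        sess.run(iterator.initializer, feed_dict={features: REAL})
        print(sess.run(batch).shape)   # (5, 4)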
@@ -511,9 +528,11 @@ class Predict(GNet):
         tf.compat.v1.reset_default_graph()
         z = tf.random.normal(shape=[self.BATCHSIZE_PER_GPU, self.Z_DIM])
         y = tf.compat.v1.placeholder(shape=[self.BATCHSIZE_PER_GPU, self.NUM_LABELS], dtype=tf.int32)
-        ma = [[i] for i in np.arange(self.NUM_LABELS - 2)]
-        label = y[:, 1] * len(ma) + tf.squeeze(tf.matmul(y[:, 2:], tf.constant(ma, dtype=tf.int32)))
-
+        if self._LABEL is not None :
+            ma = [[i] for i in np.arange(self.NUM_LABELS - 2)]
+            label = y[:, 1] * len(ma) + tf.squeeze(tf.matmul(y[:, 2:], tf.constant(ma, dtype=tf.int32)))
+        else:
+            label = None
         fake = self.generator.network(inputs=z, label=label)
         init = tf.compat.v1.global_variables_initializer()
         saver = tf.compat.v1.train.Saver()
@@ -524,13 +543,19 @@ class Predict(GNet):

             # sess.run(init)
             saver.restore(sess, model_dir)
-            labels = np.zeros((self.ROW_COUNT,self.NUM_LABELS) )
+            if self._LABEL is not None :
+                labels = np.zeros((self.ROW_COUNT,self.NUM_LABELS) )
+                labels= demo
+            else:
+                labels = None

             found = []
-            labels= demo
             for i in np.arange(CANDIDATE_COUNT) :

-                f = sess.run(fake,feed_dict={y:labels})
+                if labels :
+                    f = sess.run(fake,feed_dict={y:labels})
+                else:
+                    f = sess.run(fake)
             #
             # if we are dealing with numeric values only we can perform a simple marginal sum against the indexes
             # The code below will insure we have some acceptable cardinal relationships between id and synthetic values
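One caveat in the hunk above: "if labels :" truth-tests the value, and that raises ValueError when labels is a multi-element numpy array (which it is once labels = demo); only the None case is unambiguous. A sketch of the failure mode and the identity check the rest of this commit uses:

    import numpy as np

    labels = np.zeros((5, 4))
    # if labels:            # ValueError: the truth value of an array with
    #                       # more than one element is ambiguous
    if labels is not None:  # safe, consistent with `self._LABEL is not None`
        print("conditional sampling path")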
@@ -25,7 +25,7 @@ def train (**args) :
     """
     column = args['column'] if (isinstance(args['column'],list)) else [args['column']]

-    column_id = args['id']
+    # column_id = args['id']
     df = args['data'] if not isinstance(args['data'],str) else pd.read_csv(args['data'])
     df.columns = [name.lower() for name in df.columns]

@@ -35,7 +35,8 @@ def train (**args) :
     #
     handler = Binary()
     # args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
-    args['label'] = handler.Export(df[[column_id]])
+    # args['label'] = handler.Export(df[[column_id]])
+    # args['label'] = np.ones(df.shape[0]).reshape(df.shape[0],1)
     for col in column :
         # args['real'] = pd.get_dummies(df[col]).astype(np.float32).values
         args['real'] = handler.Export(df[[col]])
@@ -83,7 +84,7 @@ def generate(**args):
     #
     # args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
     bwrangler = Binary()
-    args['label'] = bwrangler.Export(df[[column_id]])
+    # args['label'] = bwrangler.Export(df[[column_id]])
     _df = df.copy()
     for col in column :
         args['context'] = col
2 setup.py
@@ -7,7 +7,7 @@ def read(fname):
 args = {"name":"data-maker","version":"1.1.0","author":"Vanderbilt University Medical Center","author_email":"steve.l.nyemba@vanderbilt.edu","license":"MIT",
     "packages":find_packages(),"keywords":["healthcare","data","transport","protocol"]}
 args["install_requires"] = ['data-transport@git+https://dev.the-phi.com/git/steve/data-transport.git','tensorflow==1.15','pandas','pandas-gbq','pymongo']
-args['url'] = 'https://hiplab.mc.vanderbilt.edu/aou/data-maker.git'
+args['url'] = 'https://hiplab.mc.vanderbilt.edu/git/aou/data-maker.git'

 if sys.version_info[0] == 2 :
     args['use_2to3'] = False