bug fix with binary matrix generation
parent ce55848cc8
commit 0f0c2642c2
@@ -191,12 +191,13 @@ class Binary :
         #
         # This will give us a map of how each column was mapped to a bitstream
 
-        _map = df.fillna(np.nan).apply(lambda column: self.__stream(column),axis=0)
+        # _map = df.fillna(np.nan).apply(lambda column: self.__stream(column),axis=0)
+        _map = df.fillna('').apply(lambda column: self.__stream(column),axis=0)
 
         #
         # We will merge this to have a healthy matrix
         _matrix = _map.apply(lambda row: list(list(itertools.chain(*row.values.tolist()))),axis=1)
-        _matrix = np.matrix([list(item) for item in _matrix])
+        _matrix = np.matrix([list(item) for item in _matrix]).astype(np.float32)
         #
         # let's format the map so we don't have an unreasonable amount of data
         #
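The substantive fix is the fillna change: filling holes with np.nan is effectively a no-op, so an otherwise-string column reaches __stream with float NaN cells mixed in, whereas fillna('') gives every cell a hashable, encodable placeholder. A minimal sketch of the difference on toy data (the body of __stream is not shown in this diff):

import pandas as pd

df = pd.DataFrame({"gender": ["M", None, "F"]})

# Leaving the hole as NaN means the column mixes str and float, so a
# per-value bit encoding hits an unencodable cell:
print(df["gender"].tolist())              # ['M', None, 'F'] -> still missing

# fillna('') keeps the column homogeneous; the empty string simply becomes
# one more category in the bitstream:
print(df.fillna("")["gender"].tolist())   # ['M', '', 'F']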
@@ -210,7 +211,8 @@ class Binary :
             _m[name] = {"start":beg,"end":end}
             beg = end
 
-        return _m,_matrix.astype(np.float32)
+        # return _m,_matrix.astype(np.float32)
+        return _matrix
 
     def Import(self,df,values,_map):
         """
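This changes the contract of Export: it now returns only the matrix (already cast to float32 in the previous hunk), and the per-column offset map _m of {column: {"start", "end"}} entries is no longer surfaced, even though Import(self,df,values,_map) still expects one. A sketch of the before/after, with a toy frame standing in for the real data:

import pandas as pd
from data.bridge import Binary

handler = Binary()
df = pd.DataFrame({"age": [1, 2, 3]})   # toy stand-in

# before this commit, callers unpacked a tuple:
# _map, matrix = handler.Export(df[["age"]])

# after this commit, Export returns the float32 matrix alone; anything that
# still needs the column offsets (e.g. Import's _map parameter) must obtain
# them some other way:
matrix = handler.Export(df[["age"]])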
@@ -397,17 +397,13 @@ class Train (GNet):
         labels_placeholder = tf.compat.v1.placeholder(shape=self._LABEL.shape, dtype=tf.float32)
         dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
         dataset = dataset.repeat(10000)
-        dataset = dataset.batch(batch_size=self.BATCHSIZE_PER_GPU)
+        dataset = dataset.batch(batch_size=3000)
         dataset = dataset.prefetch(1)
         # iterator = dataset.make_initializable_iterator()
         iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
-        # next_element = iterator.get_next()
-        # init_op = iterator.initializer
         return iterator, features_placeholder, labels_placeholder
 
     def network(self,**args):
-        # def graph(stage, opt):
-        # global_step = tf.get_variable(stage+'_step', [], initializer=tf.constant_initializer(0), trainable=False)
         stage = args['stage']
         opt = args['opt']
         tower_grads = []
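Besides deleting dead commented-out lines, this hunk pins the batch size to a literal 3000 in place of self.BATCHSIZE_PER_GPU. A minimal, self-contained sketch of how the returned initializable iterator is typically driven; the shapes and the session loop are assumptions, since the training loop itself is not in this diff:

import tensorflow as tf  # TF 2.x with the v1 compatibility layer, as in the diff

tf.compat.v1.disable_eager_execution()

# Assumed shapes; the real ones come from self._REAL / self._LABEL.
features_placeholder = tf.compat.v1.placeholder(shape=(None, 64), dtype=tf.float32)
labels_placeholder = tf.compat.v1.placeholder(shape=(None, 2), dtype=tf.float32)
dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
dataset = dataset.repeat(10000).batch(batch_size=3000).prefetch(1)
iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
next_features, next_labels = iterator.get_next()

# The iterator must be initialized with concrete arrays before use:
# with tf.compat.v1.Session() as sess:
#     sess.run(iterator.initializer,
#              feed_dict={features_placeholder: X, labels_placeholder: Y})
#     f, l = sess.run([next_features, next_labels])  # one batch of up to 3000 rows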
@ -540,8 +536,6 @@ class Predict(GNet):
|
||||||
# The code below will insure we have some acceptable cardinal relationships between id and synthetic values
|
# The code below will insure we have some acceptable cardinal relationships between id and synthetic values
|
||||||
#
|
#
|
||||||
df = ( pd.DataFrame(np.round(f).astype(np.int32)))
|
df = ( pd.DataFrame(np.round(f).astype(np.int32)))
|
||||||
print (df.head())
|
|
||||||
print ()
|
|
||||||
p = 0 not in df.sum(axis=1).values
|
p = 0 not in df.sum(axis=1).values
|
||||||
|
|
||||||
if p:
|
if p:
|
||||||
|
|
|
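With the debug prints removed, the hunk reduces to the acceptance test itself: every row of the rounded synthetic matrix must light at least one bit, because an all-zero row encodes no value at all. A toy illustration (the generator output f below is made up):

import numpy as np
import pandas as pd

f = np.array([[0.9, 0.1], [0.2, 0.1]])          # raw generator output (made up)
df = pd.DataFrame(np.round(f).astype(np.int32)) # -> [[1, 0], [0, 0]]

# The batch is only accepted when no row sums to zero:
p = 0 not in df.sum(axis=1).values
print(p)  # False: the second row rounded to all zeros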
@@ -12,6 +12,7 @@ import pandas as pd
 import numpy as np
 import data.gan as gan
 from transport import factory
+from data.bridge import Binary
 import threading as thread
 def train (**args) :
     """
@@ -32,9 +33,12 @@ def train (**args) :
     # If we have several columns we will proceed one at a time (it could be done in separate threads)
     # @TODO : Consider performing this task on several threads/GPUs simulataneously
     #
-    args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
+    handler = Binary()
+    # args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
+    args['label'] = handler.Export(df[[column_id]])
     for col in column :
-        args['real'] = pd.get_dummies(df[col]).astype(np.float32).values
+        # args['real'] = pd.get_dummies(df[col]).astype(np.float32).values
+        args['real'] = handler.Export(df[[col]])
         args['column'] = col
         args['context'] = col
         context = args['context']
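Both label and real inputs switch here from pd.get_dummies one-hot vectors to Binary.Export bitstreams. The practical payoff is width: one-hot needs one column per distinct value, while a binary code needs only about log2 of that. A rough sketch of the size difference; the bit layout below is an assumption, since Binary.__stream's exact scheme is not shown in this diff:

import numpy as np
import pandas as pd

s = pd.DataFrame({"zip": ["10001", "10002", "10003", "10004"]})

# One-hot: one column per distinct value (4 columns here).
onehot = pd.get_dummies(s["zip"]).astype(np.float32).values
print(onehot.shape)   # (4, 4)

# A binary encoding needs only ceil(log2(4)) = 2 bits per row, which is
# presumably the kind of matrix Binary.Export produces:
codes = s["zip"].astype("category").cat.codes.values
bits = ((codes[:, None] >> np.arange(2)) & 1).astype(np.float32)
print(bits.shape)     # (4, 2)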
@@ -77,7 +81,9 @@ def generate(**args):
     #@TODO:
     # If the identifier is not present, we should fine a way to determine or make one
     #
-    args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
+    # args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
+    bwrangler = Binary()
+    args['label'] = bwrangler.Export(df[[column_id]])
     _df = df.copy()
     for col in column :
         args['context'] = col
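generate builds its own encoder (bwrangler) rather than reusing train's handler. That is only safe if Export derives its bit layout deterministically from the column values alone, which the stateless Binary() construction suggests but this diff does not prove. A sketch of the assumption on a toy frame:

import pandas as pd
from data.bridge import Binary

df = pd.DataFrame({"id": ["a", "b", "a"]})   # toy stand-in for the real frame

# Two independent instances must agree on the encoding of the same column
# for train() and generate() to line up (assumed, not shown in the diff):
handler = Binary()      # train() side
bwrangler = Binary()    # generate() side
same = (handler.Export(df[["id"]]) == bwrangler.Export(df[["id"]])).all()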