bug fix with binary matrix generation
This commit is contained in:
parent
ce55848cc8
commit
0f0c2642c2
|
@ -191,12 +191,13 @@ class Binary :
|
|||
#
|
||||
# This will give us a map of how each column was mapped to a bitstream
|
||||
|
||||
_map = df.fillna(np.nan).apply(lambda column: self.__stream(column),axis=0)
|
||||
# _map = df.fillna(np.nan).apply(lambda column: self.__stream(column),axis=0)
|
||||
_map = df.fillna('').apply(lambda column: self.__stream(column),axis=0)
|
||||
|
||||
#
|
||||
# We will merge this to have a healthy matrix
|
||||
_matrix = _map.apply(lambda row: list(list(itertools.chain(*row.values.tolist()))),axis=1)
|
||||
_matrix = np.matrix([list(item) for item in _matrix])
|
||||
_matrix = np.matrix([list(item) for item in _matrix]).astype(np.float32)
|
||||
#
|
||||
# let's format the map so we don't have an unreasonable amount of data
|
||||
#
|
||||
|
@ -210,7 +211,8 @@ class Binary :
|
|||
_m[name] = {"start":beg,"end":end}
|
||||
beg = end
|
||||
|
||||
return _m,_matrix.astype(np.float32)
|
||||
# return _m,_matrix.astype(np.float32)
|
||||
return _matrix
|
||||
|
||||
def Import(self,df,values,_map):
|
||||
"""
|
||||
|
|
|
@ -397,17 +397,13 @@ class Train (GNet):
|
|||
labels_placeholder = tf.compat.v1.placeholder(shape=self._LABEL.shape, dtype=tf.float32)
|
||||
dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
|
||||
dataset = dataset.repeat(10000)
|
||||
dataset = dataset.batch(batch_size=self.BATCHSIZE_PER_GPU)
|
||||
dataset = dataset.batch(batch_size=3000)
|
||||
dataset = dataset.prefetch(1)
|
||||
# iterator = dataset.make_initializable_iterator()
|
||||
iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
|
||||
# next_element = iterator.get_next()
|
||||
# init_op = iterator.initializer
|
||||
return iterator, features_placeholder, labels_placeholder
|
||||
|
||||
def network(self,**args):
|
||||
# def graph(stage, opt):
|
||||
# global_step = tf.get_variable(stage+'_step', [], initializer=tf.constant_initializer(0), trainable=False)
|
||||
stage = args['stage']
|
||||
opt = args['opt']
|
||||
tower_grads = []
|
||||
|
@ -540,8 +536,6 @@ class Predict(GNet):
|
|||
# The code below will ensure we have some acceptable cardinal relationships between id and synthetic values
|
||||
#
|
||||
df = ( pd.DataFrame(np.round(f).astype(np.int32)))
|
||||
print (df.head())
|
||||
print ()
|
||||
p = 0 not in df.sum(axis=1).values
|
||||
|
||||
if p:
|
||||
|
|
|
@ -12,6 +12,7 @@ import pandas as pd
|
|||
import numpy as np
|
||||
import data.gan as gan
|
||||
from transport import factory
|
||||
from data.bridge import Binary
|
||||
import threading as thread
|
||||
def train (**args) :
|
||||
"""
|
||||
|
@ -32,9 +33,12 @@ def train (**args) :
|
|||
# If we have several columns we will proceed one at a time (it could be done in separate threads)
|
||||
# @TODO : Consider performing this task on several threads/GPUs simultaneously
|
||||
#
|
||||
args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
|
||||
handler = Binary()
|
||||
# args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
|
||||
args['label'] = handler.Export(df[[column_id]])
|
||||
for col in column :
|
||||
args['real'] = pd.get_dummies(df[col]).astype(np.float32).values
|
||||
# args['real'] = pd.get_dummies(df[col]).astype(np.float32).values
|
||||
args['real'] = handler.Export(df[[col]])
|
||||
args['column'] = col
|
||||
args['context'] = col
|
||||
context = args['context']
|
||||
|
@ -77,7 +81,9 @@ def generate(**args):
|
|||
#@TODO:
|
||||
# If the identifier is not present, we should find a way to determine or make one
|
||||
#
|
||||
args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
|
||||
# args['label'] = pd.get_dummies(df[column_id]).astype(np.float32).values
|
||||
bwrangler = Binary()
|
||||
args['label'] = bwrangler.Export(df[[column_id]])
|
||||
_df = df.copy()
|
||||
for col in column :
|
||||
args['context'] = col
|
||||
|
|
Loading…
Reference in New Issue