From d42d601be7adeb6573a3824d607f300bcf271fda Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Wed, 31 Aug 2022 12:51:48 -0500 Subject: [PATCH] bug fix & enhancements --- data/gan.py | 3 ++- data/maker/__init__.py | 18 +++++++++++++----- data/maker/prepare/__init__.py | 15 +++++++++++---- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/data/gan.py b/data/gan.py index 26f19a2..812426a 100644 --- a/data/gan.py +++ b/data/gan.py @@ -533,7 +533,7 @@ class Train (GNet): print(format_str % (epoch, -w_sum/(self.STEPS_PER_EPOCH*2), duration)) # print (dir (w_distance)) - logs.append({"epoch":epoch,"distance":-w_sum/(self.STEPS_PER_EPOCH*2) }) + logs.append({"epoch": int(epoch),"distance":float(-w_sum/(self.STEPS_PER_EPOCH*2)) }) # if epoch % self.MAX_EPOCHS == 0: if epoch in [5,10,20,50,75, self.MAX_EPOCHS] : @@ -547,6 +547,7 @@ class Train (GNet): if self.logger : row = {"module":"gan-train","action":"logs","input":{"partition":self.PARTITION,"logs":logs}} #,"model":pickle.dump(sess)} self.logger.write(row) + # # @TODO: # We should upload the files in the checkpoint diff --git a/data/maker/__init__.py b/data/maker/__init__.py index 7ea2c74..50ac8c1 100644 --- a/data/maker/__init__.py +++ b/data/maker/__init__.py @@ -69,15 +69,19 @@ class Learner(Process): self.cache = [] # self.logpath= _args['logpath'] if 'logpath' in _args else 'logs' # sel.max_epoc + self.logger = None + if 'logger' in self.store : + self.logger = transport.factory.instance(**self.store['logger']) def log(self,**_args): try: - # _context = self.info['context'] - # _label = self.info['info'] if 'info' in self.info else _context + _context = self.info['context'] + _label = self.info['info'] if 'info' in self.info else _context # logger = transport.factory.instance(**self.store['logger']) if 'logger' in self.store else transport.factory.instance(provider=transport.providers.CONSOLE,context='write',lock=True) - # _args = dict({'ndx':self.ndx,'module':self.name,'table':self.info['from'],'context':_context,'info':_label,**_args}) - # logger.write(_args) - # self.ndx += 1 + _args = dict({'ndx':self.ndx,'module':self.name,'table':self.info['from'],'context':_context,'info':_label,**_args}) + if self.logger: + self.logger.write(_args) + self.ndx += 1 # if hasattr(logger,'close') : # logger.close() pass @@ -178,6 +182,8 @@ class Trainer(Learner): _args['gpu'] = self.gpu _args['real'] = _matrix _args['candidates'] = self.candidates + if self.logger : + _args['logger'] = transport.factory.instance(**self.store['logger']) # # At this point we have the binary matrix, we can initiate training # @@ -250,6 +256,8 @@ class Generator (Learner): _args['row_count'] = self._df.shape[0] if self.gpu : _args['gpu'] = self.gpu + if self.logger : + _args['logger'] = transport.factory.instance(**self.store['logger']) gHandler = gan.Predict(**_args) gHandler.load_meta(columns=None) _iomatrix = gHandler.apply() diff --git a/data/maker/prepare/__init__.py b/data/maker/prepare/__init__.py index 1adc44d..c8331bd 100644 --- a/data/maker/prepare/__init__.py +++ b/data/maker/prepare/__init__.py @@ -34,6 +34,8 @@ class Hardware : pass class Input : + class NOVALUES : + RANDOM,IGNORE,ALWAYS = ['random','ignore','always'] """ This class is designed to read data from a source and and perform a variet of operations : - provide a feature space, and rows (matrix profile) @@ -257,8 +259,6 @@ class Input : def decode (self,_matrix,**_args): # # _matrix binary matrix - # _values value space given the columns - # columns name of the columns ... # columns = _args['columns'] @@ -268,8 +268,15 @@ class Input : #@TODO: Provide random values for things that are missing # x = _matrix.apply(lambda row: _values[row.values == 1].tolist()[0] if (row.values == 1).sum() > 0 else np.repeat(None,len(self._columns)) ,axis=1).tolist() - novalues = _values[np.random.choice( len(_values),1)[0]].tolist() - # novalues = np.repeat(None,len(self._columns)) + # + # @TODO: Provide a parameter to either: + # - missing = {outlier,random,none} + # - outlier: select an outlier, random: randomly select a value, none: do nothing ... + # + if np.random.choice([0,1],1)[0] : + novalues = _values[np.random.choice( len(_values),1)[0]].tolist() + else: + novalues = np.repeat(None,len(self._columns)) x = _matrix.apply(lambda row: _values[row.values == 1].tolist()[0] if (row.values == 1).sum() > 0 else novalues ,axis=1).tolist() return pd.DataFrame(x,columns=columns)