bug fix with model saving, and pre/post processing
This commit is contained in:
parent
0efd4b13bc
commit
936bd3ee0b
|
@ -469,7 +469,7 @@ class Train (GNet):
|
||||||
else :
|
else :
|
||||||
dataset = tf.data.Dataset.from_tensor_slices(features_placeholder)
|
dataset = tf.data.Dataset.from_tensor_slices(features_placeholder)
|
||||||
# labels_placeholder = None
|
# labels_placeholder = None
|
||||||
dataset = dataset.repeat(20000)
|
dataset = dataset.repeat(80000)
|
||||||
|
|
||||||
dataset = dataset.batch(batch_size=self.BATCHSIZE_PER_GPU)
|
dataset = dataset.batch(batch_size=self.BATCHSIZE_PER_GPU)
|
||||||
dataset = dataset.prefetch(1)
|
dataset = dataset.prefetch(1)
|
||||||
|
@ -564,12 +564,12 @@ class Train (GNet):
|
||||||
|
|
||||||
# if epoch % self.MAX_EPOCHS == 0:
|
# if epoch % self.MAX_EPOCHS == 0:
|
||||||
# if epoch in [5,10,20,50,75, self.MAX_EPOCHS] :
|
# if epoch in [5,10,20,50,75, self.MAX_EPOCHS] :
|
||||||
if epoch in self.CHECKPOINTS or int(epoch) == 1:
|
if epoch in self.CHECKPOINTS :
|
||||||
# suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
|
# suffix = "-".join(self.ATTRIBUTES['synthetic']) if isinstance(self.ATTRIBUTES['synthetic'],list) else self.ATTRIBUTES['synthetic']
|
||||||
suffix = self.CONTEXT #self.get.suffix()
|
suffix = self.CONTEXT #self.get.suffix()
|
||||||
_name = os.sep.join([self.train_dir,str(epoch),suffix])
|
_name = os.sep.join([self.train_dir,str(epoch),suffix])
|
||||||
# saver.save(sess, self.train_dir, write_meta_graph=False, global_step=epoch)
|
# saver.save(sess, self.train_dir, write_meta_graph=False, global_step=epoch)
|
||||||
saver.save(sess, _name, write_meta_graph=False, global_step=epoch)
|
saver.save(sess, _name, write_meta_graph=False, global_step=np.int64(epoch))
|
||||||
|
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
|
|
|
@ -265,7 +265,7 @@ class Trainer(Learner):
|
||||||
|
|
||||||
#
|
#
|
||||||
# Let us find the smallest, the item is sorted by loss ...
|
# Let us find the smallest, the item is sorted by loss ...
|
||||||
_args['epochs'] = gTrain.logs['epochs'][0]['epochs']
|
_args['network_args']['max_epochs'] = gTrain.logs['epochs'][0]['epochs']
|
||||||
g = Generator(**_args)
|
g = Generator(**_args)
|
||||||
# g.run()
|
# g.run()
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
"""
|
||||||
|
This file is designed to specify the application of pre/post-processing code.
|
||||||
|
The pre-processing code gets applied after the data has been loaded
|
||||||
|
The post-processing code gets applied after the data has been generated, for instance:
|
||||||
|
-approximation code/logic; date shifting; suppression; adding noise
|
||||||
|
-
|
||||||
|
"""
|
||||||
|
import numpy as np
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import time
|
||||||
|
|
||||||
|
class Phase:
    """
    Base class for a single pre/post-processing step.

    Subclasses receive the working data and a completion callback at
    construction time and must override :meth:`apply` with the actual
    transformation logic.
    """
    def __init__(self, **_args):
        # Data handed to this phase (presumably a data-frame — confirm with caller).
        self._df = _args['data']
        # Function to invoke once the phase has finished processing.
        self.callback = _args['callback']

    def apply(self, **_args):
        """
        Apply this phase to the data.

        :param data data-frame
        :param _info arguments needed to be applied
        :param callback callback function once done
        :raises Exception: always — subclasses must provide an implementation
        """
        raise Exception("Function needs to be Implemented")
|
class Pre(Phase):
    # Pre-processing phase: applied after the data has been loaded and
    # before training/generation. No default behavior is defined yet.
    pass
|
class Post(Phase):
    # Post-processing phase: applied after the data has been generated
    # (e.g. approximation, date shifting, suppression, adding noise).
    def __init__(self,**_args):
        super().__init__(**_args)
        pass
|
|
||||||
|
class Date(Post):
    """
    Post-processing helper that synthesizes random dates within a given year.
    """
    def __init__(self, **_args):
        super().__init__(**_args)

    def make(self, **_args):
        """
        This function generates a random date given a year and optionally a set
        of days from the randomly generated date.

        :param year initial value of a year ('' / None / NaN yields None)
        :param offset list of days between initial date, applied cumulatively
        :param format optional strftime format (default '%Y-%m-%d')
        :return: a formatted date string, a list of them when offset is given,
                 or None when the year is missing
        """
        if _args['year'] in ['', None, np.nan]:
            return None
        year = int(_args['year'])

        offset = _args['offset'] if 'offset' in _args else 0
        month = np.random.randint(1, 13)
        if month == 2:
            # Full Gregorian leap-year rule: the previous `year % 4` test
            # wrongly treated century years such as 1900/2100 as leap years.
            _leap = (year % 4 == 0 and year % 100 != 0) or year % 400 == 0
            _end = 29 if _leap else 28
        else:
            _end = 31 if month in [1, 3, 5, 7, 8, 10, 12] else 30
        # np.random.randint's upper bound is exclusive: +1 so the last day of
        # the month (e.g. the 31st) can actually be drawn.
        day = np.random.randint(1, _end + 1)

        #-- synthetic date
        _date = datetime(year=year, month=month, day=day, minute=0, hour=0, second=0)
        FORMAT = '%Y-%m-%d' if 'format' not in _args else _args['format']

        if offset:
            r = [_date.strftime(FORMAT)]
            for _delta in offset:
                _date = _date + timedelta(_delta)
                # BUG FIX: was `_date.strptime(FORMAT)`, which raises TypeError
                # (strptime parses a string; strftime formats the date).
                r.append(_date.strftime(FORMAT))
            return r
        else:
            return _date.strftime(FORMAT)

    def apply(self, **_args):
        """
        Placeholder for the date post-processing hook — not implemented yet.
        """
        pass
|
class Approximate(Post):
|
||||||
|
def apply(**_args):
|
||||||
|
pass
|
||||||
|
def applyWithRange(**_args):
|
|
@ -0,0 +1,105 @@
|
||||||
|
"""
|
||||||
|
This file handles the state-space of the data training/generation process, i.e. upon specification of the pre/post conditions
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
This file handles state-space of the data training/generation process i.e Upon specification of the pre/post conditions,
|
||||||
|
The specifications for this are as follows (within an entry of the configuration)
|
||||||
|
{
|
||||||
|
"state":{
|
||||||
|
"pre":[{"approximate":{"field":"int"}},{"newdate":{"field":"format"}}],"post":[{"limit":10}]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
import importlib
|
||||||
|
import importlib.util
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from data.maker.state.default import *
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class State :
    """
    Resolves and applies pre/post-processing pipelines around the data
    training/generation process. Pipeline entries are resolved to callables
    either from a Python file on disk or from data.maker.state.default.
    """
    @staticmethod
    def apply(_data,lpointers):
        """
        This function applies a pipeline against a given data-frame, the calling code must decide whether it is a pre/post
        :_data data-frame
        :_lpointers functions modules returned by instance (module,_args)
        """
        for _item in lpointers :
            # Entries that failed to resolve (State._build returned None) are skipped.
            if _item is None :
                continue

            pointer = _item['module']
            _args = _item['args']

            # Each pipeline function takes (data-frame, args) and returns the transformed frame.
            _data = pointer(_data,_args)
        return _data
    @staticmethod
    def instance(_args):
        """
        Build pipelines from a configuration of the form
        {key: {"path"?: str, "pipeline": [entry, ...]}, ...} (keys are
        typically 'pre' and 'post').
        Returns {key: [resolved-entry-or-None, ...]} for each key in _args.
        """
        pre = []
        post=[]

        out = {}
        for key in _args :
            #
            # If the item has a path property is should be ignored
            path = _args[key]['path'] if 'path' in _args[key] else ''
            # Propagate the pipeline-level path to entries that do not carry their own.
            out[key] = [ State._build(dict(_item,**{'path':path})) if 'path' not in _item else State._build(_item) for _item in _args[key]['pipeline']]

        return out
        # if 'pre' in _args:
        # path = _args['pre']['path'] if 'path' in _args['pre'] else ''

        # pre = [ State._build(dict(_item,**{'path':path})) for _item in _args['pre']['pipeline']]
        # else:
        # path = _args['post']['path'] if 'path' in _args['post'] else ''

        # post = [ State._build(dict(_item,**{'path':path})) for _item in _args['post']['pipeline']]
        # return {'pre':pre,'post':post}

    @staticmethod
    def _extract(_entry):
        """
        Split a pipeline entry {name: args, 'path'?: str} into its parts.
        NOTE(review): assumes exactly one non-'path' key per entry; with more
        than one key the set ordering makes the pick non-deterministic — confirm.
        The path is kept only when it points to an existing file.
        """
        _name = list(set(_entry.keys()) - set(['path']) )
        _name = _name[0]
        path = _entry['path'] if 'path' in _entry and os.path.exists(_entry['path']) else ''
        return {"module": _name,"args": _entry[_name],'name':_name,'path':path}
        pass
    @staticmethod
    def _build(_args):
        """
        Resolve a pipeline entry to {'module': callable, 'args', 'name', 'path'}.
        Returns None when the named function cannot be found.
        """
        _info = State._extract(_args)
        # _info = dict(_args,**_info)

        _info['module'] = State._instance(_info)
        return _info if _info['module'] is not None else None

    @staticmethod
    def _instance(_args):
        """
        :path optional path of the file on disk
        :module name of the function
        """
        _name = _args['module']

        if 'path' in _args and os.path.exists(_args['path']):
            path= _args['path']

            # Load the module directly from the given file location.
            spec = importlib.util.spec_from_file_location(_name, path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
        else:
            #
            # Probably calling a built-in module (should be in this file)
            # NOTE(review): requires data.maker.state.default to be imported already.
            module = sys.modules['data.maker.state.default']

        return getattr(module,_name) if hasattr(module,_name) else None
||||||
|
|
||||||
|
#
|
||||||
|
# Adding a few custom functions that should be able to help ....
|
||||||
|
# These functions can be called without specifying a path
|
||||||
|
#
|
||||||
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
"""
|
||||||
|
This file contains default functions applied to a data-frame/dataset as pre/post processing jobs.
|
||||||
|
The functions are organized in a pipeline i.e the data will be applied to each function
|
||||||
|
|
||||||
|
Custom functions :
|
||||||
|
functions must take 2 arguments (_data,_args) : where _data is a data frame and _args is an object describing the input parameters
|
||||||
|
"""
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
|
||||||
|
def limit(_data, size):
    """
    Truncate a data-frame to its first *size* rows.

    Configuration shape: ...,{limit:size}

    :param _data data-frame
    :param size number of rows to keep
    :return: the first *size* rows of _data
    """
    # head(n) is the positional prefix — equivalent to _data.iloc[:size].
    return _data.head(size)
||||||
|
def format(_data, _schema):
    """
    Enforce a schema against a data-frame; this may or may not work depending
    on the persistence storage.

    :param _data data-frame containing all data
    :param _schema schema to enforce, expected as a list of {name,type,description}
    :return: the data-frame (currently returned unchanged)
    """
    # NOTE(review): no enforcement is implemented yet — the frame passes
    # through as-is; callers relying on type coercion should verify upstream.
    return _data
||||||
|
|
||||||
|
def approximate(_data, _args):
    """
    Approximate n-fields in the data given their distribution.

    :param _data data-frame
    :param _args Object of {field:type}
    :return: the data-frame with each listed (and present) column replaced
    """
    _typemap = {'int': int, 'float': float, 'integer': int, 'double': float}
    for _field in list(_args.keys()):
        # Silently skip fields the frame does not carry.
        if _field not in _data:
            continue
        # Unknown type labels fall back to str.
        _cast = _typemap.get(_args[_field], str)
        # NOTE(review): np.random.uniform(values) treats the column values as
        # the *low* bound with high=1.0 — presumably intentional jitter; confirm.
        _data.loc[:, _field] = np.random.uniform(_data[_field].values).astype(_cast)

    return _data
||||||
|
def split_date(_data, _args):
    """
    Derive new column(s) by re-formatting a date held in another column.

    :param _data data-frame
    :param _args configuration entry {column:{column,format:{in,out},type?}}
    :return: the data-frame with each configured column added/overwritten
    """
    for _target in list(_args.keys()):
        _spec = _args[_target]
        _source = _spec['column']
        _infmt = _spec['format']['in']
        _outfmt = _spec['format']['out']
        # Without an explicit 'type', the derived column is kept as str.
        _cast = _spec['type'] if 'type' in _spec else _spec.get('type', str) if False else (_spec['type'] if 'type' in _spec else str)

        def _reformat(_value, _i=_infmt, _o=_outfmt):
            # Parse with the input format, re-render with the output format.
            return datetime.strftime(datetime.strptime(str(_value), _i), _o)

        _data.loc[:, _target] = _data[_source].apply(_reformat).astype(_cast)
    return _data
||||||
|
def newdate(_data, _args):
    """
    This function creates a new date on a given column, optionally seeded
    from the year values of another column.

    :param _data data frame
    :param _args configuration column:{format,column?}
    :return: the data-frame with each configured date column set
    """
    for _name in list(_args.keys()):
        # Renamed from `format` to avoid shadowing the builtin / sibling function.
        _fmt = _args[_name]['format']
        # NOTE(review): the target column must already exist — its size drives
        # the number of generated rows; confirm with caller.
        _rows = _data[_name].size
        if 'column' in _args[_name]:
            # Pull base years from an existing source column.
            _years = _data[_args[_name]['column']].values
        else:
            # No source column: draw years uniformly from the last 90 years.
            _years = np.random.choice(np.arange(datetime.now().year - 90, datetime.now().year), _rows)
        _data.loc[:, _name] = [_makedate(year=_years[_i], format=_fmt) for _i in np.arange(_rows)]

    return _data
||||||
|
def _makedate(**_args):
|
||||||
|
"""
|
||||||
|
This function creates a new date and applies it to a column
|
||||||
|
:_data data-frame with columns
|
||||||
|
:_args arguments for col1:format
|
||||||
|
"""
|
||||||
|
_columns = list(_args.keys())
|
||||||
|
|
||||||
|
# if _args['year'] in ['',None,np.nan] :
|
||||||
|
# year = np.random.choice(np.arange(1920,222),1)
|
||||||
|
# else:
|
||||||
|
# year = int(_args['year'])
|
||||||
|
year = int(_args['year'])
|
||||||
|
offset = _args['offset'] if 'offset' in _args else 0
|
||||||
|
month = np.random.randint(1,13)
|
||||||
|
if month == 2:
|
||||||
|
_end = 28 if year % 4 != 0 else 29
|
||||||
|
else:
|
||||||
|
_end = 31 if month in [1,3,5,7,8,10,12] else 30
|
||||||
|
day = np.random.randint(1,_end)
|
||||||
|
|
||||||
|
#-- synthetic date
|
||||||
|
_date = datetime(year=year,month=month,day=day,minute=0,hour=0,second=0)
|
||||||
|
FORMAT = '%Y-%m-%d'
|
||||||
|
|
||||||
|
if 'format' in _args:
|
||||||
|
FORMAT = _args['format']
|
||||||
|
|
||||||
|
|
||||||
|
# print ([_name,FORMAT, _date.strftime(FORMAT)])
|
||||||
|
r = []
|
||||||
|
if offset :
|
||||||
|
r = [_date.strftime(FORMAT)]
|
||||||
|
for _delta in offset :
|
||||||
|
_date = _date + timedelta(_delta)
|
||||||
|
r.append(_date.strptime(FORMAT))
|
||||||
|
return r
|
||||||
|
else:
|
||||||
|
return _date.strftime(FORMAT)
|
||||||
|
|
Loading…
Reference in New Issue