2022-09-17 00:10:49 +00:00
|
|
|
"""
|
|
|
|
This file handles state-space of the data training/generation process i.e Upon specification of the pre/post conditions
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
This file handles the state-space of the data training/generation process, i.e. it builds and applies the pre/post conditions once they are specified.
|
|
|
|
The specifications for this are as follows (within an entry of the configuration)
|
|
|
|
{
|
|
|
|
"state":{
|
|
|
|
"pre":[{"approximate":{"field":"int"}},{"newdate":{"field":"format"}}],"post":[{"limit":10}]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
"""
|
|
|
|
import importlib
|
|
|
|
import importlib.util
|
|
|
|
import sys
|
|
|
|
from datetime import datetime
|
|
|
|
from data.maker.state.default import *
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
|
|
class State:
    """
    Builds and runs the processing pipelines declared in a configuration entry.

    A pipeline is a list of stages. Each stage is resolved to a callable that is
    invoked as ``callable(_data, _args)`` and whose return value is handed to the
    next stage. Stages are resolved either from a python file on disk (``path``)
    or from the module registered as ``data.maker.state.default``.
    """

    # Name under which the built-in pipeline functions are looked up in sys.modules
    _DEFAULT_MODULE = 'data.maker.state.default'

    @staticmethod
    def apply(_data, lpointers, _config=None):
        """
        Apply a pipeline against the given data; the calling code decides
        whether the pipeline is a pre or a post one.

        :param _data        data handed to the first stage (a data-frame according to the original notes)
        :param lpointers    list of stage descriptors as produced by State.instance for one key,
                            i.e. dictionaries with a 'module' callable and optional 'args';
                            None entries (unresolved stages) are skipped
        :param _config      unused, kept for backward compatibility
        :return             the value returned by the last stage (or _data if nothing ran)
        """
        for _item in lpointers:
            if _item is None:
                # The stage could not be resolved when the pipeline was built
                continue
            pointer = _item['module']
            _args = _item.get('args', {})
            _data = pointer(_data, _args)
        return _data

    @staticmethod
    def instance(_args):
        """
        Resolve every pipeline found in the configuration into stage descriptors.

        :param _args    dictionary of the form {key: {'pipeline': [...], 'path': ...}} where
                        'path' is optional and each pipeline item is either a callable or
                        a dictionary {function_name: arguments [, 'path': file]}
        :return         {key: [stage, ...]} where a stage is a dictionary with the attributes
                        module, name, args, path or None if the function could not be found
        """
        out = {}
        for key in _args:
            # Stage-level path, inherited by the items that do not specify their own
            path = _args[key].get('path', '')
            out[key] = []
            for _item in _args[key]['pipeline']:
                if callable(_item):
                    # An actual pointer was provided, there is nothing to load
                    _name = getattr(_item, '__name__', type(_item).__name__)
                    _stageInfo = {'module': _item, 'name': _name, 'args': {}, 'path': ''}
                elif 'path' in _item:
                    # The item has its own path: the stage-level path is ignored
                    _stageInfo = State._build(_item)
                else:
                    _stageInfo = State._build(dict(_item, path=path))
                out[key].append(_stageInfo)
        return out

    @staticmethod
    def _extract(_entry):
        """
        Normalize a pipeline item {function_name: arguments [, 'path': file]}.

        :param _entry   dictionary describing a single stage
        :return         dictionary with the attributes module (name of the function at this point),
                        args, name and path ('' when no path is given or the file does not exist)
        :raises IndexError  if the entry has no key other than 'path'
        """
        # The first key (insertion order) that is not 'path' is the name of the function
        _names = [_key for _key in _entry if _key != 'path']
        if not _names:
            raise IndexError("pipeline entry does not name a function: %r" % (_entry,))
        _name = _names[0]
        path = _entry['path'] if 'path' in _entry and os.path.exists(_entry['path']) else ''
        return {"module": _name, "args": _entry[_name], 'name': _name, 'path': path}

    @staticmethod
    def _build(_args):
        """
        Build the stage descriptor where module is the function extracted
        from a file (if a path is given) or from the default module.

        :param _args    dictionary {function_name: arguments [, 'path': file]}
        :return         stage descriptor (module, name, args, path) or None if the function was not found
        """
        _info = State._extract(_args)
        _info['module'] = State._instance(_info)
        return _info if _info['module'] is not None else None

    @staticmethod
    def _instance(_args):
        """
        Return the function named in _args, or None if it can not be found.

        :param _args    dictionary with the attributes
                        path    optional path of the file on disk
                        module  name of the function
        """
        _name = _args['module']
        if 'path' in _args and os.path.exists(_args['path']):
            path = _args['path']
            spec = importlib.util.spec_from_file_location(_name, path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
        else:
            #
            # Probably calling a built-in function (from the default module)
            module = sys.modules.get(State._DEFAULT_MODULE)
            if module is None:
                return None
        return getattr(module, _name, None)
|
|
|
|
|
|
|
|
#
|
|
|
|
# Adding a few custom functions that should be able to help ....
|
|
|
|
# These functions can be called without specifying a path
|
|
|
|
#
|
|
|
|
|