bug fixes and optimizations
This commit is contained in:
parent
459afa2890
commit
4c297679dc
|
@ -27,22 +27,25 @@ class ContinuousToDiscrete :
|
||||||
values = np.array(X).astype(np.float32)
|
values = np.array(X).astype(np.float32)
|
||||||
BOUNDS = ContinuousToDiscrete.bounds(values,n)
|
BOUNDS = ContinuousToDiscrete.bounds(values,n)
|
||||||
# _map = [{"index":BOUNDS.index(i),"ubound":i} for i in BOUNDS]
|
# _map = [{"index":BOUNDS.index(i),"ubound":i} for i in BOUNDS]
|
||||||
_matrix = []
|
# _matrix = []
|
||||||
m = []
|
# m = []
|
||||||
for value in X :
|
# for value in X :
|
||||||
x_ = np.zeros(n)
|
# x_ = np.zeros(n)
|
||||||
|
|
||||||
for row in BOUNDS :
|
# for row in BOUNDS :
|
||||||
|
|
||||||
|
# if value>= row.left and value <= row.right :
|
||||||
|
# index = BOUNDS.index(row)
|
||||||
|
# x_[index] = 1
|
||||||
|
# break
|
||||||
|
# _matrix += x_.tolist()
|
||||||
|
# #
|
||||||
|
# # for items in BOUNDS :
|
||||||
|
# # index = BOUNDS.index(items)
|
||||||
|
|
||||||
|
# return np.array(_matrix).reshape(len(X),n)
|
||||||
|
matrix = np.repeat(np.zeros(n),len(X)).reshape(len(X),n)
|
||||||
|
|
||||||
if value>= row.left and value <= row.right :
|
|
||||||
index = BOUNDS.index(row)
|
|
||||||
x_[index] = 1
|
|
||||||
break
|
|
||||||
_matrix += x_.tolist()
|
|
||||||
#
|
|
||||||
# for items in BOUNDS :
|
|
||||||
# index = BOUNDS.index(items)
|
|
||||||
return np.array(_matrix).reshape(len(X),n)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def bounds(x,n):
|
def bounds(x,n):
|
||||||
|
@ -65,9 +68,15 @@ class ContinuousToDiscrete :
|
||||||
# _BINARY= ContinuousToDiscrete.binary(X,BIN_SIZE)
|
# _BINARY= ContinuousToDiscrete.binary(X,BIN_SIZE)
|
||||||
# # # print (BOUNDS)
|
# # # print (BOUNDS)
|
||||||
l = {}
|
l = {}
|
||||||
for value in X :
|
for i in np.arange(len(X)): #value in X :
|
||||||
values += [ np.round(np.random.uniform(item.left,item.right),ContinuousToDiscrete.ROUND_UP) for item in BOUNDS if value >= item.left and value <= item.right ]
|
|
||||||
|
|
||||||
|
value = X[i]
|
||||||
|
|
||||||
|
for item in BOUNDS :
|
||||||
|
if value >= item.left and value <= item.right :
|
||||||
|
values += [np.round(np.random.uniform(item.left,item.right),ContinuousToDiscrete.ROUND_UP)]
|
||||||
|
break
|
||||||
|
# values += [ np.round(np.random.uniform(item.left,item.right),ContinuousToDiscrete.ROUND_UP) for item in BOUNDS if value >= item.left and value <= item.right ]
|
||||||
|
|
||||||
|
|
||||||
# # values = []
|
# # values = []
|
||||||
|
@ -223,11 +232,10 @@ def generate(**args):
|
||||||
i = np.where (i == False)[0]
|
i = np.where (i == False)[0]
|
||||||
else:
|
else:
|
||||||
i = np.where( r[col] != None)[0]
|
i = np.where( r[col] != None)[0]
|
||||||
_approx = ContinuousToDiscrete.continuous(r[col][i],BIN_SIZE)
|
_approx = ContinuousToDiscrete.continuous(r[col][i],BIN_SIZE) #-- approximating based on arbitrary bins
|
||||||
r[col][i] = _approx
|
r[col][i] = _approx
|
||||||
|
|
||||||
_df[col] = r[col] #ContinuousToDiscrete.continuous(r[col],BIN_SIZE) if col in CONTINUOUS else r[col]
|
_df[col] = r[col]
|
||||||
# _df[col] = r[col]
|
|
||||||
#
|
#
|
||||||
# @TODO: log basic stats about the synthetic attribute
|
# @TODO: log basic stats about the synthetic attribute
|
||||||
#
|
#
|
||||||
|
|
|
@ -47,7 +47,7 @@ class Components :
|
||||||
logger = factory.instance(type='mongo.MongoWriter',args={'dbname':'aou','doc':args['context']})
|
logger = factory.instance(type='mongo.MongoWriter',args={'dbname':'aou','doc':args['context']})
|
||||||
logger.write({"module":"bigquery","action":"read","input":{"sql":SQL}})
|
logger.write({"module":"bigquery","action":"read","input":{"sql":SQL}})
|
||||||
credentials = service_account.Credentials.from_service_account_file('/home/steve/dev/aou/accounts/curation-prod.json')
|
credentials = service_account.Credentials.from_service_account_file('/home/steve/dev/aou/accounts/curation-prod.json')
|
||||||
df = pd.read_gbq(SQL,credentials=credentials,dialect='standard').astype(object)
|
df = pd.read_gbq(SQL,credentials=credentials,dialect='standard')
|
||||||
return df
|
return df
|
||||||
|
|
||||||
# return lambda: pd.read_gbq(SQL,credentials=credentials,dialect='standard')[args['columns']].dropna()
|
# return lambda: pd.read_gbq(SQL,credentials=credentials,dialect='standard')[args['columns']].dropna()
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -4,7 +4,7 @@ import sys
|
||||||
|
|
||||||
def read(fname):
|
def read(fname):
|
||||||
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
||||||
args = {"name":"data-maker","version":"1.2.5","author":"Vanderbilt University Medical Center","author_email":"steve.l.nyemba@vanderbilt.edu","license":"MIT",
|
args = {"name":"data-maker","version":"1.2.6","author":"Vanderbilt University Medical Center","author_email":"steve.l.nyemba@vanderbilt.edu","license":"MIT",
|
||||||
"packages":find_packages(),"keywords":["healthcare","data","transport","protocol"]}
|
"packages":find_packages(),"keywords":["healthcare","data","transport","protocol"]}
|
||||||
args["install_requires"] = ['data-transport@git+https://dev.the-phi.com/git/steve/data-transport.git','tensorflow==1.15','pandas','pandas-gbq','pymongo']
|
args["install_requires"] = ['data-transport@git+https://dev.the-phi.com/git/steve/data-transport.git','tensorflow==1.15','pandas','pandas-gbq','pymongo']
|
||||||
args['url'] = 'https://hiplab.mc.vanderbilt.edu/git/aou/data-maker.git'
|
args['url'] = 'https://hiplab.mc.vanderbilt.edu/git/aou/data-maker.git'
|
||||||
|
|
Loading…
Reference in New Issue