bug fix ...

This commit is contained in:
Steve Nyemba 2022-09-15 17:56:15 -05:00
parent a2d4243f79
commit 3c643eb4df
3 changed files with 15 additions and 28 deletions

View File

@ -27,19 +27,19 @@ Install this package using pip as follows :
Stable :
pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git
pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@release
Latest Development (not fully tested):
pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git@risk
pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@dev
The framework depends on pandas and numpy (for now). Below is a basic sample to get started quickly.
import numpy as np
import pandas as pd
import risk
import privacykit
mydf = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),50),"y":np.random.choice( np.random.randint(1,10),50),"z":np.random.choice( np.random.randint(1,10),50),"r":np.random.choice( np.random.randint(1,10),50) })
print (mydf.risk.evaluate())

View File

@ -107,38 +107,25 @@ class deid :
for size in np.arange(2,len(columns)) :
p = list(combinations(columns,size))
p = (np.array(p)[ np.random.choice( len(p), _policy_count)].tolist())
flag = 'Policy_'+str(_index)
_index += 1
for cols in p :
flag = 'Policy_'+str(_index)
r = self.evaluate(sample=sample,cols=cols,flag = flag)
p = pd.DataFrame(1*sample.columns.isin(cols)).T
p.columns = sample.columns
o = pd.concat([o,r.join(p)])
o['attr'] = ','.join(cols)
_index += 1
#
# We rename the flag column to policies and number the policies sequentially; the attr column summarizes the attributes (comma-separated) of each policy
#
# for i in np.arange(RUNS):
# if 'strict' not in args or ('strict' in args and args['strict'] is False):
# n = np.random.randint(2,k)
# else:
# n = args['field_count']
# cols = np.random.choice(columns,n,replace=False).tolist()
# params = {'sample':sample,'cols':cols}
# if pop is not None :
# params['pop'] = pop
# if pop_size > 0 :
# params['pop_size'] = pop_size
# r = self.evaluate(**params)
# #
# # let's put the policy in place
# p = pd.DataFrame(1*sample.columns.isin(cols)).T
# p.columns = sample.columns
# # o = o.append(r.join(p))
# o = pd.concat([o,r.join(p)])
o.index = np.arange(o.shape[0]).astype(np.int64)
o = o.rename(columns={'flag':'policies'})
return o
def evaluate(self, **args):
"""

View File

@ -4,11 +4,11 @@ This is a build file for the
from setuptools import setup, find_packages
setup(
name = "risk",
name = "privacykit",
version = "0.8.1",
author = "Healthcare/IO - The Phi Technology LLC & Health Information Privacy Lab",
author_email = "info@the-phi.com",
license = "MIT",
packages=['risk'],
packages=['privacykit'],
install_requires = ['numpy','pandas']
)