bug fix ...

2022-09-15 17:56:15 -05:00 · 2022-09-15 17:56:15 -05:00 · 3c643eb4df
parent a2d4243f79
commit 3c643eb4df
3 changed files with 15 additions and 28 deletions
--- a/README.md
+++ b/README.md
@@ -27,19 +27,19 @@ Install this package using pip as follows :
 Stable :
-    pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git
+    pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@release
 Latest Development (not fully tested):
-    pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git@risk
+    pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@dev
 The framework will depend on pandas and numpy (for now). Below is a basic sample to get started quickly.
    import numpy as np
    import pandas as pd
-    import risk
+    import privacykit
    mydf = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),50),"y":np.random.choice( np.random.randint(1,10),50),"z":np.random.choice( np.random.randint(1,10),50),"r":np.random.choice( np.random.randint(1,10),50)  })
    print (mydf.risk.evaluate())
--- a/privacykit/risk.py
+++ b/privacykit/risk.py
@@ -107,38 +107,25 @@ class deid :
        for size in np.arange(2,len(columns)) :
            p = list(combinations(columns,size))            
            p = (np.array(p)[ np.random.choice( len(p), _policy_count)].tolist())
-            flag = 'Policy_'+str(_index)
+            
-            _index += 1
+            
            for cols in p :
                flag = 'Policy_'+str(_index)
                r = self.evaluate(sample=sample,cols=cols,flag = flag)
                p =  pd.DataFrame(1*sample.columns.isin(cols)).T
                p.columns = sample.columns
                o = pd.concat([o,r.join(p)])
-        
+                o['attr'] = ','.join(cols)
                _index += 1
        #
        # We rename flags to policies and number them sequentially; we also have an 'attr' column that summarizes the attributes.
        #
-        # for i in np.arange(RUNS):
+      
        #     if 'strict' not in args or ('strict' in args and args['strict'] is False):
        #         n = np.random.randint(2,k)
        #     else:
        #         n = args['field_count']
        #     cols = np.random.choice(columns,n,replace=False).tolist()            
        #     params = {'sample':sample,'cols':cols}
        #     if pop is not None :
        #         params['pop'] = pop
        #     if pop_size > 0  :
        #         params['pop_size'] = pop_size
        #     r = self.evaluate(**params)
        #     #
        #     # let's put the policy in place
        #     p =  pd.DataFrame(1*sample.columns.isin(cols)).T
        #     p.columns = sample.columns
        #     # o = o.append(r.join(p))
        #     o = pd.concat([o,r.join(p)])
        o.index = np.arange(o.shape[0]).astype(np.int64)
-
+        o = o.rename(columns={'flag':'policies'})
        return o
    def evaluate(self, **args):
        """
--- a/setup.py
+++ b/setup.py
@@ -4,11 +4,11 @@ This is a build file for the
 from setuptools import setup, find_packages
 setup(
-    name = "risk",
+    name = "privacykit",
    version = "0.8.1",
    author = "Healthcare/IO - The Phi Technology LLC & Health Information Privacy Lab",
    author_email = "info@the-phi.com",
    license = "MIT",
-    packages=['risk'],
+    packages=['privacykit'],
    install_requires = ['numpy','pandas']
    )