"""
This file serves as proxy to healthcare - io , it will be embedded into the API
"""
import os
import transport
import numpy as np
from healthcareio import x12
import pandas as pd
import smart
from healthcareio.analytics import Apex
import time
class get:
    # PROCS tracks the parser processes spawned by run(); PATH points to the healthcare-io configuration file
    PROCS = []
    PATH = os.sep.join([os.environ['HOME'], '.healthcareio', 'config.json'])
    @staticmethod
    def resume(files, args):
        """
        Determine the files that still need to be processed by taking the complement of the
        files already recorded as completed in the logs.
        @TODO: Support data-stores other than mongodb
        :param files    list of files within a folder
        :param args     configuration
        """
        _args = args['store'].copy()
        if 'mongo' in _args['type']:
            _args['type'] = 'mongo.MongoReader'
        reader = transport.factory.instance(**_args)
        _files = []
        try:
            pipeline = [{"$match": {"completed": {"$eq": True}}}, {"$group": {"_id": "$name"}}, {"$project": {"name": "$_id", "_id": 0}}]
            _args = {"aggregate": "logs", "cursor": {}, "allowDiskUse": True, "pipeline": pipeline}
            _files = reader.read(mongo=_args)
            _files = [item['name'] for item in _files]
        except Exception:
            # if the logs cannot be read, assume nothing has been processed yet
            pass
        print([len(list(set(files) - set(_files))), 'files to be processed'])
        return list(set(files) - set(_files))
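    #
    # Illustration only (not part of the original module): resume() is normally fed the
    # file listing that run() builds further below, e.g.
    #
    #   FILES = []
    #   for root, _dir, f in os.walk(config['args']['folder']):
    #       if f:
    #           FILES += [os.sep.join([root, name]) for name in f]
    #   pending = get.resume(FILES, config)   #-- complement of the files already logged as completed
    #
    # Here `config` is assumed to have the same layout as the _args handed to run().
    #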
    @staticmethod
    def processes(_args):
        _info = pd.DataFrame(smart.top.read(name='healthcare-io.py'))[['name', 'cpu', 'mem']]
        if _info.shape[0] == 0:
            _info = pd.DataFrame({"name": ["healthcare-io.py"], "cpu": [0], "mem": [0]})
        # _info = pd.DataFrame(_info.groupby(['name']).sum())
        # _info['name'] = ['healthcare-io.py']
        m = {'cpu': 'CPU', 'mem': 'RAM', 'name': 'name'}
        _info.columns = [m[name] for name in _info.columns.tolist()]
        _info.index = np.arange(_info.shape[0])
        charts = []
        for label in ['CPU', 'RAM']:
            value = _info[label].sum()
            df = pd.DataFrame({"name": [label], label: [value]})
            charts.append(
                Apex.apply(
                    {"data": df, "chart": {"type": "radial", "axis": {"x": label, "y": "name"}}}
                )['apex']
            )
        #
        # Update the list of tracked processes so subsequent requests reflect the change in counts
        #
        N = 0
        lprocs = []
        for proc in get.PROCS:
            if proc.is_alive():
                lprocs.append(proc)
        N = len(lprocs)
        get.PROCS = lprocs
        return {"process": {"chart": charts, "counts": N}}
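    #
    # Sketch (assumption, not from the original code): the dictionary returned above is
    # what an API route would serialize, i.e. roughly
    #   {"process": {"chart": [<apex radial for CPU>, <apex radial for RAM>], "counts": <live process count>}}
    #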
    @staticmethod
    def files(_args):
        _info = smart.folder.read(path='/data')
        N = _info.files.tolist()[0]
        if 'mongo' in _args['store']['type']:
            store_args = dict(_args['store'].copy(), **{"type": "mongo.MongoReader"})
            # reader = transport.factory.instance(**_args)
            pipeline = [{"$group": {"_id": "$name", "count": {"$sum": {"$cond": [{"$eq": ["$completed", True]}, 1, 0]}}}}, {"$group": {"_id": None, "count": {"$sum": "$count"}}}, {"$project": {"_id": 0, "status": "completed", "count": 1}}]
            query = {"mongo": {"aggregate": "logs", "allowDiskUse": True, "cursor": {}, "pipeline": pipeline}}
            # _info = pd.DataFrame(reader.read(mongo={"aggregate":"logs","allowDiskUse":True,"cursor":{},"pipeline":pipeline}))
            pipeline = [{"$group": {"_id": "$parse", "claims": {"$addToSet": "$name"}}}, {"$project": {"_id": 0, "type": "$_id", "count": {"$size": "$claims"}}}]
            _query = {"mongo": {"aggregate": "logs", "cursor": {}, "allowDiskUse": True, "pipeline": pipeline}}  #-- distribution claims/remits
        else:
            store_args = dict(_args['store'].copy(), **{"type": "disk.SQLiteReader"})
            store_args['args']['table'] = 'logs'
            query = {"sql": "select count(distinct json_extract(data,'$.name')) as count, 'completed' status from logs where json_extract(data,'$.completed') = true"}
            _query = {"sql": "select json_extract(data,'$.parse') as type, count(distinct json_extract(data,'$.name')) as count from logs group by type"}  #-- distribution claims/remits
        reader = transport.factory.instance(**store_args)
        _info = pd.DataFrame(reader.read(**query))
        if not _info.shape[0]:
            _info = pd.DataFrame({"status": ["completed"], "count": [0]})
        _info['count'] = np.round((_info['count'] * 100) / N, 2)
        charts = [Apex.apply({"data": _info, "chart": {"type": "radial", "axis": {"y": "status", "x": "count"}}})['apex']]
        #
        # Let us classify the files now, i.e. claims/remits
        #
        # pipeline = [{"$group":{"_id":"$parse","claims":{"$addToSet":"$name"}}},{"$project":{"_id":0,"type":"$_id","count":{"$size":"$claims"}}}]
        # _args = {"aggregate":"logs","cursor":{},"allowDiskUse":True,"pipeline":pipeline}
        # r = pd.DataFrame(reader.read(mongo=_args))
        r = pd.DataFrame(reader.read(**_query))  #-- distribution claims/remits
        r = Apex.apply({"chart": {"type": "donut", "axis": {"x": "count", "y": "type"}}, "data": r})['apex']
        r['chart']['height'] = '100%'
        r['legend']['position'] = 'bottom'
        charts += [r]
        return {"files": {"counts": N, "chart": charts}}
    pass
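#
# Sketch (assumption, not from the original code): get.files() is likewise meant to be
# served through the API; a caller would do something like
#
#   payload = get.files(config)
#   # payload -> {"files": {"counts": <files under /data>, "chart": [<radial completed %>, <donut claims/remits>]}}
#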
#
# Process handling ....
def run(_args):
    """
    This function will run the jobs as daemon processes.
    :param _args    system configuration
    """
    FILES = []
    BATCH = int(_args['args']['batch'])  #-- number of parser processes (the name is misleading)
    for root, _dir, f in os.walk(_args['args']['folder']):
        if f:
            FILES += [os.sep.join([root, name]) for name in f]
    FILES = get.resume(FILES, _args)
    FILES = np.array_split(FILES, BATCH)
    for FILE_GROUP in FILES:
        FILE_GROUP = FILE_GROUP.tolist()
        # logger.write({"process":index,"parse":_args['parse'],"file_count":len(row)})
        # proc = Process(target=apply,args=(row,info['store'],_info,))
        parser = x12.Parser(get.PATH)  # os.sep.join([PATH,'config.json'])
        parser.set.files(FILE_GROUP)
        parser.daemon = True
        parser.start()
        get.PROCS.append(parser)
        time.sleep(3)
    #
    # @TODO: consider submitting an update to clients via a publish/subscribe framework
    #
    return get.PROCS
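#
# Illustration only: a configuration with the shape that run(), stop() and the get.*
# helpers read. The field values below are placeholders, not taken from a real deployment.
#
#   config = {
#       "args": {"batch": "4", "folder": "/data"},   # number of parser processes, input folder
#       "store": {"type": "mongo", "args": {}}       # data-store settings (placeholder)
#   }
#   procs = run(config)    # spawn the daemon parsers
#   stop(config)           # later: terminate whatever is still alive
#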
def stop(_args):
    for job in get.PROCS:
        if job.is_alive():
            job.terminate()
    get.PROCS = []
    #
    # @TODO: consider submitting an update to clients via a publish/subscribe framework
    pass
def write(src_args, dest_args, files):
    #
    # @TODO: Support for SQLite
    pass
def publish(src_args, dest_args, folder="/data"):
    FILES = []
    for root, _dir, f in os.walk(folder):
        if f:
            FILES += [os.sep.join([root, name]) for name in f]
    #
    # @TODO: Add support for SQLite ....
    FILES = np.array_split(FILES, 4)