""" (c) 2019 Healthcare/IO 1.0 Vanderbilt University Medical Center, Health Information Privacy Laboratory https://hiplab.mc.vanderbilt.edu/healthcareio Authors: Khanhly Nguyen, Steve L. Nyemba License: MIT, terms are available at https://opensource.org/licenses/MIT This parser was originally written by Khanhly Nguyen for her internship and is intended to parse x12 835,837 and others provided the appropriate configuration USAGE : - COMMAND LINE - EMBEDDED """ import hashlib import json import os import sys from itertools import islice from multiprocessing import Process import transport class void : pass class Formatters : def __init__(self): # self.config = config self.get = void() self.get.config = self.get_config self.parse = void() self.parse.sv3 = self.sv3 self.parse.sv2 = self.sv2 self.sv2_parse = self.sv2 self.sv3_parse = self.sv3 self.format_proc = self.procedure self.format_diag = self.diagnosis self.parse.procedure = self.procedure self.parse.diagnosis = self.diagnosis self.parse.date = self.date self.format_date = self.date self.format_pos = self.pos self.format_time = self.time def split(self,row,sep='*',prefix='HI') : """ This function is designed to split an x12 row and """ if row.startswith(prefix) is False: value = [] for row_value in row.replace('~','').split(sep) : if '>' in row_value : if row_value.startswith('HC') or row_value.startswith('AD'): value += row_value.split('>')[:2] else: value += row_value.split('>') if row.startswith('CLM') is False else [row_value] else : value.append(row_value.replace('\n','')) return [xchar.replace('\r','') for xchar in value] #row.replace('~','').split(sep) else: return [ [prefix]+ self.split(item,'>') for item in row.replace('~','').split(sep)[1:] ] def get_config(self,config,row): """ This function will return the meaningfull parts of the configuration for a given item """ _row = list(row) if type(row[0]) == str else list(row[0]) _info = config[_row[0]] if _row[0] in config else {} key = None if '@ref' in _info: key = list(set(_row) & set(_info['@ref'].keys())) if key : key = key[0] return _info['@ref'][key] else: return {} if not _info and 'SIMILAR' in config: # # Let's look for the nearest key using the edit distance if _row[0] in config['SIMILAR'] : key = config['SIMILAR'][_row[0]] _info = config[key] return _info def hash(self,value): salt = os.environ['HEALTHCAREIO_SALT'] if 'HEALTHCAREIO_SALT' in os.environ else '' _value = str(value)+ salt if sys.version_info[0] > 2 : return hashlib.md5(_value.encode('utf-8')).hexdigest() else: return hashlib.md5(_value).hexdigest() def suppress (self,value): return 'N/A' def date(self,value): if len(value) > 8 or '-' in value: value = value.split('-')[0] if len(value) == 8 : year = value[:4] month = value[4:6] day = value[6:] return "-".join([year,month,day])[:10] #{"year":year,"month":month,"day":day} elif len(value) == 6 : year = '20' + value[:2] month = value[2:4] day = value[4:] # # We have a date formatting issue return "-".join([year,month,day]) def time(self,value): pass def sv3(self,value): if '>' in value [1]: terms = value[1].split('>') return {'type':terms[0],'code':terms[1],"amount":float(value[2])} else: return {"code":value[2],"type":value[1],"amount":float(value[3])} def sv2(self,value): # # @TODO: Sometimes there's a suffix (need to inventory all the variations) # if '>' in value or ':' in value: xchar = '>' if '>' in value else ':' _values = value.split(xchar) modifier = {} if len(_values) > 2 : modifier= {"code":_values[2]} if len(_values) > 3 : modifier['type'] = _values[3] _value = {"code":_values[1],"type":_values[0]} if modifier : _value['modifier'] = modifier return _value else: return value def procedure(self,value): for xchar in [':','<'] : if xchar in value and len(value.split(xchar)) > 1 : #_value = {"type":value.split(':')[0].strip(),"code":value.split(':')[1].strip()} _value = {"type":value.split(xchar)[0].strip(),"code":value.split(xchar)[1].strip()} break else: _value = str(value) return _value def diagnosis(self,alue): return [ {"code":item[2], "type":item[1]} for item in value if len(item) > 1] def pos(self,value): """ formatting place of service information within a segment (REF) """ xchar = '>' if '>' in value else ':' x = value.split(xchar) x = {"code":x[0],"indicator":x[1],"frequency":x[2]} if len(x) == 3 else {"code":x[0],"indicator":None,"frequency":None} return x class Parser (Process): def __init__(self,path): Process.__init__(self) self.utils = Formatters() self.get = void() self.get.value = self.get_map self.get.default_value = self.get_default_value _config = json.loads(open(path).read()) self.config = _config['parser'] self.store = _config['store'] self.files = [] self.set = void() self.set.files = self.set_files def set_files(self,files): self.files = files def get_map(self,row,config,version=None): # label = config['label'] if 'label' in config else None handler = Formatters() if 'map' not in config and hasattr(handler,config['apply']): pointer = getattr(handler,config['apply']) object_value = pointer(row) return object_value omap = config['map'] if not version or version not in config else config[version] anchors = config['anchors'] if 'anchors' in config else [] if type(row[0]) == str: object_value = {} for key in omap : index = omap[key] if anchors and set(anchors) & set(row): _key = list(set(anchors) & set(row))[0] aindex = row.index(_key) index = aindex + index if index < len(row) : value = row[index] if 'cast' in config and key in config['cast'] and value.strip() != '' : if config['cast'][key] in ['float','int'] : value = eval(config['cast'][key])(value) elif hasattr(handler,config['cast'][key]): pointer = getattr(handler,config['cast'][key]) value = pointer(value) else: print ("Missing Pointer ",config['cast'][key]) # print (key,value) if type(value) == dict : for objkey in value : if type(value[objkey]) == dict : continue if 'syn' in config and value[objkey] in config['syn'] : value[objkey] = config['syn'][ value[objkey]] value = {key:value} if key not in value else value else: if 'syn' in config and value in config['syn'] : value = config['syn'][value] if type(value) == dict : object_value = dict(object_value, **value) else: object_value[key] = value else: # # we are dealing with a complex object object_value = [] for row_item in row : value = self.get.value(row_item,config,version) object_value.append(value) # # We need to add the index of the object it matters in determining the claim types # # object_value.append( list(get_map(row_item,config,version))) # object_value = {label:object_value} return object_value def apply(self,content,_code) : """ :file content i.e a segment with the envelope :_code 837 or 835 (helps get the appropriate configuration) """ util = Formatters() # header = default_value.copy() value = {} for row in content[:] : row = util.split(row.replace('\n','').replace('~','')) _info = util.get.config(self.config[_code][0],row) if _info : try: tmp = self.get.value(row,_info) # if 'P1080351470' in content[0] and 'PLB' in row: # print (_info) # print (row) # print (tmp) if not tmp : continue if 'label' in _info : label = _info['label'] if type(tmp) == list : value[label] = tmp if label not in value else value[label] + tmp else: if label not in value: value[label] = [tmp] elif len(list(tmp.keys())) == 1 : # print "\t",len(claim[label]),tmp index = len(value[label]) -1 value[label][index] = dict(value[label][index],**tmp) else: value[label].append(tmp) tmp['_index'] = len(value[label]) -1 # if len(value[label]) > 0 : # labels = [] # for item in value[label] : # item['_index'] = len(labels) # if item not in labels : # labels.append(item) # value[label] = labels elif 'field' in _info : name = _info['field'] value[name] = tmp else: value = dict(value,**tmp) pass except Exception as e : print ('__',e.args) pass return value if value else {} def get_default_value(self,content,_code): util = Formatters() TOP_ROW = content[1].split('*') CATEGORY= content[2].split('*')[1].strip() VERSION = content[1].split('*')[-1].replace('~','').replace('\n','') SUBMITTED_DATE = util.parse.date(TOP_ROW[4]) SENDER_ID = TOP_ROW[2] row = util.split(content[3]) _info = util.get_config(self.config[_code][0],row) value = self.get.value(row,_info,VERSION) if _info else {} value['category'] = {"setid": CATEGORY,"version":'X'+VERSION.split('X')[1],"id":VERSION.split('X')[0].strip()} value["submitted"] = SUBMITTED_DATE # value['version'] = VERSION if _code== '835' : value['payer_id'] = SENDER_ID else: value['provider_id'] = SENDER_ID return value def read(self,filename) : """ :formerly get_content This function returns the of the EDI file parsed given the configuration specified. it is capable of identifying a file given the content :section loop prefix (HL, CLP) :config configuration with formatting rules, labels ... :filename location of the file """ # section = section if section else config['SECTION'] logs = [] claims = [] try: file = open(filename.strip(),errors='ignore') INITIAL_ROWS = list(islice(file,4)) #.readlines(4) _code = "unknown" if len(INITIAL_ROWS) == 1 : file = INITIAL_ROWS[0].split('~') INITIAL_ROWS = file[:4] if len(INITIAL_ROWS) < 3 : return None,[{"name":filename,"completed":False}],None section = 'HL' if INITIAL_ROWS[1].split('*')[1] == 'HC' else 'CLP' _code = '837' if section == 'HL' else '835' DEFAULT_VALUE = self.get.default_value(INITIAL_ROWS,_code) DEFAULT_VALUE['name'] = filename.strip() # # In the initial rows, there's redundant information (so much for x12 standard) # index 1 identifies file type i.e CLM for claim and CLP for remittance segment = [] index = 0; for row in file : if row.startswith(section) and not segment: segment = [row] continue elif segment and not row.startswith(section): segment.append(row) if len(segment) > 1 and row.startswith(section): # # process the segment somewhere (create a thread maybe?) # # default_claim = dict({"index":index},**DEFAULT_VALUE) _claim = self.apply(segment,_code) # if _claim['claim_id'] == 'P1080351470' : # print (_claim) # _claim = dict(DEFAULT_VALUE,**_claim) if _claim : _claim['index'] = index #len(claims) claims.append(dict(DEFAULT_VALUE,**_claim)) segment = [row] index += 1 pass # # Handling the last claim found if segment[0].startswith(section) : default_claim = dict({"name":index},**DEFAULT_VALUE) claim = self.apply(segment,_code) if claim : claim['index'] = len(claims) claims.append(dict(DEFAULT_VALUE,**claim)) if type(file) != list : file.close() # x12_file = open(filename.strip(),errors='ignore').read().split('\n') except Exception as e: logs.append ({"parse":_code,"completed":False,"name":filename,"msg":e.args[0]}) return [],logs,None rate = 0 if len(claims) == 0 else (1 + index)/len(claims) logs.append ({"parse":"claims" if _code == '837' else 'remits',"completed":True,"name":filename,"rate":rate}) # self.finish(claims,logs,_code) return claims,logs,_code def run(self): for filename in self.files : content,logs,_code = self.read(filename) self.finish(content,logs,_code) def finish(self,content,logs,_code) : args = self.store _args = json.loads(json.dumps(self.store)) if args['type'] == 'mongo.MongoWriter' : args['args']['doc'] = 'claims' if _code == '837' else 'remits' _args['args']['doc'] = 'logs' if content : writer = transport.factory.instance(**args) writer.write(content) writer.close() if logs : logger = transport.factory.instance(**_args) logger.write(logs) logger.close() # p = Parser('/home/steve/.healthcareio/config.json') # p.set.files(['../../data/small/claims/ssiUB1122042711220427127438.clm_191122T043504']) # path = '../../data/small/claims/ssiUB1122042711220427127438.clm_191122T043504' # path = '../../data/small/claims/problems-with-procs' # path = '../../data/small/remits/1SG03927258.dat_181018T074559' # _path = "../../data/small/remits/1TR21426701.dat_180703T074559" # p.start() # p.join() # claims,logs = p.read(path) # print (json.dumps(claims[3])) # print (logs)