Enabling better filter handling

This commit is contained in:
Steve Nyemba 2019-10-08 12:37:35 -05:00
parent edb2d7b7c7
commit 0c8625caf5
1 changed file with 16 additions and 7 deletions

View File

@@ -48,8 +48,8 @@ class pseudonym :
""" """
credentials = service_account.Credentials.from_service_account_file(args['key']) credentials = service_account.Credentials.from_service_account_file(args['key'])
SQL = ["SELECT * FROM :dataset.:table"] SQL = ["SELECT * FROM :dataset.:table"]
if 'filter' in args : # if 'filter' in args :
SQL += ['WHERE',args['filter']] # SQL += ['WHERE',args['filter']]
dataset = args['dataset'] dataset = args['dataset']
table = args['table'] table = args['table']
SQL = " ".join(SQL+["LIMIT 1"]).replace(":dataset",dataset).replace(":table",table) SQL = " ".join(SQL+["LIMIT 1"]).replace(":dataset",dataset).replace(":table",table)
@@ -75,11 +75,7 @@ class pseudonym :
for name in columns : for name in columns :
p = dict(args,**{"field":name}) p = dict(args,**{"field":name})
p['filter'] = '' if 'filter' not in args else args['filter'] p['filter'] = '' if 'filter' not in args else args['filter']
# thread = threading.Thread(target=pseudonym.post, args=(p,))
# thread.start()
# if columns.tolist().index(name) == 0 :
# thread.join()
pseudonym.post(**p) pseudonym.post(**p)
@@ -128,10 +124,16 @@ class Builder :
TEMPLATE = ['(SELECT encoded FROM :dataset'+DATASET_SUFFIX+'.'+PSEUDO_TABLENAME,"WHERE table=':table' AND field = ':name' AND CAST(values AS STRING)=CAST(:table.:name AS STRING ) ) as :name"] TEMPLATE = ['(SELECT encoded FROM :dataset'+DATASET_SUFFIX+'.'+PSEUDO_TABLENAME,"WHERE table=':table' AND field = ':name' AND CAST(values AS STRING)=CAST(:table.:name AS STRING ) ) as :name"]
SQL = ["SELECT"] SQL = ["SELECT"]
FIELDS = [] FIELDS = []
FILTER = args['filter'] if 'filter' in args else ""
for field in columns : for field in columns :
FIELDS += [" ".join(TEMPLATE).replace(":name",field)] FIELDS += [" ".join(TEMPLATE).replace(":name",field)]
# if field in FILTER :
# FILTER = FILTER.replace(field,'values')
SQL += [",\n\t".join(FIELDS)] SQL += [",\n\t".join(FIELDS)]
SQL += ['FROM :dataset.:table'] SQL += ['FROM :dataset.:table']
if FILTER != "" :
SQL += ["WHERE ",FILTER]
return ("\n".join(SQL).replace(":dataset",args['dataset']).replace(':table',args['table']) ) return ("\n".join(SQL).replace(":dataset",args['dataset']).replace(':table',args['table']) )
def process(self,**args): def process(self,**args):
@@ -233,6 +235,10 @@ if __name__ == '__main__' :
if not os.path.exists(SYS_ARGS['export']) : if not os.path.exists(SYS_ARGS['export']) :
os.mkdir(SYS_ARGS['export']) os.mkdir(SYS_ARGS['export'])
SQL = builder.encode(**SYS_ARGS) SQL = builder.encode(**SYS_ARGS)
#
# Assuming the user wants to filter the records returned :
#
credentials = service_account.Credentials.from_service_account_file(SYS_ARGS['key']) credentials = service_account.Credentials.from_service_account_file(SYS_ARGS['key'])
df = pd.read_gbq(SQL,credentials =credentials,dialect='standard') df = pd.read_gbq(SQL,credentials =credentials,dialect='standard')
FILENAME = os.sep.join([SYS_ARGS['export'],SYS_ARGS['table']+'.csv']) FILENAME = os.sep.join([SYS_ARGS['export'],SYS_ARGS['table']+'.csv'])
@@ -240,6 +246,9 @@ if __name__ == '__main__' :
# This would allow us to export it to wherever we see fit # This would allow us to export it to wherever we see fit
print (FILENAME) print (FILENAME)
df.to_csv(FILENAME,index=False) df.to_csv(FILENAME,index=False)
f = open(FILENAME.replace('.csv','.sql'),'w+')
f.write(SQL)
f.close()
elif 'pseudo' in SYS_ARGS : elif 'pseudo' in SYS_ARGS :
builder.process(**SYS_ARGS) builder.process(**SYS_ARGS)
else: else: