Bug fix: process causing an error when writing to BigQuery

This commit is contained in:
Steve Nyemba 2020-03-27 00:34:05 -05:00
parent 205adf8fa6
commit e8906d1646
1 changed file with 7 additions and 3 deletions

View File

@ -4,7 +4,7 @@ from transport import factory
import numpy as np
import time
import os
from multiprocessing import Process
from multiprocessing import Process, Lock
import pandas as pd
from google.oauth2 import service_account
import data.maker
@ -16,9 +16,11 @@ from data.params import SYS_ARGS
DATASET='combined20191004v2_deid'
class Components :
lock = Lock()
class KEYS :
PIPELINE_KEY = 'pipeline'
SQL_FILTER = 'filter'
@staticmethod
def get_logger(**args) :
return factory.instance(type='mongo.MongoWriter',args={'dbname':'aou','doc':args['context']})
@ -232,10 +234,12 @@ class Components :
if 'dump' in args :
print (_args['data'].head())
else:
Components.lock.acquire()
data_comp.to_gbq(if_exists='append',destination_table=partial,credentials=credentials,chunksize=90000)
INSERT_FLAG = 'replace' if 'partition' not in args or 'segment' not in args else 'append'
_args['data'].to_gbq(if_exists='append',destination_table=complete,credentials=credentials,chunksize=90000)
Components.lock.release()
_id = 'dataset'
info = {"full":{_id:_fname,"rows":_args['data'].shape[0]},"partial":{"path":_pname,"rows":data_comp.shape[0]} }
if partition :
@ -327,8 +331,8 @@ if __name__ == '__main__' :
job.name = 'generator # '+str(index)
job.start()
jobs.append(job)
if len(jobs) == 1 :
job.join()
# if len(jobs) == 1 :
# job.join()
print (["Started ",len(jobs),"generators" if len(jobs)>1 else "generator" ])
while len(jobs)> 0 :