diff --git a/pipeline.py b/pipeline.py index fcf9912..6bbec2f 100644 --- a/pipeline.py +++ b/pipeline.py @@ -12,7 +12,7 @@ from data.params import SYS_ARGS # # The configuration array is now loaded and we will execute the pipe line as follows -DATASET='combined20190510' +DATASET='combined20191004v2_deid' class Components : @@ -233,11 +233,12 @@ if __name__ == '__main__' : index = int(SYS_ARGS['index']) if 'index' in SYS_ARGS else 0 args = (PIPELINE[index]) - args['dataset'] = 'combined20190510' + args = dict(args,**SYS_ARGS) args['max_rows'] = int(args['max_rows']) if 'max_rows' in args else 3 args['part_size']= int(args['part_size']) if 'part_size' in args else 3 - + if 'dataset' not in args : + args['dataset'] = 'combined20191004v2_deid' # # @TODO: