From a724a1129db86ba82d87fea518bb4d51c4cee546 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 27 Aug 2024 16:48:09 +0200 Subject: [PATCH] Add end to end integration tests for bundle JSON Schema --- .github/workflows/push.yml | 6 +- bundle/tests/schema/pass/basic.yml | 2 + bundle/tests/schema/pass/job.yml | 72 ++++++++++++++++++++ bundle/tests/schema/pass/ml.yml | 72 ++++++++++++++++++++ bundle/tests/schema/pass/pipeline.yml | 58 ++++++++++++++++ bundle/tests/schema/pass/quality_monitor.yml | 16 +++++ bundle/tests/schema/pass/run_job_task.yml | 56 +++++++++++++++ bundle/tests/schema/pass/schema.yml | 24 +++++++ 8 files changed, 304 insertions(+), 2 deletions(-) create mode 100644 bundle/tests/schema/pass/basic.yml create mode 100644 bundle/tests/schema/pass/job.yml create mode 100644 bundle/tests/schema/pass/ml.yml create mode 100644 bundle/tests/schema/pass/pipeline.yml create mode 100644 bundle/tests/schema/pass/quality_monitor.yml create mode 100644 bundle/tests/schema/pass/run_job_task.yml create mode 100644 bundle/tests/schema/pass/schema.yml diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 08edfb9da..19b416a51 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -107,11 +107,13 @@ jobs: run: npm install -g ajv-cli@5.0.0 # Assert that the generated bundle schema is a valid JSON schema by using - # ajv-cli to validate it against a sample configuration file. + # ajv-cli to validate it against bundle configuration files. # By default the ajv-cli runs in strict mode which will fail if the schema # itself is not valid. Strict mode is more strict than the JSON schema # specification. 
See for details: https://ajv.js.org/options.html#strict-mode-options - name: Validate bundle schema run: | go run main.go bundle schema > schema.json - ajv -s schema.json -d ./bundle/tests/basic/databricks.yml + for file in ./bundle/tests/schema/pass/*.yml; do + ajv -s schema.json -d $file + done diff --git a/bundle/tests/schema/pass/basic.yml b/bundle/tests/schema/pass/basic.yml new file mode 100644 index 000000000..de02d20bc --- /dev/null +++ b/bundle/tests/schema/pass/basic.yml @@ -0,0 +1,2 @@ +bundle: + name: basic diff --git a/bundle/tests/schema/pass/job.yml b/bundle/tests/schema/pass/job.yml new file mode 100644 index 000000000..0feb02869 --- /dev/null +++ b/bundle/tests/schema/pass/job.yml @@ -0,0 +1,72 @@ +bundle: + name: a job + +workspace: + host: "https://myworkspace.com" + root_path: /abc + +presets: + name_prefix: "[DEV]" + jobs_max_concurrent_runs: 10 + +variables: + simplevar: + default: true + description: "simplevar description" + + complexvar: + default: + key1: value1 + key2: value2 + key3: + - value3 + - value4 + description: "complexvar description" + +# TODO: Include enums for fields that are a part of the bundle config in the generated +# JSON schema. +# TODO: Are all references allowed for "resources" or just ids? +# TODO: Ideas for fails: +# - missing required fields +# - values of the wrong type +# - enums with the wrong value +# - invalid variable references, like not the right format. 
+ +run_as: + service_principal_name: myserviceprincipal + +resources: + jobs: + myjob: + name: myjob + continuous: + pause_status: PAUSED + edit_mode: EDITABLE + max_concurrent_runs: 10 + description: "my job description" + email_notifications: + no_alert_for_skipped_runs: true + environments: + - environment_key: venv + spec: + dependencies: + - python=3.7 + client: "myclient" + format: MULTI_TASK + tags: + foo: bar + bar: baz + tasks: + - task_key: mytask + notebook_task: + notebook_path: ${var.simplevar} + existing_cluster_id: abcd + - task_key: mytask2 + for_each_task: + inputs: av + concurrency: 10 + task: + task_key: inside_for_each + notebook_task: + notebook_path: ${var.complexvar.key3[0]} + - ${var.complexvar} diff --git a/bundle/tests/schema/pass/ml.yml b/bundle/tests/schema/pass/ml.yml new file mode 100644 index 000000000..806631fe5 --- /dev/null +++ b/bundle/tests/schema/pass/ml.yml @@ -0,0 +1,72 @@ +bundle: + name: ML + +workspace: + host: "https://myworkspace.com" + root_path: /abc + +presets: + name_prefix: "[DEV]" + jobs_max_concurrent_runs: 10 + +variables: + simplevar: + default: "true" + description: "simplevar description" + + complexvar: + default: + key1: value1 + key2: value2 + key3: + - value3 + - value4 + description: "complexvar description" + +resources: + models: + mymodel: + creation_timestamp: 123 + description: "my model" + latest_versions: + - creation_timestamp: 123 + tags: ${var.complexvar.key1} + status: READY + permissions: + - service_principal_name: myserviceprincipal + level: CAN_MANAGE + + experiments: + myexperiment: + artifact_location: /dbfs/myexperiment + last_update_time: 123 + lifecycle_stage: ${var.simplevar} + permissions: + - service_principal_name: myserviceprincipal + level: CAN_MANAGE + + model_serving_endpoints: + myendpoint: + config: + served_models: + - model_name: ${resources.models.mymodel.name} + model_version: abc + scale_to_zero_enabled: true + workload_size: Large + name: myendpoint + + schemas: + 
 myschema: + catalog_name: mycatalog + name: myschema + + registered_models: + myregisteredmodel: + catalog_name: mycatalog + name: myregisteredmodel + schema_name: ${resources.schemas.myschema.name} + grants: + - principal: abcd + privileges: + - SELECT + - INSERT diff --git a/bundle/tests/schema/pass/pipeline.yml b/bundle/tests/schema/pass/pipeline.yml new file mode 100644 index 000000000..71c9191ad --- /dev/null +++ b/bundle/tests/schema/pass/pipeline.yml @@ -0,0 +1,58 @@ +bundle: + name: a pipeline + +workspace: + host: "https://myworkspace.com" + root_path: /abc + +presets: + name_prefix: "[DEV]" + jobs_max_concurrent_runs: 10 + +variables: + simplevar: + default: true + description: "simplevar description" + + complexvar: + default: + key1: value1 + key2: value2 + key3: + - value3 + - value4 + description: "complexvar description" + +# TODO: Include enums for fields that are a part of the bundle config in the generated +# JSON schema. +# TODO: No need to include the regex patterns for string fields. Fix upstream. +# TODO: Clean up yaml-ls references in the tests, and the spec file as well. 
+artifacts: + mywheel: + path: ./mywheel.whl + type: WHEEL + +run_as: + service_principal_name: myserviceprincipal + +resources: + jobs: + myjob: + name: myjob + tasks: + - task_key: ${bundle.name} pipeline trigger + pipeline_task: + pipeline_id: ${resources.mypipeline.id} + + pipelines: + mypipeline: + name: mypipeline + libraries: + - whl: ./mywheel.whl + catalog: ${var.complexvar.key2} + development: true + clusters: + - autoscale: + mode: ENHANCED + max_workers: 10 + min_workers: 1 diff --git a/bundle/tests/schema/pass/quality_monitor.yml b/bundle/tests/schema/pass/quality_monitor.yml new file mode 100644 index 000000000..a9be59329 --- /dev/null +++ b/bundle/tests/schema/pass/quality_monitor.yml @@ -0,0 +1,16 @@ +bundle: + name: quality_monitor + +resources: + quality_monitors: + myqualitymonitor: + inference_log: + granularities: + - a + - b + model_id_col: a + prediction_col: b + timestamp_col: c + problem_type: PROBLEM_TYPE_CLASSIFICATION + assets_dir: /dbfs/mnt/abc + output_schema_name: default diff --git a/bundle/tests/schema/pass/run_job_task.yml b/bundle/tests/schema/pass/run_job_task.yml new file mode 100644 index 000000000..be2ca22cd --- /dev/null +++ b/bundle/tests/schema/pass/run_job_task.yml @@ -0,0 +1,56 @@ +bundle: + name: a run job task + databricks_cli_version: 0.200.0 + compute_id: "mycompute" + + +variables: + simplevar: + default: 5678 + description: "simplevar description" + + complexvar: + default: + key1: 1234 + key2: value2 + key3: + - value3 + - 9999 + description: "complexvar description" + +resources: + jobs: + inner: + permissions: + - user_name: user1 + level: CAN_MANAGE + + name: inner job + tasks: + - task_key: inner notebook task + notebook_task: + notebook_path: /Users/abc/notebooks/inner + existing_cluster_id: abcd + + outer: + name: outer job + tasks: + - task_key: run job task 1 + run_job_task: + job_id: 1234 + + - task_key: run job task 2 + run_job_task: + job_id: ${var.complexvar.key1} + + - task_key: run job task 3 + 
run_job_task: + job_id: ${var.simplevar} + + - task_key: run job task 4 + run_job_task: + job_id: ${resources.inner.id} + + - task_key: run job task 5 + run_job_task: + job_id: ${var.complexvar.key3[1]} diff --git a/bundle/tests/schema/pass/schema.yml b/bundle/tests/schema/pass/schema.yml new file mode 100644 index 000000000..37d0f6f7a --- /dev/null +++ b/bundle/tests/schema/pass/schema.yml @@ -0,0 +1,24 @@ +bundle: + name: basic + +variables: + complexvar: + default: + key1: 1234 + key2: value2 + key3: + - value3 + - 9999 + description: complexvar description + +resources: + schemas: + myschema: + name: myschema + catalog_name: main + grants: + - ${var.complexvar} + - principal: ${workspace.current_user.me} + privileges: + - ${var.complexvar.key3[0]} + - ${var.complexvar.key2}