Add end-to-end integration tests for bundle JSON schema (#1726)

shreyas-goenka authored 2024-09-11 14:45:56 +05:30, committed by GitHub
parent 5d2c0e3885
commit c61358407f
19 changed files with 358 additions and 2 deletions


@@ -107,11 +107,18 @@ jobs:
        run: npm install -g ajv-cli@5.0.0
      # Assert that the generated bundle schema is a valid JSON schema by using
-     # ajv-cli to validate it against a sample configuration file.
+     # ajv-cli to validate it against bundle configuration files.
      # By default the ajv-cli runs in strict mode which will fail if the schema
      # itself is not valid. Strict mode is more strict than the JSON schema
      # specification. See for details: https://ajv.js.org/options.html#strict-mode-options
      - name: Validate bundle schema
        run: |
          go run main.go bundle schema > schema.json
-         ajv -s schema.json -d ./bundle/tests/basic/databricks.yml
+         for file in ./bundle/internal/schema/testdata/pass/*.yml; do
+           ajv test -s schema.json -d $file --valid
+         done
+         for file in ./bundle/internal/schema/testdata/fail/*.yml; do
+           ajv test -s schema.json -d $file --invalid
+         done
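
Note for local reproduction: ajv test asserts an expected outcome rather than merely validating, so with --valid the command exits non-zero when a file fails validation, and with --invalid it exits non-zero when a file unexpectedly passes. A minimal sketch of checking a single fixture from the repository root (the fixture file names below are illustrative, not taken from this commit):

    # Illustrative local run; assumes ajv-cli v5 is installed globally.
    go run main.go bundle schema > schema.json
    ajv test -s schema.json -d ./bundle/internal/schema/testdata/pass/job.yml --valid
    ajv test -s schema.json -d ./bundle/internal/schema/testdata/fail/unknown_field.yml --invalid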


@@ -0,0 +1,3 @@
bundle:
  # expected type is 'string'
  name: 1234


@@ -0,0 +1,4 @@
resources:
  jobs:
    myjob:
      format: INVALID_VALUE


@@ -0,0 +1,6 @@
resources:
  models:
    mymodel:
      latest_versions:
        - creation_timestamp: 123
          status: INVALID_VALUE


@@ -0,0 +1,8 @@
resources:
  jobs:
    outer:
      name: outer job
      tasks:
        - task_key: run job task 1
          run_job_task:
            job_id: ${invalid.reference}
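
This fixture is expected to fail because job_id accepts either an integer literal or a well-formed variable reference, and ${invalid.reference} is neither. A minimal illustrative JSON Schema fragment of that shape (the actual pattern generated into schema.json is more elaborate):

    {
      "$comment": "Illustrative sketch, not the generated schema.",
      "anyOf": [
        { "type": "integer" },
        { "type": "string", "pattern": "\\$\\{(var|resources|bundle|workspace|artifacts)(\\.[a-zA-Z_][a-zA-Z0-9_]*)+\\}" }
      ]
    }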


@@ -0,0 +1,5 @@
resources:
  models:
    mymodel:
      latest_versions:
        - creation_timestamp: ${invalid.reference}


@@ -0,0 +1,9 @@
resources:
  jobs:
    foo:
      name: my job
      tasks:
        # All tasks need to have a task_key.
        - notebook_task:
            notebook_path: /Users/abc/notebooks/inner
          existing_cluster_id: abcd
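
The failure here presumably comes from the task object marking task_key as required; a minimal illustrative fragment:

    {
      "$comment": "Illustrative sketch, not the generated schema.",
      "type": "object",
      "properties": { "task_key": { "type": "string" } },
      "required": ["task_key"]
    }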


@@ -0,0 +1,5 @@
resources:
  jobs:
    myjob:
      # unknown fields should cause schema failure.
      unknown_field: "value"
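
Rejecting unknown fields implies the generated schema closes its objects, in JSON Schema terms additionalProperties: false; a minimal illustrative fragment for a job object:

    {
      "$comment": "Illustrative sketch, not the generated schema.",
      "type": "object",
      "properties": { "name": { "type": "string" } },
      "additionalProperties": false
    }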


@@ -0,0 +1,6 @@
resources:
  models:
    mymodel:
      creation_timestamp: 123
      description: "my model"
      unknown: "value"


@@ -0,0 +1 @@
unknown: value


@@ -0,0 +1,11 @@
artifacts:
  abc:
    path: /Workspace/a/b/c
    type: wheel
    files:
      - source: ./x.whl

resources:
  jobs:
    foo:
      name: ${artifacts.abc.type}


@@ -0,0 +1,2 @@
bundle:
  name: basic


@@ -0,0 +1,4 @@
targets:
  development:
    variables:
      myvar: value


@@ -0,0 +1,63 @@
bundle:
  name: a job

workspace:
  host: "https://myworkspace.com"
  root_path: /abc

presets:
  name_prefix: "[DEV]"
  jobs_max_concurrent_runs: 10

variables:
  simplevar:
    default: true
    description: "simplevar description"
  complexvar:
    default:
      key1: value1
      key2: value2
      key3:
        - value3
        - value4
    description: "complexvar description"

run_as:
  service_principal_name: myserviceprincipal

resources:
  jobs:
    myjob:
      name: myjob
      continuous:
        pause_status: PAUSED
      edit_mode: EDITABLE
      max_concurrent_runs: 10
      description: "my job description"
      email_notifications:
        no_alert_for_skipped_runs: true
      environments:
        - environment_key: venv
          spec:
            dependencies:
              - python=3.7
            client: "myclient"
      format: MULTI_TASK
      tags:
        foo: bar
        bar: baz
      tasks:
        - task_key: mytask
          notebook_task:
            notebook_path: ${var.simplevar}
          existing_cluster_id: abcd
        - task_key: mytask2
          for_each_task:
            inputs: av
            concurrency: 10
            task:
              task_key: inside_for_each
              notebook_task:
                notebook_path: ${var.complexvar.key3[0]}
        - ${var.complexvar}


@@ -0,0 +1,72 @@
bundle:
  name: ML

workspace:
  host: "https://myworkspace.com"
  root_path: /abc

presets:
  name_prefix: "[DEV]"
  jobs_max_concurrent_runs: 10

variables:
  simplevar:
    default: "true"
    description: "simplevar description"
  complexvar:
    default:
      key1: value1
      key2: value2
      key3:
        - value3
        - value4
    description: "complexvar description"

resources:
  models:
    mymodel:
      creation_timestamp: 123
      description: "my model"
      latest_versions:
        - creation_timestamp: 123
          tags: ${var.complexvar.key1}
          status: READY
      permissions:
        - service_principal_name: myserviceprincipal
          level: CAN_MANAGE

  experiments:
    myexperiment:
      artifact_location: /dbfs/myexperiment
      last_update_time: ${var.complexvar.key2}
      lifecycle_stage: ${var.simplevar}
      permissions:
        - service_principal_name: myserviceprincipal
          level: CAN_MANAGE

  model_serving_endpoints:
    myendpoint:
      config:
        served_models:
          - model_name: ${resources.models.mymodel.name}
            model_version: abc
            scale_to_zero_enabled: true
            workload_size: Large
      name: myendpoint

  schemas:
    myschema:
      catalog_name: mycatalog
      name: myschema

  registered_models:
    myregisteredmodel:
      catalog_name: mycatalog
      name: myregisteredmodel
      schema_name: ${resources.schemas.myschema.name}
      grants:
        - principal: abcd
          privileges:
            - SELECT
            - INSERT


@@ -0,0 +1,54 @@
bundle:
  name: a pipeline

workspace:
  host: "https://myworkspace.com"
  root_path: /abc

presets:
  name_prefix: "[DEV]"
  jobs_max_concurrent_runs: 10

variables:
  simplevar:
    default: true
    description: "simplevar description"
  complexvar:
    default:
      key1: value1
      key2: value2
      key3:
        - value3
        - value4
    description: "complexvar description"

artifacts:
  mywheel:
    path: ./mywheel.whl
    type: WHEEL

run_as:
  service_principal_name: myserviceprincipal

resources:
  jobs:
    myjob:
      name: myjob
      tasks:
        - task_key: ${bundle.name} pipeline trigger
          pipeline_task:
            pipeline_id: ${resources.mypipeline.id}

  pipelines:
    mypipeline:
      name: mypipeline
      libraries:
        - whl: ./mywheel.whl
      catalog: ${var.complexvar.key2}
      development: true
      clusters:
        - autoscale:
            mode: ENHANCED
            max_workers: 10
            min_workers: 1


@@ -0,0 +1,16 @@
bundle:
  name: quality_monitor

resources:
  quality_monitors:
    myqualitymonitor:
      inference_log:
        granularities:
          - a
          - b
        model_id_col: a
        prediction_col: b
        timestamp_col: c
        problem_type: PROBLEM_TYPE_CLASSIFICATION
      assets_dir: /dbfs/mnt/abc
      output_schema_name: default


@@ -0,0 +1,56 @@
bundle:
  name: a run job task
  databricks_cli_version: 0.200.0
  compute_id: "mycompute"

variables:
  simplevar:
    default: 5678
    description: "simplevar description"
  complexvar:
    default:
      key1: 1234
      key2: value2
      key3:
        - value3
        - 9999
    description: "complexvar description"

resources:
  jobs:
    inner:
      permissions:
        - user_name: user1
          level: CAN_MANAGE
      name: inner job
      tasks:
        - task_key: inner notebook task
          notebook_task:
            notebook_path: /Users/abc/notebooks/inner
          existing_cluster_id: abcd

    outer:
      name: outer job
      tasks:
        - task_key: run job task 1
          run_job_task:
            job_id: 1234
        - task_key: run job task 2
          run_job_task:
            job_id: ${var.complexvar.key1}
        - task_key: run job task 3
          run_job_task:
            job_id: ${var.simplevar}
        - task_key: run job task 4
          run_job_task:
            job_id: ${resources.inner.id}
        - task_key: run job task 5
          run_job_task:
            job_id: ${var.complexvar.key3[1]}


@@ -0,0 +1,24 @@
bundle:
  name: basic

variables:
  complexvar:
    default:
      key1: 1234
      key2: value2
      key3:
        - value3
        - 9999
    description: complexvar description

resources:
  schemas:
    myschema:
      name: myschema
      catalog_name: main
      grants:
        - ${var.complexvar}
        - principal: ${workspace.current_user.me}
          privileges:
            - ${var.complexvar.key3[0]}
            - ${var.complexvar.key2}