Merge remote-tracking branch 'origin' into feature/uc-volumes

Shreyas Goenka 2024-09-15 23:21:27 +02:00
commit fa545777bd
No known key found for this signature in database
GPG Key ID: 92A07DF49CCB0622
26 changed files with 404 additions and 17 deletions

View File

@@ -107,11 +107,18 @@ jobs:
        run: npm install -g ajv-cli@5.0.0

      # Assert that the generated bundle schema is a valid JSON schema by using
      # ajv-cli to validate it against a sample configuration file.
      # ajv-cli to validate it against bundle configuration files.
      # By default the ajv-cli runs in strict mode which will fail if the schema
      # itself is not valid. Strict mode is more strict than the JSON schema
      # specification. See for details: https://ajv.js.org/options.html#strict-mode-options
      - name: Validate bundle schema
        run: |
          go run main.go bundle schema > schema.json
          ajv -s schema.json -d ./bundle/tests/basic/databricks.yml

          for file in ./bundle/internal/schema/testdata/pass/*.yml; do
            ajv test -s schema.json -d $file --valid
          done

          for file in ./bundle/internal/schema/testdata/fail/*.yml; do
            ajv test -s schema.json -d $file --invalid
          done

View File

@@ -25,6 +25,20 @@ func ConvertJobToValue(job *jobs.Job) (dyn.Value, error) {
		value["tasks"] = dyn.NewValue(tasks, []dyn.Location{{Line: jobOrder.Get("tasks")}})
	}

	// We're processing job.Settings.Parameters separately to retain empty default values.
	if len(job.Settings.Parameters) > 0 {
		params := make([]dyn.Value, 0)
		for _, parameter := range job.Settings.Parameters {
			p := map[string]dyn.Value{
				"name":    dyn.NewValue(parameter.Name, []dyn.Location{{Line: 0}}), // We use Line: 0 to ensure that the name goes first.
				"default": dyn.NewValue(parameter.Default, []dyn.Location{{Line: 1}}),
			}
			params = append(params, dyn.NewValue(p, []dyn.Location{}))
		}
		value["parameters"] = dyn.NewValue(params, []dyn.Location{{Line: jobOrder.Get("parameters")}})
	}

	return yamlsaver.ConvertToMapValue(job.Settings, jobOrder, []string{"format", "new_cluster", "existing_cluster_id"}, value)
}
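
For context on the Line values in the hunk above: yamlsaver emits the keys of a converted map in ascending order of their recorded source lines, so pinning "name" to line 0 and "default" to line 1 fixes the key order in the generated YAML even though Go maps are unordered. A minimal standalone sketch of the construction, reusing only the dyn calls shown in the diff (the wrapping main is illustrative):

package main

import (
	"github.com/databricks/cli/libs/dyn"
)

func main() {
	// A parameter with an empty default. Because the entry is built
	// explicitly rather than dropped as a zero value, the saver can
	// later emit `default: ""` instead of omitting the field.
	p := map[string]dyn.Value{
		"name":    dyn.NewValue("empty", []dyn.Location{{Line: 0}}), // emitted first
		"default": dyn.NewValue("", []dyn.Location{{Line: 1}}),      // emitted second
	}
	_ = dyn.NewValue(p, []dyn.Location{})
}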

View File

@@ -0,0 +1,3 @@
bundle:
  # expected type is 'string'
  name: 1234

View File

@@ -0,0 +1,4 @@
resources:
  jobs:
    myjob:
      format: INVALID_VALUE

View File

@@ -0,0 +1,6 @@
resources:
  models:
    mymodel:
      latest_versions:
        - creation_timestamp: 123
          status: INVALID_VALUE

View File

@@ -0,0 +1,8 @@
resources:
  jobs:
    outer:
      name: outer job
      tasks:
        - task_key: run job task 1
          run_job_task:
            job_id: ${invalid.reference}

View File

@@ -0,0 +1,5 @@
resources:
  models:
    mymodel:
      latest_versions:
        - creation_timestamp: ${invalid.reference}

View File

@@ -0,0 +1,9 @@
resources:
  jobs:
    foo:
      name: my job
      tasks:
        # All tasks need to have a task_key.
        - notebook_task:
            notebook_path: /Users/abc/notebooks/inner
          existing_cluster_id: abcd

View File

@@ -0,0 +1,5 @@
resources:
  jobs:
    myjob:
      # unknown fields should cause schema failure.
      unknown_field: "value"

View File

@@ -0,0 +1,6 @@
resources:
  models:
    mymodel:
      creation_timestamp: 123
      description: "my model"
      unknown: "value"

View File

@@ -0,0 +1 @@
unknown: value

View File

@@ -0,0 +1,11 @@
artifacts:
  abc:
    path: /Workspace/a/b/c
    type: wheel
    files:
      - source: ./x.whl

resources:
  jobs:
    foo:
      name: ${artifacts.abc.type}

View File

@@ -0,0 +1,2 @@
bundle:
  name: basic

View File

@@ -0,0 +1,4 @@
targets:
  development:
    variables:
      myvar: value

View File

@@ -0,0 +1,63 @@
bundle:
  name: a job

workspace:
  host: "https://myworkspace.com"
  root_path: /abc

presets:
  name_prefix: "[DEV]"
  jobs_max_concurrent_runs: 10

variables:
  simplevar:
    default: true
    description: "simplevar description"
  complexvar:
    default:
      key1: value1
      key2: value2
      key3:
        - value3
        - value4
    description: "complexvar description"

run_as:
  service_principal_name: myserviceprincipal

resources:
  jobs:
    myjob:
      name: myjob
      continuous:
        pause_status: PAUSED
      edit_mode: EDITABLE
      max_concurrent_runs: 10
      description: "my job description"
      email_notifications:
        no_alert_for_skipped_runs: true
      environments:
        - environment_key: venv
          spec:
            dependencies:
              - python=3.7
            client: "myclient"
      format: MULTI_TASK
      tags:
        foo: bar
        bar: baz
      tasks:
        - task_key: mytask
          notebook_task:
            notebook_path: ${var.simplevar}
          existing_cluster_id: abcd
        - task_key: mytask2
          for_each_task:
            inputs: av
            concurrency: 10
            task:
              task_key: inside_for_each
              notebook_task:
                notebook_path: ${var.complexvar.key3[0]}
        - ${var.complexvar}

View File

@@ -0,0 +1,72 @@
bundle:
  name: ML

workspace:
  host: "https://myworkspace.com"
  root_path: /abc

presets:
  name_prefix: "[DEV]"
  jobs_max_concurrent_runs: 10

variables:
  simplevar:
    default: "true"
    description: "simplevar description"
  complexvar:
    default:
      key1: value1
      key2: value2
      key3:
        - value3
        - value4
    description: "complexvar description"

resources:
  models:
    mymodel:
      creation_timestamp: 123
      description: "my model"
      latest_versions:
        - creation_timestamp: 123
          tags: ${var.complexvar.key1}
          status: READY
      permissions:
        - service_principal_name: myserviceprincipal
          level: CAN_MANAGE

  experiments:
    myexperiment:
      artifact_location: /dbfs/myexperiment
      last_update_time: ${var.complexvar.key2}
      lifecycle_stage: ${var.simplevar}
      permissions:
        - service_principal_name: myserviceprincipal
          level: CAN_MANAGE

  model_serving_endpoints:
    myendpoint:
      config:
        served_models:
          - model_name: ${resources.models.mymodel.name}
            model_version: abc
            scale_to_zero_enabled: true
            workload_size: Large
      name: myendpoint

  schemas:
    myschema:
      catalog_name: mycatalog
      name: myschema

  registered_models:
    myregisteredmodel:
      catalog_name: mycatalog
      name: myregisteredmodel
      schema_name: ${resources.schemas.myschema.name}
      grants:
        - principal: abcd
          privileges:
            - SELECT
            - INSERT

View File

@@ -0,0 +1,54 @@
bundle:
  name: a pipeline

workspace:
  host: "https://myworkspace.com"
  root_path: /abc

presets:
  name_prefix: "[DEV]"
  jobs_max_concurrent_runs: 10

variables:
  simplevar:
    default: true
    description: "simplevar description"
  complexvar:
    default:
      key1: value1
      key2: value2
      key3:
        - value3
        - value4
    description: "complexvar description"

artifacts:
  mywheel:
    path: ./mywheel.whl
    type: WHEEL

run_as:
  service_principal_name: myserviceprincipal

resources:
  jobs:
    myjob:
      name: myjob
      tasks:
        - task_key: ${bundle.name} pipeline trigger
          pipeline_task:
            pipeline_id: ${resources.mypipeline.id}

  pipelines:
    mypipeline:
      name: mypipeline
      libraries:
        - whl: ./mywheel.whl
      catalog: ${var.complexvar.key2}
      development: true
      clusters:
        - autoscale:
            mode: ENHANCED
            max_workers: 10
            min_workers: 1

View File

@@ -0,0 +1,16 @@
bundle:
  name: quality_monitor

resources:
  quality_monitors:
    myqualitymonitor:
      inference_log:
        granularities:
          - a
          - b
        model_id_col: a
        prediction_col: b
        timestamp_col: c
        problem_type: PROBLEM_TYPE_CLASSIFICATION
      assets_dir: /dbfs/mnt/abc
      output_schema_name: default

View File

@@ -0,0 +1,56 @@
bundle:
  name: a run job task
  databricks_cli_version: 0.200.0
  compute_id: "mycompute"

variables:
  simplevar:
    default: 5678
    description: "simplevar description"
  complexvar:
    default:
      key1: 1234
      key2: value2
      key3:
        - value3
        - 9999
    description: "complexvar description"

resources:
  jobs:
    inner:
      permissions:
        - user_name: user1
          level: CAN_MANAGE
      name: inner job
      tasks:
        - task_key: inner notebook task
          notebook_task:
            notebook_path: /Users/abc/notebooks/inner
          existing_cluster_id: abcd

    outer:
      name: outer job
      tasks:
        - task_key: run job task 1
          run_job_task:
            job_id: 1234

        - task_key: run job task 2
          run_job_task:
            job_id: ${var.complexvar.key1}

        - task_key: run job task 3
          run_job_task:
            job_id: ${var.simplevar}

        - task_key: run job task 4
          run_job_task:
            job_id: ${resources.inner.id}

        - task_key: run job task 5
          run_job_task:
            job_id: ${var.complexvar.key3[1]}

View File

@@ -0,0 +1,24 @@
bundle:
  name: basic

variables:
  complexvar:
    default:
      key1: 1234
      key2: value2
      key3:
        - value3
        - 9999
    description: complexvar description

resources:
  schemas:
    myschema:
      name: myschema
      catalog_name: main
      grants:
        - ${var.complexvar}
        - principal: ${workspace.current_user.me}
          privileges:
            - ${var.complexvar.key3[0]}
            - ${var.complexvar.key2}

View File

@@ -152,6 +152,12 @@ func TestGenerateJobCommand(t *testing.T) {
				},
			},
		},
		Parameters: []jobs.JobParameterDefinition{
			{
				Name:    "empty",
				Default: "",
			},
		},
	},
}, nil)
@@ -198,6 +204,9 @@ func TestGenerateJobCommand(t *testing.T) {
      - task_key: notebook_task
        notebook_task:
          notebook_path: %s
      parameters:
        - name: empty
          default: ""
`, filepath.Join("..", "src", "notebook.py")), string(data))

	data, err = os.ReadFile(filepath.Join(srcDir, "notebook.py"))

View File

@@ -36,7 +36,8 @@ func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) {
	wsDir := internal.TemporaryWorkspaceDir(t, w)

	b := &bundle.Bundle{
		RootPath: dir,
		RootPath:     dir,
		SyncRootPath: dir,
		Config: config.Root{
			Bundle: config.Bundle{
				Target: "whatever",
@@ -100,7 +101,8 @@ func TestAccUploadArtifactFileToCorrectRemotePathWithEnvironments(t *testing.T)
	wsDir := internal.TemporaryWorkspaceDir(t, w)

	b := &bundle.Bundle{
		RootPath: dir,
		RootPath:     dir,
		SyncRootPath: dir,
		Config: config.Root{
			Bundle: config.Bundle{
				Target: "whatever",
@@ -169,7 +171,8 @@ func TestAccUploadArtifactFileToCorrectRemotePathForVolumes(t *testing.T) {
	touchEmptyFile(t, whlPath)

	b := &bundle.Bundle{
		RootPath: dir,
		RootPath:     dir,
		SyncRootPath: dir,
		Config: config.Root{
			Bundle: config.Bundle{
				Target: "whatever",

View File

@@ -151,6 +151,8 @@ func isScalarValueInString(v dyn.Value) bool {
	switch v.MustString() {
	case "true", "false":
		return true
	case "":
		return true
	default:
		_, err := parseNumber(v.MustString())
		return err == nil
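
The new empty-string case closes a round-trip gap: an unquoted empty scalar (`default:`) is read back by YAML as null, so the saver has to quote it to preserve the string type, just as it already quotes strings that look like booleans or numbers. A standalone sketch of that decision rule (the helper name and use of strconv are illustrative, not the actual yamlsaver internals):

package main

import (
	"fmt"
	"strconv"
)

// needsQuoting mirrors the idea behind isScalarValueInString: a string
// scalar must be quoted when its raw form would be parsed by YAML as a
// different type (a boolean, a number, or the empty/null value).
func needsQuoting(s string) bool {
	switch s {
	case "true", "false", "":
		return true
	}
	_, err := strconv.ParseFloat(s, 64)
	return err == nil
}

func main() {
	fmt.Println(needsQuoting(""))      // true: `default:` would read back as null
	fmt.Println(needsQuoting("true"))  // true: would read back as a boolean
	fmt.Println(needsQuoting("myjob")) // false: safe to emit unquoted
}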

View File

@@ -3,10 +3,11 @@ resources:
    {{.project_name}}_job:
      name: {{.project_name}}_job

      schedule:
        # Run every day at 9:27 AM
        quartz_cron_expression: 21 27 9 * * ?
        timezone_id: UTC
      trigger:
        # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
        periodic:
          interval: 1
          unit: DAYS

      email_notifications:
        on_failure:

View File

@@ -10,10 +10,11 @@ resources:
    {{.project_name}}_job:
      name: {{.project_name}}_job

      schedule:
        # Run every day at 8:37 AM
        quartz_cron_expression: '44 37 8 * * ?'
        timezone_id: Europe/Amsterdam
      trigger:
        # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
        periodic:
          interval: 1
          unit: DAYS

      {{- if not is_service_principal}}

View File

@@ -4,10 +4,11 @@ resources:
    {{.project_name}}_sql_job:
      name: {{.project_name}}_sql_job

      schedule:
        # Run every day at 7:17 AM
        quartz_cron_expression: '44 17 7 * * ?'
        timezone_id: Europe/Amsterdam
      trigger:
        # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
        periodic:
          interval: 1
          unit: DAYS

      {{- if not is_service_principal}}