From b451905b6ea3091fac34775e3a14a5af9f649d53 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 9 Sep 2024 11:56:16 +0200 Subject: [PATCH 01/15] Expand library globs relative to the sync root (#1756) ## Changes Library glob expansion happens during deployment. Before that, all entries that refer to local paths in resource definitions are made relative to the _sync root_. Before #1694, they were made relative to the _bundle root_. This PR didn't update the library glob expansion code to use the sync root path. If you were using the sync paths setting with library globs, the CLI would fail to expand the globs because the code was using the wrong path to anchor those globs. This change fixes the issue. ## Tests Manually confirmed that this fixes the issue reported in #1755. --- bundle/libraries/expand_glob_references.go | 10 +-- .../libraries/expand_glob_references_test.go | 6 +- bundle/libraries/match_test.go | 8 +-- bundle/libraries/upload.go | 2 +- bundle/libraries/upload_test.go | 8 +-- .../bundle.yml | 3 + bundle/tests/python_wheel_test.go | 64 +++++++++---------- 7 files changed, 49 insertions(+), 52 deletions(-) diff --git a/bundle/libraries/expand_glob_references.go b/bundle/libraries/expand_glob_references.go index 9322a06b..c71615e0 100644 --- a/bundle/libraries/expand_glob_references.go +++ b/bundle/libraries/expand_glob_references.go @@ -39,7 +39,7 @@ func getLibDetails(v dyn.Value) (string, string, bool) { } func findMatches(b *bundle.Bundle, path string) ([]string, error) { - matches, err := filepath.Glob(filepath.Join(b.RootPath, path)) + matches, err := filepath.Glob(filepath.Join(b.SyncRootPath, path)) if err != nil { return nil, err } @@ -52,10 +52,10 @@ func findMatches(b *bundle.Bundle, path string) ([]string, error) { } } - // We make the matched path relative to the root path before storing it + // We make the matched path relative to the sync root path before storing it // to allow upload mutator to distinguish between local and remote 
paths for i, match := range matches { - matches[i], err = filepath.Rel(b.RootPath, match) + matches[i], err = filepath.Rel(b.SyncRootPath, match) if err != nil { return nil, err } @@ -211,8 +211,8 @@ func (e *expand) Name() string { // ExpandGlobReferences expands any glob references in the libraries or environments section // to corresponding local paths. -// We only expand local paths (i.e. paths that are relative to the root path). -// After expanding we make the paths relative to the root path to allow upload mutator later in the chain to +// We only expand local paths (i.e. paths that are relative to the sync root path). +// After expanding we make the paths relative to the sync root path to allow upload mutator later in the chain to // distinguish between local and remote paths. func ExpandGlobReferences() bundle.Mutator { return &expand{} diff --git a/bundle/libraries/expand_glob_references_test.go b/bundle/libraries/expand_glob_references_test.go index 34855b53..e7f2e169 100644 --- a/bundle/libraries/expand_glob_references_test.go +++ b/bundle/libraries/expand_glob_references_test.go @@ -23,7 +23,7 @@ func TestGlobReferencesExpandedForTaskLibraries(t *testing.T) { testutil.Touch(t, dir, "jar", "my2.jar") b := &bundle.Bundle{ - RootPath: dir, + SyncRootPath: dir, Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -104,7 +104,7 @@ func TestGlobReferencesExpandedForForeachTaskLibraries(t *testing.T) { testutil.Touch(t, dir, "jar", "my2.jar") b := &bundle.Bundle{ - RootPath: dir, + SyncRootPath: dir, Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -189,7 +189,7 @@ func TestGlobReferencesExpandedForEnvironmentsDeps(t *testing.T) { testutil.Touch(t, dir, "jar", "my2.jar") b := &bundle.Bundle{ - RootPath: dir, + SyncRootPath: dir, Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ diff --git a/bundle/libraries/match_test.go b/bundle/libraries/match_test.go 
index e60504c8..78765cbd 100644 --- a/bundle/libraries/match_test.go +++ b/bundle/libraries/match_test.go @@ -18,7 +18,7 @@ func TestValidateEnvironments(t *testing.T) { testutil.Touch(t, tmpDir, "wheel.whl") b := &bundle.Bundle{ - RootPath: tmpDir, + SyncRootPath: tmpDir, Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -50,7 +50,7 @@ func TestValidateEnvironmentsNoFile(t *testing.T) { tmpDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tmpDir, + SyncRootPath: tmpDir, Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -84,7 +84,7 @@ func TestValidateTaskLibraries(t *testing.T) { testutil.Touch(t, tmpDir, "wheel.whl") b := &bundle.Bundle{ - RootPath: tmpDir, + SyncRootPath: tmpDir, Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -117,7 +117,7 @@ func TestValidateTaskLibrariesNoFile(t *testing.T) { tmpDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tmpDir, + SyncRootPath: tmpDir, Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ diff --git a/bundle/libraries/upload.go b/bundle/libraries/upload.go index 224e7ab2..90a1a21f 100644 --- a/bundle/libraries/upload.go +++ b/bundle/libraries/upload.go @@ -74,7 +74,7 @@ func collectLocalLibraries(b *bundle.Bundle) (map[string][]configLocation, error return v, nil } - source = filepath.Join(b.RootPath, source) + source = filepath.Join(b.SyncRootPath, source) libs[source] = append(libs[source], configLocation{ configPath: p, location: v.Location(), diff --git a/bundle/libraries/upload_test.go b/bundle/libraries/upload_test.go index 82fe6e7c..44b194c5 100644 --- a/bundle/libraries/upload_test.go +++ b/bundle/libraries/upload_test.go @@ -24,7 +24,7 @@ func TestArtifactUploadForWorkspace(t *testing.T) { whlLocalPath := filepath.Join(whlFolder, "source.whl") b := &bundle.Bundle{ - RootPath: tmpDir, + SyncRootPath: tmpDir, Config: config.Root{ Workspace: config.Workspace{ ArtifactPath: 
"/foo/bar/artifacts", @@ -112,7 +112,7 @@ func TestArtifactUploadForVolumes(t *testing.T) { whlLocalPath := filepath.Join(whlFolder, "source.whl") b := &bundle.Bundle{ - RootPath: tmpDir, + SyncRootPath: tmpDir, Config: config.Root{ Workspace: config.Workspace{ ArtifactPath: "/Volumes/foo/bar/artifacts", @@ -200,7 +200,7 @@ func TestArtifactUploadWithNoLibraryReference(t *testing.T) { whlLocalPath := filepath.Join(whlFolder, "source.whl") b := &bundle.Bundle{ - RootPath: tmpDir, + SyncRootPath: tmpDir, Config: config.Root{ Workspace: config.Workspace{ ArtifactPath: "/Workspace/foo/bar/artifacts", @@ -240,7 +240,7 @@ func TestUploadMultipleLibraries(t *testing.T) { testutil.Touch(t, whlFolder, "source4.whl") b := &bundle.Bundle{ - RootPath: tmpDir, + SyncRootPath: tmpDir, Config: config.Root{ Workspace: config.Workspace{ ArtifactPath: "/foo/bar/artifacts", diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact_no_setup/bundle.yml b/bundle/tests/python_wheel/python_wheel_no_artifact_no_setup/bundle.yml index 49286196..d0308430 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact_no_setup/bundle.yml +++ b/bundle/tests/python_wheel/python_wheel_no_artifact_no_setup/bundle.yml @@ -1,6 +1,9 @@ bundle: name: python-wheel-local +workspace: + artifact_path: /foo/bar + resources: jobs: test_job: diff --git a/bundle/tests/python_wheel_test.go b/bundle/tests/python_wheel_test.go index c4d85703..c982c09d 100644 --- a/bundle/tests/python_wheel_test.go +++ b/bundle/tests/python_wheel_test.go @@ -15,11 +15,10 @@ import ( ) func TestPythonWheelBuild(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./python_wheel/python_wheel") - require.NoError(t, err) + b := loadTarget(t, "./python_wheel/python_wheel", "default") - diags := bundle.Apply(ctx, b, bundle.Seq(phases.Load(), phases.Build())) + ctx := context.Background() + diags := bundle.Apply(ctx, b, phases.Build()) require.NoError(t, diags.Error()) matches, err := 
filepath.Glob("./python_wheel/python_wheel/my_test_code/dist/my_test_code-*.whl") @@ -32,11 +31,10 @@ func TestPythonWheelBuild(t *testing.T) { } func TestPythonWheelBuildAutoDetect(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./python_wheel/python_wheel_no_artifact") - require.NoError(t, err) + b := loadTarget(t, "./python_wheel/python_wheel_no_artifact", "default") - diags := bundle.Apply(ctx, b, bundle.Seq(phases.Load(), phases.Build())) + ctx := context.Background() + diags := bundle.Apply(ctx, b, phases.Build()) require.NoError(t, diags.Error()) matches, err := filepath.Glob("./python_wheel/python_wheel_no_artifact/dist/my_test_code-*.whl") @@ -49,11 +47,10 @@ func TestPythonWheelBuildAutoDetect(t *testing.T) { } func TestPythonWheelBuildAutoDetectWithNotebookTask(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./python_wheel/python_wheel_no_artifact_notebook") - require.NoError(t, err) + b := loadTarget(t, "./python_wheel/python_wheel_no_artifact_notebook", "default") - diags := bundle.Apply(ctx, b, bundle.Seq(phases.Load(), phases.Build())) + ctx := context.Background() + diags := bundle.Apply(ctx, b, phases.Build()) require.NoError(t, diags.Error()) matches, err := filepath.Glob("./python_wheel/python_wheel_no_artifact_notebook/dist/my_test_code-*.whl") @@ -66,11 +63,10 @@ func TestPythonWheelBuildAutoDetectWithNotebookTask(t *testing.T) { } func TestPythonWheelWithDBFSLib(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./python_wheel/python_wheel_dbfs_lib") - require.NoError(t, err) + b := loadTarget(t, "./python_wheel/python_wheel_dbfs_lib", "default") - diags := bundle.Apply(ctx, b, bundle.Seq(phases.Load(), phases.Build())) + ctx := context.Background() + diags := bundle.Apply(ctx, b, phases.Build()) require.NoError(t, diags.Error()) match := libraries.ExpandGlobReferences() @@ -79,11 +75,11 @@ func TestPythonWheelWithDBFSLib(t *testing.T) { } func 
TestPythonWheelBuildNoBuildJustUpload(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./python_wheel/python_wheel_no_artifact_no_setup") - require.NoError(t, err) + b := loadTarget(t, "./python_wheel/python_wheel_no_artifact_no_setup", "default") - b.Config.Workspace.ArtifactPath = "/foo/bar" + ctx := context.Background() + diags := bundle.Apply(ctx, b, phases.Build()) + require.NoError(t, diags.Error()) mockFiler := mockfiler.NewMockFiler(t) mockFiler.EXPECT().Write( @@ -94,20 +90,20 @@ func TestPythonWheelBuildNoBuildJustUpload(t *testing.T) { filer.CreateParentDirectories, ).Return(nil) - u := libraries.UploadWithClient(mockFiler) - diags := bundle.Apply(ctx, b, bundle.Seq(phases.Load(), phases.Build(), libraries.ExpandGlobReferences(), u)) + diags = bundle.Apply(ctx, b, bundle.Seq( + libraries.ExpandGlobReferences(), + libraries.UploadWithClient(mockFiler), + )) require.NoError(t, diags.Error()) require.Empty(t, diags) - require.Equal(t, "/Workspace/foo/bar/.internal/my_test_code-0.0.1-py3-none-any.whl", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[0].Libraries[0].Whl) } func TestPythonWheelBuildWithEnvironmentKey(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./python_wheel/environment_key") - require.NoError(t, err) + b := loadTarget(t, "./python_wheel/environment_key", "default") - diags := bundle.Apply(ctx, b, bundle.Seq(phases.Load(), phases.Build())) + ctx := context.Background() + diags := bundle.Apply(ctx, b, phases.Build()) require.NoError(t, diags.Error()) matches, err := filepath.Glob("./python_wheel/environment_key/my_test_code/dist/my_test_code-*.whl") @@ -120,11 +116,10 @@ func TestPythonWheelBuildWithEnvironmentKey(t *testing.T) { } func TestPythonWheelBuildMultiple(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./python_wheel/python_wheel_multiple") - require.NoError(t, err) + b := loadTarget(t, "./python_wheel/python_wheel_multiple", "default") - 
diags := bundle.Apply(ctx, b, bundle.Seq(phases.Load(), phases.Build())) + ctx := context.Background() + diags := bundle.Apply(ctx, b, phases.Build()) require.NoError(t, diags.Error()) matches, err := filepath.Glob("./python_wheel/python_wheel_multiple/my_test_code/dist/my_test_code*.whl") @@ -137,11 +132,10 @@ func TestPythonWheelBuildMultiple(t *testing.T) { } func TestPythonWheelNoBuild(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./python_wheel/python_wheel_no_build") - require.NoError(t, err) + b := loadTarget(t, "./python_wheel/python_wheel_no_build", "default") - diags := bundle.Apply(ctx, b, bundle.Seq(phases.Load(), phases.Build())) + ctx := context.Background() + diags := bundle.Apply(ctx, b, phases.Build()) require.NoError(t, diags.Error()) match := libraries.ExpandGlobReferences() From 90244f9c169b8caac7f9c7ee0040c8e4bf38ee36 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:16:51 +0200 Subject: [PATCH 02/15] Bump golang.org/x/term from 0.23.0 to 0.24.0 (#1757) Bumps [golang.org/x/term](https://github.com/golang/term) from 0.23.0 to 0.24.0.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/term&package-manager=go_modules&previous-version=0.23.0&new-version=0.24.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 9777106c..e14e3b78 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( golang.org/x/mod v0.20.0 golang.org/x/oauth2 v0.22.0 golang.org/x/sync v0.8.0 - golang.org/x/term v0.23.0 + golang.org/x/term v0.24.0 golang.org/x/text v0.17.0 gopkg.in/ini.v1 v1.67.0 // Apache 2.0 gopkg.in/yaml.v3 v3.0.1 @@ -61,7 +61,7 @@ require ( go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/crypto v0.24.0 // indirect golang.org/x/net v0.26.0 // indirect - golang.org/x/sys v0.23.0 // indirect + golang.org/x/sys v0.25.0 // indirect golang.org/x/time v0.5.0 // indirect google.golang.org/api v0.182.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240521202816-d264139d666e // indirect diff --git a/go.sum b/go.sum index b232e8e4..808a4803 100644 --- a/go.sum +++ b/go.sum @@ -208,10 +208,10 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= -golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term 
v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= From f19f7fa130e3aff75721e4b7e9d8091dc0d3a949 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:17:02 +0200 Subject: [PATCH 03/15] Bump golang.org/x/oauth2 from 0.22.0 to 0.23.0 (#1761) Bumps [golang.org/x/oauth2](https://github.com/golang/oauth2) from 0.22.0 to 0.23.0.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/oauth2&package-manager=go_modules&previous-version=0.22.0&new-version=0.23.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e14e3b78..4e435fea 100644 --- a/go.mod +++ b/go.mod @@ -23,7 +23,7 @@ require ( github.com/stretchr/testify v1.9.0 // MIT golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/mod v0.20.0 - golang.org/x/oauth2 v0.22.0 + golang.org/x/oauth2 v0.23.0 golang.org/x/sync v0.8.0 golang.org/x/term v0.24.0 golang.org/x/text v0.17.0 diff --git a/go.sum b/go.sum index 808a4803..ff7e394b 100644 --- a/go.sum +++ b/go.sum @@ -191,8 +191,8 @@ golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= -golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= From 0ef1ada14bab40b7a03ec8dc418bd4e948c90d50 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:40:02 +0200 Subject: [PATCH 04/15] Bump golang.org/x/text from 0.17.0 to 0.18.0 (#1759) Bumps [golang.org/x/text](https://github.com/golang/text) from 0.17.0 to 
0.18.0.
Commits
  • 1e3e9fd all: rename Example test functions to prevent vet errors
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/text&package-manager=go_modules&previous-version=0.17.0&new-version=0.18.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 4e435fea..e1f8e806 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( golang.org/x/oauth2 v0.23.0 golang.org/x/sync v0.8.0 golang.org/x/term v0.24.0 - golang.org/x/text v0.17.0 + golang.org/x/text v0.18.0 gopkg.in/ini.v1 v1.67.0 // Apache 2.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index ff7e394b..71efdc55 100644 --- a/go.sum +++ b/go.sum @@ -214,8 +214,8 @@ golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= From d3e221a116bfadfb525a82f1e31ad5dd7224b2cd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:49:39 +0200 Subject: [PATCH 05/15] Bump github.com/databricks/databricks-sdk-go from 0.45.0 to 0.46.0 (#1760) Bumps [github.com/databricks/databricks-sdk-go](https://github.com/databricks/databricks-sdk-go) from 0.45.0 to 0.46.0.
Release notes

Sourced from github.com/databricks/databricks-sdk-go's releases.

v0.46.0

Bug Fixes

  • Fail fast when authenticating if host is not configured (#1033).
  • Improve non-JSON error handling (#1031).

Internal Changes

  • Add TestAccCreateOboTokenOnAws to flaky test list (#1029).
  • Add workflows manage integration tests checks (#1032).
  • Fix TestMwsAccWorkspaces cleanup (#1028).
  • Improve integration test comment (#1035).
  • Temporary ignore Metastore test failures (#1027).
  • Update test to support new accounts (#1026).
  • Use statuses instead of checks (#1036).

API Changes:

OpenAPI SHA: d05898328669a3f8ab0c2ecee37db2673d3ea3f7, Date: 2024-09-04

Changelog

Sourced from github.com/databricks/databricks-sdk-go's changelog.

[Release] Release v0.46.0

Bug Fixes

  • Fail fast when authenticating if host is not configured (#1033).
  • Improve non-JSON error handling (#1031).

Internal Changes

  • Add TestAccCreateOboTokenOnAws to flaky test list (#1029).
  • Add workflows manage integration tests checks (#1032).
  • Fix TestMwsAccWorkspaces cleanup (#1028).
  • Improve integration test comment (#1035).
  • Temporary ignore Metastore test failures (#1027).
  • Update test to support new accounts (#1026).
  • Use statuses instead of checks (#1036).

API Changes:

OpenAPI SHA: d05898328669a3f8ab0c2ecee37db2673d3ea3f7, Date: 2024-09-04

Commits

Most Recent Ignore Conditions Applied to This Pull Request

| Dependency Name | Ignore Conditions |
| --- | --- |
| github.com/databricks/databricks-sdk-go | [>= 0.28.a, < 0.29] |
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/databricks/databricks-sdk-go&package-manager=go_modules&previous-version=0.45.0&new-version=0.46.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
--------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrew Nester --- .codegen/_openapi_sha | 2 +- bundle/schema/docs/bundle_descriptions.json | 12 +++ cmd/workspace/experiments/experiments.go | 7 +- cmd/workspace/metastores/metastores.go | 6 +- cmd/workspace/permissions/permissions.go | 21 +++-- .../quality-monitors/quality-monitors.go | 82 +++++++++++++++++++ go.mod | 2 +- go.sum | 4 +- 8 files changed, 120 insertions(+), 16 deletions(-) diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index 8b01a242..4ceeab3d 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -3eae49b444cac5a0118a3503e5b7ecef7f96527a \ No newline at end of file +d05898328669a3f8ab0c2ecee37db2673d3ea3f7 \ No newline at end of file diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index 908a1c2b..f03a4480 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -2046,6 +2046,12 @@ "instance_profile_arn": { "description": "ARN of the instance profile that the served model will use to access AWS resources." }, + "max_provisioned_throughput": { + "description": "The maximum tokens per second that the endpoint can scale up to." + }, + "min_provisioned_throughput": { + "description": "The minimum tokens per second that the endpoint can scale down to." + }, "model_name": { "description": "The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,\nin the form of __catalog_name__.__schema_name__.__model_name__.\n" }, @@ -5147,6 +5153,12 @@ "instance_profile_arn": { "description": "ARN of the instance profile that the served model will use to access AWS resources." }, + "max_provisioned_throughput": { + "description": "The maximum tokens per second that the endpoint can scale up to." 
+ }, + "min_provisioned_throughput": { + "description": "The minimum tokens per second that the endpoint can scale down to." + }, "model_name": { "description": "The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,\nin the form of __catalog_name__.__schema_name__.__model_name__.\n" }, diff --git a/cmd/workspace/experiments/experiments.go b/cmd/workspace/experiments/experiments.go index e1e00380..b1af2f86 100755 --- a/cmd/workspace/experiments/experiments.go +++ b/cmd/workspace/experiments/experiments.go @@ -941,7 +941,12 @@ func newListArtifacts() *cobra.Command { cmd.Long = `Get all artifacts. List artifacts for a run. Takes an optional artifact_path prefix. If it is - specified, the response contains only artifacts with the specified prefix.",` + specified, the response contains only artifacts with the specified prefix. + This API does not support pagination when listing artifacts in UC Volumes. A + maximum of 1000 artifacts will be retrieved for UC Volumes. Please call + /api/2.0/fs/directories{directory_path} for listing artifacts in UC Volumes, + which supports pagination. See [List directory contents | Files + API](/api/workspace/files/listdirectorycontents).` cmd.Annotations = make(map[string]string) diff --git a/cmd/workspace/metastores/metastores.go b/cmd/workspace/metastores/metastores.go index dd40bf92..22bcd3dc 100755 --- a/cmd/workspace/metastores/metastores.go +++ b/cmd/workspace/metastores/metastores.go @@ -88,7 +88,9 @@ func newAssign() *cobra.Command { Arguments: WORKSPACE_ID: A workspace ID. METASTORE_ID: The unique ID of the metastore. - DEFAULT_CATALOG_NAME: The name of the default catalog in the metastore.` + DEFAULT_CATALOG_NAME: The name of the default catalog in the metastore. This field is depracted. 
+ Please use "Default Namespace API" to configure the default catalog for a + Databricks workspace.` cmd.Annotations = make(map[string]string) @@ -665,7 +667,7 @@ func newUpdateAssignment() *cobra.Command { // TODO: short flags cmd.Flags().Var(&updateAssignmentJson, "json", `either inline JSON string or @path/to/file.json with request body`) - cmd.Flags().StringVar(&updateAssignmentReq.DefaultCatalogName, "default-catalog-name", updateAssignmentReq.DefaultCatalogName, `The name of the default catalog for the metastore.`) + cmd.Flags().StringVar(&updateAssignmentReq.DefaultCatalogName, "default-catalog-name", updateAssignmentReq.DefaultCatalogName, `The name of the default catalog in the metastore.`) cmd.Flags().StringVar(&updateAssignmentReq.MetastoreId, "metastore-id", updateAssignmentReq.MetastoreId, `The unique ID of the metastore.`) cmd.Use = "update-assignment WORKSPACE_ID" diff --git a/cmd/workspace/permissions/permissions.go b/cmd/workspace/permissions/permissions.go index fd9c1a46..c6033e4a 100755 --- a/cmd/workspace/permissions/permissions.go +++ b/cmd/workspace/permissions/permissions.go @@ -117,9 +117,10 @@ func newGet() *cobra.Command { Arguments: REQUEST_OBJECT_TYPE: The type of the request object. Can be one of the following: alerts, - authorization, clusters, cluster-policies, dbsql-dashboards, directories, - experiments, files, instance-pools, jobs, notebooks, pipelines, queries, - registered-models, repos, serving-endpoints, or warehouses. + authorization, clusters, cluster-policies, dashboards, dbsql-dashboards, + directories, experiments, files, instance-pools, jobs, notebooks, + pipelines, queries, registered-models, repos, serving-endpoints, or + warehouses. REQUEST_OBJECT_ID: The id of the request object.` cmd.Annotations = make(map[string]string) @@ -245,9 +246,10 @@ func newSet() *cobra.Command { Arguments: REQUEST_OBJECT_TYPE: The type of the request object. 
Can be one of the following: alerts, - authorization, clusters, cluster-policies, dbsql-dashboards, directories, - experiments, files, instance-pools, jobs, notebooks, pipelines, queries, - registered-models, repos, serving-endpoints, or warehouses. + authorization, clusters, cluster-policies, dashboards, dbsql-dashboards, + directories, experiments, files, instance-pools, jobs, notebooks, + pipelines, queries, registered-models, repos, serving-endpoints, or + warehouses. REQUEST_OBJECT_ID: The id of the request object.` cmd.Annotations = make(map[string]string) @@ -319,9 +321,10 @@ func newUpdate() *cobra.Command { Arguments: REQUEST_OBJECT_TYPE: The type of the request object. Can be one of the following: alerts, - authorization, clusters, cluster-policies, dbsql-dashboards, directories, - experiments, files, instance-pools, jobs, notebooks, pipelines, queries, - registered-models, repos, serving-endpoints, or warehouses. + authorization, clusters, cluster-policies, dashboards, dbsql-dashboards, + directories, experiments, files, instance-pools, jobs, notebooks, + pipelines, queries, registered-models, repos, serving-endpoints, or + warehouses. REQUEST_OBJECT_ID: The id of the request object.` cmd.Annotations = make(map[string]string) diff --git a/cmd/workspace/quality-monitors/quality-monitors.go b/cmd/workspace/quality-monitors/quality-monitors.go index 95d99216..1ff9b017 100755 --- a/cmd/workspace/quality-monitors/quality-monitors.go +++ b/cmd/workspace/quality-monitors/quality-monitors.go @@ -41,6 +41,7 @@ func New() *cobra.Command { cmd.AddCommand(newGet()) cmd.AddCommand(newGetRefresh()) cmd.AddCommand(newListRefreshes()) + cmd.AddCommand(newRegenerateDashboard()) cmd.AddCommand(newRunRefresh()) cmd.AddCommand(newUpdate()) @@ -503,6 +504,87 @@ func newListRefreshes() *cobra.Command { return cmd } +// start regenerate-dashboard command + +// Slice with functions to override default command behavior. 
+// Functions can be added from the `init()` function in manually curated files in this directory. +var regenerateDashboardOverrides []func( + *cobra.Command, + *catalog.RegenerateDashboardRequest, +) + +func newRegenerateDashboard() *cobra.Command { + cmd := &cobra.Command{} + + var regenerateDashboardReq catalog.RegenerateDashboardRequest + var regenerateDashboardJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&regenerateDashboardJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + cmd.Flags().StringVar(&regenerateDashboardReq.WarehouseId, "warehouse-id", regenerateDashboardReq.WarehouseId, `Optional argument to specify the warehouse for dashboard regeneration.`) + + cmd.Use = "regenerate-dashboard TABLE_NAME" + cmd.Short = `Regenerate a monitoring dashboard.` + cmd.Long = `Regenerate a monitoring dashboard. + + Regenerates the monitoring dashboard for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have + **USE_CATALOG** on the table's parent catalog and be an owner of the table's + parent schema 3. have the following permissions: - **USE_CATALOG** on the + table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table + + The call must be made from the workspace where the monitor was created. The + dashboard will be regenerated in the assets directory that was specified when + the monitor was created. + + Arguments: + TABLE_NAME: Full name of the table.` + + // This command is being previewed; hide from help output. 
+ cmd.Hidden = true + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + if cmd.Flags().Changed("json") { + err = regenerateDashboardJson.Unmarshal(&regenerateDashboardReq) + if err != nil { + return err + } + } + regenerateDashboardReq.TableName = args[0] + + response, err := w.QualityMonitors.RegenerateDashboard(ctx, regenerateDashboardReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range regenerateDashboardOverrides { + fn(cmd, &regenerateDashboardReq) + } + + return cmd +} + // start run-refresh command // Slice with functions to override default command behavior. 
diff --git a/go.mod b/go.mod index e1f8e806..ba41ef3a 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.22 require ( github.com/Masterminds/semver/v3 v3.3.0 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 - github.com/databricks/databricks-sdk-go v0.45.0 // Apache 2.0 + github.com/databricks/databricks-sdk-go v0.46.0 // Apache 2.0 github.com/fatih/color v1.17.0 // MIT github.com/ghodss/yaml v1.0.0 // MIT + NOTICE github.com/google/uuid v1.6.0 // BSD-3-Clause diff --git a/go.sum b/go.sum index 71efdc55..3d4a2cdc 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/databricks/databricks-sdk-go v0.45.0 h1:wdx5Wm/ESrahdHeq62WrjLeGjV4r722LLanD8ahI0Mo= -github.com/databricks/databricks-sdk-go v0.45.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= +github.com/databricks/databricks-sdk-go v0.46.0 h1:D0TxmtSVAOsdnfzH4OGtAmcq+8TyA7Z6fA6JEYhupeY= +github.com/databricks/databricks-sdk-go v0.46.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= From 28b39cd3f7c7c1af809e4cc7d9a5bd5f3f44daa0 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 10 Sep 2024 19:25:18 +0530 Subject: [PATCH 06/15] Make bundle JSON schema modular with `$defs` (#1700) ## Changes This PR makes sweeping changes to the way we generate and test the bundle JSON schema. The main benefits are: 1. 
More modular JSON schema. Every definition in the schema now is one level deep and points to references instead of inlining the entire schema for a field. This unblocks PyDABs from taking a dependency on the JSON schema. 2. Generate the JSON schema during CLI code generation. Directly stream it instead of computing it at runtime whenever a user calls `databricks bundle schema`. This is nice because we no longer need to embed a partial OpenAPI spec in the CLI. Down the line, we can add a `Schema()` method to every struct in the Databricks Go SDK and remove the dependency on the OpenAPI spec altogether. It'll become more important once we decouple Go SDK structs and methods from the underlying APIs. 3. Add enum values for Go SDK fields in the JSON schema. Better autocompletion and validation for these fields. As a follow-up, we can add enum values for non-Go SDK enums as well (created internal ticket to track). 4. Use "packageName.structName" as a key to read JSON schemas from the OpenAPI spec for Go SDK structs. Before, we would use an unrolled presentation of the JSON schema (stored in `bundle_descriptions.json`), which was complex to parse and include in the final JSON schema output. This also means loading values from the OpenAPI spec for `target` schema works automatically and no longer needs custom code. 5. Support recursive types (eg: `for_each_task`). With us now using $refs everywhere it's trivial to support. 6. Using complex variables would be invalid according to the schema generated before this PR. Now that bug is fixed. In the future adding more custom rules will be easier as well due to the single level nature of the JSON schema. Since this is a complete change of approach in how we generate the JSON schema, there are a few (very minor) regressions worth calling out. 1. We'll lose a few custom descriptions for non Go SDK structs that were a part of `bundle_descriptions.json`. Support for those can be added in the future as a followup. 2. 
Since now the final JSON schema is a static artefact, we lose some lead time for the signal that JSON schema integration tests are failing. It's okay though since we have a lot of coverage via the existing unit tests. ## Tests Unit tests. End to end tests are being added in this PR: https://github.com/databricks/cli/pull/1726 Previous unit tests were all deleted because they were bloated. Effort was made to make the new unit tests provide (almost) equivalent coverage. --- .codegen.json | 4 +- .gitattributes | 2 +- bundle/internal/bundle/schema/main.go | 42 - bundle/internal/schema/main.go | 93 + bundle/internal/schema/parser.go | 123 + bundle/schema/README.md | 18 - bundle/schema/docs.go | 109 - bundle/schema/docs/bundle_descriptions.json | 6447 ------------------- bundle/schema/docs_test.go | 62 - bundle/schema/embed.go | 6 + bundle/schema/embed_test.go | 71 + bundle/schema/jsonschema.json | 5524 ++++++++++++++++ bundle/schema/openapi.go | 293 - bundle/schema/openapi_test.go | 493 -- bundle/schema/schema.go | 287 - bundle/schema/schema_test.go | 1900 ------ bundle/schema/spec.go | 11 - bundle/schema/tracker.go | 53 - cmd/bundle/schema.go | 33 +- libs/dyn/dynvar/ref.go | 4 +- libs/jsonschema/from_type.go | 356 + libs/jsonschema/from_type_test.go | 521 ++ libs/jsonschema/schema.go | 42 +- libs/jsonschema/schema_test.go | 90 - libs/jsonschema/test_types/test_types.go | 25 + libs/template/config_test.go | 2 +- 26 files changed, 6731 insertions(+), 9880 deletions(-) delete mode 100644 bundle/internal/bundle/schema/main.go create mode 100644 bundle/internal/schema/main.go create mode 100644 bundle/internal/schema/parser.go delete mode 100644 bundle/schema/README.md delete mode 100644 bundle/schema/docs.go delete mode 100644 bundle/schema/docs/bundle_descriptions.json delete mode 100644 bundle/schema/docs_test.go create mode 100644 bundle/schema/embed.go create mode 100644 bundle/schema/embed_test.go create mode 100644 bundle/schema/jsonschema.json delete mode 100644 
bundle/schema/openapi.go delete mode 100644 bundle/schema/openapi_test.go delete mode 100644 bundle/schema/schema.go delete mode 100644 bundle/schema/schema_test.go delete mode 100644 bundle/schema/spec.go delete mode 100644 bundle/schema/tracker.go create mode 100644 libs/jsonschema/from_type.go create mode 100644 libs/jsonschema/from_type_test.go create mode 100644 libs/jsonschema/test_types/test_types.go diff --git a/.codegen.json b/.codegen.json index 8cb42b41..077d072b 100644 --- a/.codegen.json +++ b/.codegen.json @@ -11,10 +11,10 @@ "toolchain": { "required": ["go"], "post_generate": [ - "go run ./bundle/internal/bundle/schema/main.go ./bundle/schema/docs/bundle_descriptions.json", + "go run ./bundle/internal/schema/*.go ./bundle/schema/jsonschema.json", "echo 'bundle/internal/tf/schema/\\*.go linguist-generated=true' >> ./.gitattributes", "echo 'go.sum linguist-generated=true' >> ./.gitattributes", - "echo 'bundle/schema/docs/bundle_descriptions.json linguist-generated=true' >> ./.gitattributes" + "echo 'bundle/schema/jsonschema.json linguist-generated=true' >> ./.gitattributes" ] } } diff --git a/.gitattributes b/.gitattributes index d82ab769..f35c4f81 100755 --- a/.gitattributes +++ b/.gitattributes @@ -120,4 +120,4 @@ cmd/workspace/workspace-conf/workspace-conf.go linguist-generated=true cmd/workspace/workspace/workspace.go linguist-generated=true bundle/internal/tf/schema/\*.go linguist-generated=true go.sum linguist-generated=true -bundle/schema/docs/bundle_descriptions.json linguist-generated=true +bundle/schema/jsonschema.json linguist-generated=true diff --git a/bundle/internal/bundle/schema/main.go b/bundle/internal/bundle/schema/main.go deleted file mode 100644 index c9cc7cd4..00000000 --- a/bundle/internal/bundle/schema/main.go +++ /dev/null @@ -1,42 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "log" - "os" - - "github.com/databricks/cli/bundle/schema" -) - -func main() { - if len(os.Args) != 2 { - fmt.Println("Usage: go run 
main.go ") - os.Exit(1) - } - - // Output file, to write the generated schema descriptions to. - outputFile := os.Args[1] - - // Input file, the databricks openapi spec. - inputFile := os.Getenv("DATABRICKS_OPENAPI_SPEC") - if inputFile == "" { - log.Fatal("DATABRICKS_OPENAPI_SPEC environment variable not set") - } - - // Generate the schema descriptions. - docs, err := schema.UpdateBundleDescriptions(inputFile) - if err != nil { - log.Fatal(err) - } - result, err := json.MarshalIndent(docs, "", " ") - if err != nil { - log.Fatal(err) - } - - // Write the schema descriptions to the output file. - err = os.WriteFile(outputFile, result, 0644) - if err != nil { - log.Fatal(err) - } -} diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go new file mode 100644 index 00000000..3c1fb5da --- /dev/null +++ b/bundle/internal/schema/main.go @@ -0,0 +1,93 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "os" + "reflect" + + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/variable" + "github.com/databricks/cli/libs/jsonschema" +) + +func interpolationPattern(s string) string { + return fmt.Sprintf(`\$\{(%s(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)+)\}`, s) +} + +func addInterpolationPatterns(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + if typ == reflect.TypeOf(config.Root{}) || typ == reflect.TypeOf(variable.Variable{}) { + return s + } + + switch s.Type { + case jsonschema.ArrayType, jsonschema.ObjectType: + // arrays and objects can have complex variable values specified. 
+ return jsonschema.Schema{ + AnyOf: []jsonschema.Schema{ + s, + { + Type: jsonschema.StringType, + Pattern: interpolationPattern("var"), + }}, + } + case jsonschema.IntegerType, jsonschema.NumberType, jsonschema.BooleanType: + // primitives can have variable values, or references like ${bundle.xyz} + // or ${workspace.xyz} + return jsonschema.Schema{ + AnyOf: []jsonschema.Schema{ + s, + {Type: jsonschema.StringType, Pattern: interpolationPattern("resources")}, + {Type: jsonschema.StringType, Pattern: interpolationPattern("bundle")}, + {Type: jsonschema.StringType, Pattern: interpolationPattern("workspace")}, + {Type: jsonschema.StringType, Pattern: interpolationPattern("artifacts")}, + {Type: jsonschema.StringType, Pattern: interpolationPattern("var")}, + }, + } + default: + return s + } +} + +func main() { + if len(os.Args) != 2 { + fmt.Println("Usage: go run main.go ") + os.Exit(1) + } + + // Output file, where the generated JSON schema will be written to. + outputFile := os.Args[1] + + // Input file, the databricks openapi spec. + inputFile := os.Getenv("DATABRICKS_OPENAPI_SPEC") + if inputFile == "" { + log.Fatal("DATABRICKS_OPENAPI_SPEC environment variable not set") + } + + p, err := newParser(inputFile) + if err != nil { + log.Fatal(err) + } + + // Generate the JSON schema from the bundle Go struct. + s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ + p.addDescriptions, + p.addEnums, + addInterpolationPatterns, + }) + if err != nil { + log.Fatal(err) + } + + b, err := json.MarshalIndent(s, "", " ") + if err != nil { + log.Fatal(err) + } + + // Write the schema descriptions to the output file. 
+ err = os.WriteFile(outputFile, b, 0644) + if err != nil { + log.Fatal(err) + } +} diff --git a/bundle/internal/schema/parser.go b/bundle/internal/schema/parser.go new file mode 100644 index 00000000..ef3d6e71 --- /dev/null +++ b/bundle/internal/schema/parser.go @@ -0,0 +1,123 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + "path" + "reflect" + "strings" + + "github.com/databricks/cli/libs/jsonschema" +) + +type Components struct { + Schemas map[string]jsonschema.Schema `json:"schemas,omitempty"` +} + +type Specification struct { + Components Components `json:"components"` +} + +type openapiParser struct { + ref map[string]jsonschema.Schema +} + +func newParser(path string) (*openapiParser, error) { + b, err := os.ReadFile(path) + if err != nil { + return nil, err + } + + spec := Specification{} + err = json.Unmarshal(b, &spec) + if err != nil { + return nil, err + } + + p := &openapiParser{} + p.ref = spec.Components.Schemas + return p, nil +} + +// This function checks if the input type: +// 1. Is a Databricks Go SDK type. +// 2. Has a Databricks Go SDK type embedded in it. +// +// If the above conditions are met, the function returns the JSON schema +// corresponding to the Databricks Go SDK type from the OpenAPI spec. +func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) { + typs := []reflect.Type{typ} + + // Check for embedded Databricks Go SDK types. + if typ.Kind() == reflect.Struct { + for i := 0; i < typ.NumField(); i++ { + if !typ.Field(i).Anonymous { + continue + } + + // Deference current type if it's a pointer. + ctyp := typ.Field(i).Type + for ctyp.Kind() == reflect.Ptr { + ctyp = ctyp.Elem() + } + + typs = append(typs, ctyp) + } + } + + for _, ctyp := range typs { + // Skip if it's not a Go SDK type. 
+ if !strings.HasPrefix(ctyp.PkgPath(), "github.com/databricks/databricks-sdk-go") { + continue + } + + pkgName := path.Base(ctyp.PkgPath()) + k := fmt.Sprintf("%s.%s", pkgName, ctyp.Name()) + + // Skip if the type is not in the openapi spec. + _, ok := p.ref[k] + if !ok { + continue + } + + // Return the first Go SDK type found in the openapi spec. + return p.ref[k], true + } + + return jsonschema.Schema{}, false +} + +// Use the OpenAPI spec to load descriptions for the given type. +func (p *openapiParser) addDescriptions(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + ref, ok := p.findRef(typ) + if !ok { + return s + } + + s.Description = ref.Description + for k, v := range s.Properties { + if refProp, ok := ref.Properties[k]; ok { + v.Description = refProp.Description + } + } + + return s +} + +// Use the OpenAPI spec add enum values for the given type. +func (p *openapiParser) addEnums(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + ref, ok := p.findRef(typ) + if !ok { + return s + } + + s.Enum = append(s.Enum, ref.Enum...) + for k, v := range s.Properties { + if refProp, ok := ref.Properties[k]; ok { + v.Enum = append(v.Enum, refProp.Enum...) + } + } + + return s +} diff --git a/bundle/schema/README.md b/bundle/schema/README.md deleted file mode 100644 index bf6b87df..00000000 --- a/bundle/schema/README.md +++ /dev/null @@ -1,18 +0,0 @@ -### Overview - -`docs/bundle_descriptions.json` contains both autogenerated as well as manually written -descriptions for the json schema. Specifically -1. `resources` : almost all descriptions are autogenerated from the OpenAPI spec -2. `targets` : almost all descriptions are copied over from root level entities (eg: `bundle`, `artifacts`) -3. `bundle` : manually editted -4. `include` : manually editted -5. `workspace` : manually editted -6. 
`artifacts` : manually editted - -These descriptions are rendered in the inline documentation in an IDE - -### SOP: Add schema descriptions for new fields in bundle config - -Manually edit bundle_descriptions.json to add your descriptions. Note that the -descriptions in `resources` block is generated from the OpenAPI spec, and thus -any changes there will be overwritten. diff --git a/bundle/schema/docs.go b/bundle/schema/docs.go deleted file mode 100644 index 6e9289f9..00000000 --- a/bundle/schema/docs.go +++ /dev/null @@ -1,109 +0,0 @@ -package schema - -import ( - _ "embed" - "encoding/json" - "fmt" - "os" - "reflect" - - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/libs/jsonschema" -) - -// A subset of Schema struct -type Docs struct { - Description string `json:"description"` - Properties map[string]*Docs `json:"properties,omitempty"` - Items *Docs `json:"items,omitempty"` - AdditionalProperties *Docs `json:"additionalproperties,omitempty"` -} - -//go:embed docs/bundle_descriptions.json -var bundleDocs []byte - -func (docs *Docs) refreshTargetsDocs() error { - targetsDocs, ok := docs.Properties["targets"] - if !ok || targetsDocs.AdditionalProperties == nil || - targetsDocs.AdditionalProperties.Properties == nil { - return fmt.Errorf("invalid targets descriptions") - } - targetProperties := targetsDocs.AdditionalProperties.Properties - propertiesToCopy := []string{"artifacts", "bundle", "resources", "workspace"} - for _, p := range propertiesToCopy { - targetProperties[p] = docs.Properties[p] - } - return nil -} - -func LoadBundleDescriptions() (*Docs, error) { - embedded := Docs{} - err := json.Unmarshal(bundleDocs, &embedded) - return &embedded, err -} - -func UpdateBundleDescriptions(openapiSpecPath string) (*Docs, error) { - embedded, err := LoadBundleDescriptions() - if err != nil { - return nil, err - } - - // Generate schema from the embedded descriptions, and convert it back to docs. 
- // This creates empty descriptions for any properties that were missing in the - // embedded descriptions. - schema, err := New(reflect.TypeOf(config.Root{}), embedded) - if err != nil { - return nil, err - } - docs := schemaToDocs(schema) - - // Load the Databricks OpenAPI spec - openapiSpec, err := os.ReadFile(openapiSpecPath) - if err != nil { - return nil, err - } - spec := &Specification{} - err = json.Unmarshal(openapiSpec, spec) - if err != nil { - return nil, err - } - openapiReader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - - // Generate descriptions for the "resources" field - resourcesDocs, err := openapiReader.ResourcesDocs() - if err != nil { - return nil, err - } - resourceSchema, err := New(reflect.TypeOf(config.Resources{}), resourcesDocs) - if err != nil { - return nil, err - } - docs.Properties["resources"] = schemaToDocs(resourceSchema) - docs.refreshTargetsDocs() - return docs, nil -} - -// *Docs are a subset of *Schema, this function selects that subset -func schemaToDocs(jsonSchema *jsonschema.Schema) *Docs { - // terminate recursion if schema is nil - if jsonSchema == nil { - return nil - } - docs := &Docs{ - Description: jsonSchema.Description, - } - if len(jsonSchema.Properties) > 0 { - docs.Properties = make(map[string]*Docs) - } - for k, v := range jsonSchema.Properties { - docs.Properties[k] = schemaToDocs(v) - } - docs.Items = schemaToDocs(jsonSchema.Items) - if additionalProperties, ok := jsonSchema.AdditionalProperties.(*jsonschema.Schema); ok { - docs.AdditionalProperties = schemaToDocs(additionalProperties) - } - return docs -} diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json deleted file mode 100644 index f03a4480..00000000 --- a/bundle/schema/docs/bundle_descriptions.json +++ /dev/null @@ -1,6447 +0,0 @@ -{ - "description": "", - "properties": { - "artifacts": { - "description": "", - "additionalproperties": { - "description": 
"", - "properties": { - "build": { - "description": "" - }, - "executable": { - "description": "" - }, - "files": { - "description": "", - "items": { - "description": "", - "properties": { - "source": { - "description": "" - } - } - } - }, - "path": { - "description": "" - }, - "type": { - "description": "" - } - } - } - }, - "bundle": { - "description": "", - "properties": { - "compute_id": { - "description": "" - }, - "databricks_cli_version": { - "description": "" - }, - "deployment": { - "description": "", - "properties": { - "fail_on_active_runs": { - "description": "" - }, - "lock": { - "description": "", - "properties": { - "enabled": { - "description": "" - }, - "force": { - "description": "" - } - } - } - } - }, - "git": { - "description": "", - "properties": { - "branch": { - "description": "" - }, - "origin_url": { - "description": "" - } - } - }, - "name": { - "description": "" - } - } - }, - "experimental": { - "description": "", - "properties": { - "pydabs": { - "description": "", - "properties": { - "enabled": { - "description": "" - }, - "import": { - "description": "", - "items": { - "description": "" - } - }, - "venv_path": { - "description": "" - } - } - }, - "python_wheel_wrapper": { - "description": "" - }, - "scripts": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "use_legacy_run_as": { - "description": "" - } - } - }, - "include": { - "description": "", - "items": { - "description": "" - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "presets": { - "description": "", - "properties": { - "jobs_max_concurrent_runs": { - "description": "" - }, - "name_prefix": { - "description": "" - }, - "pipelines_development": { - "description": "" - }, - "tags": { - "description": 
"", - "additionalproperties": { - "description": "" - } - }, - "trigger_pause_status": { - "description": "" - } - } - }, - "resources": { - "description": "Collection of Databricks resources to deploy.", - "properties": { - "experiments": { - "description": "List of MLflow experiments", - "additionalproperties": { - "description": "", - "properties": { - "artifact_location": { - "description": "Location where artifacts for the experiment are stored." - }, - "creation_time": { - "description": "Creation time" - }, - "experiment_id": { - "description": "Unique identifier for the experiment." - }, - "last_update_time": { - "description": "Last update time" - }, - "lifecycle_stage": { - "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." - }, - "name": { - "description": "Human readable name that identifies the experiment." - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - } - } - } - }, - "jobs": { - "description": "List of Databricks jobs", - "additionalproperties": { - "description": "", - "properties": { - "continuous": { - "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", - "properties": { - "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." 
- } - } - }, - "deployment": { - "description": "Deployment information for jobs managed by external sources.", - "properties": { - "kind": { - "description": "The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle." - }, - "metadata_file_path": { - "description": "Path of the file that contains deployment metadata." - } - } - }, - "description": { - "description": "An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding." - }, - "edit_mode": { - "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified." - }, - "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted.", - "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, - "on_duration_warning_threshold_exceeded": { - "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", - "items": { - "description": "" - } - }, - "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_start": { - "description": "A list of email addresses to be notified when a run begins. 
If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_streaming_backlog_exceeded": { - "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", - "items": { - "description": "" - } - }, - "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - } - } - }, - "environments": { - "description": "A list of task execution environment specifications that can be referenced by tasks of this job.", - "items": { - "description": "", - "properties": { - "environment_key": { - "description": "The key of an environment. It has to be unique within a job." - }, - "spec": { - "description": "", - "properties": { - "client": { - "description": "Client version used by the environment\nThe client is the user-facing environment of the runtime.\nEach client comes with a specific set of pre-installed libraries.\nThe version is a string, consisting of the major client version." 
- }, - "dependencies": { - "description": "List of pip dependencies, as supported by the version of pip in this environment.\nEach dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/\nAllowed dependency could be \u003crequirement specifier\u003e, \u003carchive url/path\u003e, \u003clocal project path\u003e(WSFS or Volumes in Databricks), \u003cvcs project url\u003e\nE.g. dependencies: [\"foo==0.0.1\", \"-r /Workspace/test/requirements.txt\"]", - "items": { - "description": "" - } - } - } - } - } - } - }, - "format": { - "description": "Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `\"MULTI_TASK\"`." - }, - "git_source": { - "description": "An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.", - "properties": { - "git_branch": { - "description": "Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit." - }, - "git_commit": { - "description": "Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag." - }, - "git_provider": { - "description": "Unique identifier of the service used to host the Git repository. The value is case insensitive." - }, - "git_snapshot": { - "description": "", - "properties": { - "used_commit": { - "description": "Commit that was used to execute the run. 
If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to." - } - } - }, - "git_tag": { - "description": "Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit." - }, - "git_url": { - "description": "URL of the repository to be cloned by this job." - }, - "job_source": { - "description": "The source of the job specification in the remote repository when the job is source controlled.", - "properties": { - "dirty_state": { - "description": "Dirty state indicates the job is not fully synced with the job specification in the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced." - }, - "import_from_git_branch": { - "description": "Name of the branch which the job is imported from." - }, - "job_config_path": { - "description": "Path of the job YAML file that contains the job specification." - } - } - } - } - }, - "health": { - "description": "", - "properties": { - "rules": { - "description": "", - "items": { - "description": "", - "properties": { - "metric": { - "description": "" - }, - "op": { - "description": "" - }, - "value": { - "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule." - } - } - } - } - } - }, - "job_clusters": { - "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. 
You must declare dependent libraries in task settings.", - "items": { - "description": "", - "properties": { - "job_cluster_key": { - "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." - }, - "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", - "properties": { - "apply_policy_default_values": { - "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - } - } - }, - "autotermination_minutes": { - "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination." 
- }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." 
- }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. 
If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. 
Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. 
e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." 
- } - } - } - } - }, - "cluster_name": { - "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n" - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "data_security_mode": { - "description": "" - }, - "docker_image": { - "description": "", - "properties": { - "basic_auth": { - "description": "", - "properties": { - "password": { - "description": "Password of the user" - }, - "username": { - "description": "Name of the user" - } - } - }, - "url": { - "description": "URL of the docker image." - } - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n" - }, - "enable_elastic_disk": { - "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details." 
- }, - "enable_local_disk_encryption": { - "description": "Whether to enable LUKS on cluster VMs' local disks" - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. 
Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. 
`bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. 
e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." 
- }, - "runtime_engine": { - "description": "" - }, - "single_user_name": { - "description": "Single user name if data_security_mode is `SINGLE_USER`" - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "spark_version": { - "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n" - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - }, - "workload_type": { - "description": "", - "properties": { - "clients": { - "description": " defined what type of clients can use the cluster. E.g. 
Notebooks, Jobs", - "properties": { - "jobs": { - "description": "With jobs set, the cluster can be used for jobs" - }, - "notebooks": { - "description": "With notebooks set, this cluster can be used for notebooks" - } - } - } - } - } - } - } - } - } - }, - "max_concurrent_runs": { - "description": "An optional maximum allowed number of concurrent runs of the job.\nSet this value if you want to be able to execute multiple runs of the same job concurrently.\nThis is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.\nThis setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs.\nHowever, from then on, new runs are skipped unless there are fewer than 3 active runs.\nThis value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped." - }, - "name": { - "description": "An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding." - }, - "notification_settings": { - "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", - "properties": { - "no_alert_for_canceled_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." - }, - "no_alert_for_skipped_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." - } - } - }, - "parameters": { - "description": "Job-level parameter definitions", - "items": { - "description": "", - "properties": { - "default": { - "description": "Default value of the parameter." - }, - "name": { - "description": "The name of the defined parameter. 
May only contain alphanumeric characters, `_`, `-`, and `.`" - } - } - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "queue": { - "description": "The queue settings of the job.", - "properties": { - "enabled": { - "description": "If true, enable queueing for the job. This is a required field." - } - } - }, - "run_as": { - "description": "", - "properties": { - "service_principal_name": { - "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." - }, - "user_name": { - "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." - } - } - }, - "schedule": { - "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", - "properties": { - "pause_status": { - "description": "Indicate whether this schedule is paused or not." - }, - "quartz_cron_expression": { - "description": "A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required." - }, - "timezone_id": { - "description": "A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required." - } - } - }, - "tags": { - "description": "A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. 
A maximum of 25 tags can be added to the job.", - "additionalproperties": { - "description": "" - } - }, - "tasks": { - "description": "A list of task specifications to be executed by this job.", - "items": { - "description": "", - "properties": { - "condition_task": { - "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", - "properties": { - "left": { - "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." - }, - "op": { - "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison." - }, - "right": { - "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." - } - } - }, - "dbt_task": { - "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", - "properties": { - "catalog": { - "description": "Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks \u003e= 1.1.1." - }, - "commands": { - "description": "A list of dbt commands to execute. 
All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided.", - "items": { - "description": "" - } - }, - "profiles_directory": { - "description": "Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used." - }, - "project_directory": { - "description": "Path to the project directory. Optional for Git sourced tasks, in which\ncase if no value is provided, the root of the Git repository is used." - }, - "schema": { - "description": "Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used." - }, - "source": { - "description": "Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Project is located in Databricks workspace.\n* `GIT`: Project is located in cloud Git provider." - }, - "warehouse_id": { - "description": "ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument." - } - } - }, - "depends_on": { - "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. 
The task will run only if the `run_if` condition is true.\nThe key is `task_key`, and the value is the name assigned to the dependent task.", - "items": { - "description": "", - "properties": { - "outcome": { - "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." - }, - "task_key": { - "description": "The name of the task this task depends on." - } - } - } - }, - "description": { - "description": "An optional description for this task." - }, - "disable_auto_optimization": { - "description": "An option to disable auto optimization in serverless" - }, - "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", - "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, - "on_duration_warning_threshold_exceeded": { - "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", - "items": { - "description": "" - } - }, - "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_start": { - "description": "A list of email addresses to be notified when a run begins. 
If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_streaming_backlog_exceeded": { - "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", - "items": { - "description": "" - } - }, - "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - } - } - }, - "environment_key": { - "description": "The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute." - }, - "existing_cluster_id": { - "description": "If existing_cluster_id, the ID of an existing cluster that is used for all runs.\nWhen running jobs or tasks on an existing cluster, you may need to manually restart\nthe cluster if it stops responding. We suggest running jobs and tasks on new clusters for\ngreater reliability" - }, - "for_each_task": { - "description": "" - }, - "health": { - "description": "", - "properties": { - "rules": { - "description": "", - "items": { - "description": "", - "properties": { - "metric": { - "description": "" - }, - "op": { - "description": "" - }, - "value": { - "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule." 
- } - } - } - } - } - }, - "job_cluster_key": { - "description": "If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`." - }, - "libraries": { - "description": "An optional list of libraries to be installed on the cluster.\nThe default value is an empty list.", - "items": { - "description": "", - "properties": { - "cran": { - "description": "Specification of a CRAN library to be installed as part of the library", - "properties": { - "package": { - "description": "The name of the CRAN package to install." - }, - "repo": { - "description": "The repository where the package can be found. If not specified, the default CRAN repo is used." - } - } - }, - "egg": { - "description": "Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above." - }, - "jar": { - "description": "URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"jar\": \"/Workspace/path/to/library.jar\" }`, `{ \"jar\" : \"/Volumes/path/to/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." - }, - "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", - "properties": { - "coordinates": { - "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." - }, - "exclusions": { - "description": "List of dependences to exclude. 
For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", - "items": { - "description": "" - } - }, - "repo": { - "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched." - } - } - }, - "pypi": { - "description": "Specification of a PyPi library to be installed. For example:\n`{ \"package\": \"simplejson\" }`", - "properties": { - "package": { - "description": "The name of the pypi package to install. An optional exact version specification is also\nsupported. Examples: \"simplejson\" and \"simplejson==3.8.0\"." - }, - "repo": { - "description": "The repository where the package can be found. If not specified, the default pip index is\nused." - } - } - }, - "requirements": { - "description": "URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported.\nFor example: `{ \"requirements\": \"/Workspace/path/to/requirements.txt\" }` or `{ \"requirements\" : \"/Volumes/path/to/requirements.txt\" }`" - }, - "whl": { - "description": "URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"whl\": \"/Workspace/path/to/library.whl\" }`, `{ \"whl\" : \"/Volumes/path/to/library.whl\" }` or\n`{ \"whl\": \"s3://my-bucket/library.whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." - } - } - } - }, - "max_retries": { - "description": "An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry." 
- }, - "min_retry_interval_millis": { - "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." - }, - "new_cluster": { - "description": "If new_cluster, a description of a new cluster that is created for each run.", - "properties": { - "apply_policy_default_values": { - "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - } - } - }, - "autotermination_minutes": { - "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination." - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. 
Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. 
If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. 
For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." 
- }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. 
`bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "cluster_name": { - "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n" - }, - "custom_tags": { - "description": "Additional tags for cluster resources. 
Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "data_security_mode": { - "description": "" - }, - "docker_image": { - "description": "", - "properties": { - "basic_auth": { - "description": "", - "properties": { - "password": { - "description": "Password of the user" - }, - "username": { - "description": "Name of the user" - } - } - }, - "url": { - "description": "URL of the docker image." - } - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n" - }, - "enable_elastic_disk": { - "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details." 
- }, - "enable_local_disk_encryption": { - "description": "Whether to enable LUKS on cluster VMs' local disks" - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. 
Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. 
`bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. 
e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." 
- }, - "runtime_engine": { - "description": "" - }, - "single_user_name": { - "description": "Single user name if data_security_mode is `SINGLE_USER`" - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "spark_version": { - "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n" - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - }, - "workload_type": { - "description": "", - "properties": { - "clients": { - "description": " defined what type of clients can use the cluster. E.g. 
Notebooks, Jobs", - "properties": { - "jobs": { - "description": "With jobs set, the cluster can be used for jobs" - }, - "notebooks": { - "description": "With notebooks set, this cluster can be used for notebooks" - } - } - } - } - } - } - }, - "notebook_task": { - "description": "If notebook_task, indicates that this task must run a notebook. This field may not be specified in conjunction with spark_jar_task.", - "properties": { - "base_parameters": { - "description": "Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run\nNow with parameters specified, the two parameters maps are merged. If the same key is specified in\n`base_parameters` and in `run-now`, the value from `run-now` is used.\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nIf the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters,\nthe default value from the notebook is used.\n\nRetrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).\n\nThe JSON representation of this field cannot exceed 1MB.", - "additionalproperties": { - "description": "" - } - }, - "notebook_path": { - "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required." - }, - "source": { - "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n* `WORKSPACE`: Notebook is located in Databricks workspace.\n* `GIT`: Notebook is located in cloud Git provider." - }, - "warehouse_id": { - "description": "Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.\n\nNote that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail." - } - } - }, - "notification_settings": { - "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task.", - "properties": { - "alert_on_last_attempt": { - "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." - }, - "no_alert_for_canceled_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." - }, - "no_alert_for_skipped_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." - } - } - }, - "pipeline_task": { - "description": "If pipeline_task, indicates that this task must execute a Pipeline.", - "properties": { - "full_refresh": { - "description": "If true, triggers a full refresh on the delta live table." - }, - "pipeline_id": { - "description": "The full name of the pipeline task to execute." 
- } - } - }, - "python_wheel_task": { - "description": "If python_wheel_task, indicates that this job must execute a PythonWheel.", - "properties": { - "entry_point": { - "description": "Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()`" - }, - "named_parameters": { - "description": "Command-line parameters passed to Python wheel task in the form of `[\"--name=task\", \"--data=dbfs:/path/to/data.json\"]`. Leave it empty if `parameters` is not null.", - "additionalproperties": { - "description": "" - } - }, - "package_name": { - "description": "Name of the package to execute" - }, - "parameters": { - "description": "Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null.", - "items": { - "description": "" - } - } - } - }, - "retry_on_timeout": { - "description": "An optional policy to specify whether to retry a job when it times out. The default behavior\nis to not retry on timeout." 
- }, - "run_if": { - "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed" - }, - "run_job_task": { - "description": "If run_job_task, indicates that this task must execute another job.", - "properties": { - "dbt_commands": { - "description": "An array of commands to execute for jobs with the dbt task, for example `\"dbt_commands\": [\"dbt deps\", \"dbt seed\", \"dbt deps\", \"dbt seed\", \"dbt run\"]`", - "items": { - "description": "" - } - }, - "jar_params": { - "description": "A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", \"35\"]`.\nThe parameters are used to invoke the main function of the main class specified in the Spark JAR task.\nIf not specified upon `run-now`, it defaults to an empty list.\njar_params cannot be specified in conjunction with notebook_params.\nThe JSON representation of this field (for example `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes.\n\nUse [Task parameter variables](/jobs.html\\\"#parameter-variables\\\") to set parameters containing information about job runs.", - "items": { - "description": "" - } - }, - "job_id": { - "description": "ID of the job to trigger." 
- }, - "job_parameters": { - "description": "Job-level parameters used to trigger the job.", - "additionalproperties": { - "description": "" - } - }, - "notebook_params": { - "description": "A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": \"john doe\", \"age\": \"35\"}`.\nThe map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function.\n\nIf not specified upon `run-now`, the triggered run uses the job’s base parameters.\n\nnotebook_params cannot be specified in conjunction with jar_params.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nThe JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes.", - "additionalproperties": { - "description": "" - } - }, - "pipeline_params": { - "description": "", - "properties": { - "full_refresh": { - "description": "If true, triggers a full refresh on the delta live table." - } - } - }, - "python_named_params": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "python_params": { - "description": "A list of parameters for jobs with Python tasks, for example `\"python_params\": [\"john doe\", \"35\"]`.\nThe parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite\nthe parameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). 
Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", - "items": { - "description": "" - } - }, - "spark_submit_params": { - "description": "A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": [\"--class\", \"org.apache.spark.examples.SparkPi\"]`.\nThe parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the\nparameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", - "items": { - "description": "" - } - }, - "sql_params": { - "description": "A map from keys to values for jobs with SQL task, for example `\"sql_params\": {\"name\": \"john doe\", \"age\": \"35\"}`. The SQL alert task does not support custom parameters.", - "additionalproperties": { - "description": "" - } - } - } - }, - "spark_jar_task": { - "description": "If spark_jar_task, indicates that this task must run a JAR.", - "properties": { - "jar_uri": { - "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create." - }, - "main_class_name": { - "description": "The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." 
- }, - "parameters": { - "description": "Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - } - } - }, - "spark_python_task": { - "description": "If spark_python_task, indicates that this task must run a Python file.", - "properties": { - "parameters": { - "description": "Command line parameters passed to the Python file.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - }, - "python_file": { - "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." - }, - "source": { - "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local\nDatabricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository." - } - } - }, - "spark_submit_task": { - "description": "If `spark_submit_task`, indicates that this task must be launched by the spark submit script. This task can run only on new clusters.\n\nIn the `new_cluster` specification, `libraries` and `spark_conf` are not supported. 
Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations.\n\n`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters.\n\nBy default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage.\n\nThe `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths.", - "properties": { - "parameters": { - "description": "Command-line parameters passed to spark submit.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - } - } - }, - "sql_task": { - "description": "If sql_task, indicates that this job must execute a SQL task.", - "properties": { - "alert": { - "description": "If alert, indicates that this job must refresh a SQL alert.", - "properties": { - "alert_id": { - "description": "The canonical identifier of the SQL alert." - }, - "pause_subscriptions": { - "description": "If true, the alert notifications are not sent to subscribers." - }, - "subscriptions": { - "description": "If specified, alert notifications are sent to subscribers.", - "items": { - "description": "", - "properties": { - "destination_id": { - "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications." - }, - "user_name": { - "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications." 
- } - } - } - } - } - }, - "dashboard": { - "description": "If dashboard, indicates that this job must refresh a SQL dashboard.", - "properties": { - "custom_subject": { - "description": "Subject of the email sent to subscribers of this task." - }, - "dashboard_id": { - "description": "The canonical identifier of the SQL dashboard." - }, - "pause_subscriptions": { - "description": "If true, the dashboard snapshot is not taken, and emails are not sent to subscribers." - }, - "subscriptions": { - "description": "If specified, dashboard snapshots are sent to subscriptions.", - "items": { - "description": "", - "properties": { - "destination_id": { - "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications." - }, - "user_name": { - "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications." - } - } - } - } - } - }, - "file": { - "description": "If file, indicates that this job runs a SQL file in a remote Git repository.", - "properties": { - "path": { - "description": "Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths." - }, - "source": { - "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider." - } - } - }, - "parameters": { - "description": "Parameters to be used for each run of this job. 
The SQL alert task does not support custom parameters.", - "additionalproperties": { - "description": "" - } - }, - "query": { - "description": "If query, indicates that this job must execute a SQL query.", - "properties": { - "query_id": { - "description": "The canonical identifier of the SQL query." - } - } - }, - "warehouse_id": { - "description": "The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs." - } - } - }, - "task_key": { - "description": "A unique name for the task. This field is used to refer to this task from other tasks.\nThis field is required and must be unique within its parent job.\nOn Update or Reset, this field is used to reference the tasks to be updated or reset." - }, - "timeout_seconds": { - "description": "An optional timeout applied to each run of this job task. A value of `0` means no timeout." - }, - "webhook_notifications": { - "description": "A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications.", - "properties": { - "on_duration_warning_threshold_exceeded": { - "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_failure": { - "description": "An optional list of system notification IDs to call when the run fails. 
A maximum of 3 destinations can be specified for the `on_failure` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_start": { - "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_streaming_backlog_exceeded": { - "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_success": { - "description": "An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - } - } - } - } - } - }, - "timeout_seconds": { - "description": "An optional timeout applied to each run of this job. A value of `0` means no timeout." - }, - "trigger": { - "description": "A configuration to trigger a run when certain conditions are met. 
The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", - "properties": { - "file_arrival": { - "description": "File arrival trigger settings.", - "properties": { - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds" - }, - "url": { - "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - }, - "pause_status": { - "description": "Whether this trigger is paused or not." - }, - "periodic": { - "description": "Periodic trigger settings.", - "properties": { - "interval": { - "description": "The interval at which the trigger should run." - }, - "unit": { - "description": "The unit of time for the interval." - } - } - }, - "table": { - "description": "Old table trigger settings name. Deprecated in favor of `table_update`.", - "properties": { - "condition": { - "description": "The table(s) condition based on which to trigger a job run." - }, - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds." - }, - "table_names": { - "description": "A list of Delta tables to monitor for changes. 
The table name must be in the format `catalog_name.schema_name.table_name`.", - "items": { - "description": "" - } - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - }, - "table_update": { - "description": "", - "properties": { - "condition": { - "description": "The table(s) condition based on which to trigger a job run." - }, - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds." - }, - "table_names": { - "description": "A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.", - "items": { - "description": "" - } - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - } - } - }, - "webhook_notifications": { - "description": "A collection of system notification IDs to notify when runs of this job begin or complete.", - "properties": { - "on_duration_warning_threshold_exceeded": { - "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. 
A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_failure": { - "description": "An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_start": { - "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_streaming_backlog_exceeded": { - "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_success": { - "description": "An optional list of system notification IDs to call when the run completes successfully. 
A maximum of 3 destinations can be specified for the `on_success` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - } - } - } - } - } - }, - "model_serving_endpoints": { - "description": "List of Model Serving Endpoints", - "additionalproperties": { - "description": "", - "properties": { - "config": { - "description": "The core config of the serving endpoint.", - "properties": { - "auto_capture_config": { - "description": "Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.", - "properties": { - "catalog_name": { - "description": "The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled." - }, - "enabled": { - "description": "Indicates whether the inference table is enabled." - }, - "schema_name": { - "description": "The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled." - }, - "table_name_prefix": { - "description": "The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled." - } - } - }, - "served_entities": { - "description": "A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities.", - "items": { - "description": "", - "properties": { - "entity_name": { - "description": "The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC),\nor a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of\n__catalog_name__.__schema_name__.__model_name__.\n" - }, - "entity_version": { - "description": "The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC." 
- }, - "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. \nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", - "additionalproperties": { - "description": "" - } - }, - "external_model": { - "description": "The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled)\ncan be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model,\nit cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.\nThe task type of all external models within an endpoint must be the same.\n", - "properties": { - "ai21labs_config": { - "description": "AI21Labs Config. Only required if the provider is 'ai21labs'.", - "properties": { - "ai21labs_api_key": { - "description": "The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`." - }, - "ai21labs_api_key_plaintext": { - "description": "An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`." - } - } - }, - "amazon_bedrock_config": { - "description": "Amazon Bedrock Config. 
Only required if the provider is 'amazon-bedrock'.", - "properties": { - "aws_access_key_id": { - "description": "The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`." - }, - "aws_access_key_id_plaintext": { - "description": "An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`." - }, - "aws_region": { - "description": "The AWS region to use. Bedrock has to be enabled there." - }, - "aws_secret_access_key": { - "description": "The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`." - }, - "aws_secret_access_key_plaintext": { - "description": "An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`." - }, - "bedrock_provider": { - "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon." - } - } - }, - "anthropic_config": { - "description": "Anthropic Config. 
Only required if the provider is 'anthropic'.", - "properties": { - "anthropic_api_key": { - "description": "The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`." - }, - "anthropic_api_key_plaintext": { - "description": "The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`." - } - } - }, - "cohere_config": { - "description": "Cohere Config. Only required if the provider is 'cohere'.", - "properties": { - "cohere_api_base": { - "description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n" - }, - "cohere_api_key": { - "description": "The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`." - }, - "cohere_api_key_plaintext": { - "description": "The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`." - } - } - }, - "databricks_model_serving_config": { - "description": "Databricks Model Serving Config. 
Only required if the provider is 'databricks-model-serving'.", - "properties": { - "databricks_api_token": { - "description": "The Databricks secret key reference for a Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model.\nIf you prefer to paste your API key directly, see `databricks_api_token_plaintext`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n" - }, - "databricks_api_token_plaintext": { - "description": "The Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `databricks_api_token`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n" - }, - "databricks_workspace_url": { - "description": "The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.\n" - } - } - }, - "google_cloud_vertex_ai_config": { - "description": "Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'.", - "properties": { - "private_key": { - "description": "The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. 
You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`" - }, - "private_key_plaintext": { - "description": "The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`." - }, - "project_id": { - "description": "This is the Google Cloud project id that the service account is associated with." - }, - "region": { - "description": "This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. Some models are only available in specific regions." - } - } - }, - "name": { - "description": "The name of the external model." - }, - "openai_config": { - "description": "OpenAI Config. 
Only required if the provider is 'openai'.", - "properties": { - "microsoft_entra_client_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.\n" - }, - "microsoft_entra_client_secret": { - "description": "The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.\nIf you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n" - }, - "microsoft_entra_client_secret_plaintext": { - "description": "The client secret used for Microsoft Entra ID authentication provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n" - }, - "microsoft_entra_tenant_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.\n" - }, - "openai_api_base": { - "description": "This is a field to provide a customized base URl for the OpenAI API.\nFor Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service\nprovided by Azure.\nFor other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used.\n" - }, - "openai_api_key": { - "description": "The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`." - }, - "openai_api_key_plaintext": { - "description": "The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. 
If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`." - }, - "openai_api_type": { - "description": "This is an optional field to specify the type of OpenAI API to use.\nFor Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security\naccess validation protocol. For access token validation, use azure. For authentication using Azure Active\nDirectory (Azure AD) use, azuread.\n" - }, - "openai_api_version": { - "description": "This is an optional field to specify the OpenAI API version.\nFor Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to\nutilize, specified by a date.\n" - }, - "openai_deployment_name": { - "description": "This field is only required for Azure OpenAI and is the name of the deployment resource for the\nAzure OpenAI service.\n" - }, - "openai_organization": { - "description": "This is an optional field to specify the organization in OpenAI or Azure OpenAI.\n" - } - } - }, - "palm_config": { - "description": "PaLM Config. Only required if the provider is 'palm'.", - "properties": { - "palm_api_key": { - "description": "The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`." - }, - "palm_api_key_plaintext": { - "description": "The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`." - } - } - }, - "provider": { - "description": "The name of the provider for the external model. 
Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n" - }, - "task": { - "description": "The task type of the external model." - } - } - }, - "instance_profile_arn": { - "description": "ARN of the instance profile that the served entity uses to access AWS resources." - }, - "max_provisioned_throughput": { - "description": "The maximum tokens per second that the endpoint can scale up to." - }, - "min_provisioned_throughput": { - "description": "The minimum tokens per second that the endpoint can scale down to." - }, - "name": { - "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores.\nIf not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other\nentities, it defaults to \u003centity-name\u003e-\u003centity-version\u003e.\n" - }, - "scale_to_zero_enabled": { - "description": "Whether the compute resources for the served entity should scale down to zero." - }, - "workload_size": { - "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.\n" - }, - "workload_type": { - "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". 
For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n" - } - } - } - }, - "served_models": { - "description": "(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models.", - "items": { - "description": "", - "properties": { - "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", - "additionalproperties": { - "description": "" - } - }, - "instance_profile_arn": { - "description": "ARN of the instance profile that the served model will use to access AWS resources." - }, - "max_provisioned_throughput": { - "description": "The maximum tokens per second that the endpoint can scale up to." - }, - "min_provisioned_throughput": { - "description": "The minimum tokens per second that the endpoint can scale down to." - }, - "model_name": { - "description": "The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,\nin the form of __catalog_name__.__schema_name__.__model_name__.\n" - }, - "model_version": { - "description": "The version of the model in Databricks Model Registry or Unity Catalog to be served." - }, - "name": { - "description": "The name of a served model. It must be unique across an endpoint. 
If not specified, this field will default to \u003cmodel-name\u003e-\u003cmodel-version\u003e.\nA served model name can consist of alphanumeric characters, dashes, and underscores.\n" - }, - "scale_to_zero_enabled": { - "description": "Whether the compute resources for the served model should scale down to zero." - }, - "workload_size": { - "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n" - }, - "workload_type": { - "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n" - } - } - } - }, - "traffic_config": { - "description": "The traffic config defining how invocations to the serving endpoint should be routed.", - "properties": { - "routes": { - "description": "The list of routes that define traffic to each served entity.", - "items": { - "description": "", - "properties": { - "served_model_name": { - "description": "The name of the served model this route configures traffic for." - }, - "traffic_percentage": { - "description": "The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive." 
- } - } - } - } - } - } - } - }, - "name": { - "description": "The name of the serving endpoint. This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.\n" - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "rate_limits": { - "description": "Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.", - "items": { - "description": "", - "properties": { - "calls": { - "description": "Used to specify how many calls are allowed for a key within the renewal_period." - }, - "key": { - "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified." - }, - "renewal_period": { - "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported." - } - } - } - }, - "route_optimized": { - "description": "Enable route optimization for the serving endpoint." - }, - "tags": { - "description": "Tags to be attached to the serving endpoint and automatically propagated to billing logs.", - "items": { - "description": "", - "properties": { - "key": { - "description": "Key field for a serving endpoint tag." - }, - "value": { - "description": "Optional value field for a serving endpoint tag." - } - } - } - } - } - } - }, - "models": { - "description": "List of MLflow models", - "additionalproperties": { - "description": "", - "properties": { - "creation_timestamp": { - "description": "Timestamp recorded when this `registered_model` was created." - }, - "description": { - "description": "Description of this `registered_model`." 
- }, - "last_updated_timestamp": { - "description": "Timestamp recorded when metadata for this `registered_model` was last updated." - }, - "latest_versions": { - "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", - "items": { - "description": "", - "properties": { - "creation_timestamp": { - "description": "Timestamp recorded when this `model_version` was created." - }, - "current_stage": { - "description": "Current stage for this `model_version`." - }, - "description": { - "description": "Description of this `model_version`." - }, - "last_updated_timestamp": { - "description": "Timestamp recorded when metadata for this `model_version` was last updated." - }, - "name": { - "description": "Unique name of the model" - }, - "run_id": { - "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." - }, - "run_link": { - "description": "Run Link: Direct link to the run that generated this version" - }, - "source": { - "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" - }, - "status": { - "description": "Current status of `model_version`" - }, - "status_message": { - "description": "Details on current `status`, if it is pending or failed." - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs for this `model_version`.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - }, - "user_id": { - "description": "User that created this `model_version`." - }, - "version": { - "description": "Model's version number." - } - } - } - }, - "name": { - "description": "Unique name for the model." 
- }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - }, - "user_id": { - "description": "User that created this `registered_model`" - } - } - } - }, - "pipelines": { - "description": "List of DLT pipelines", - "additionalproperties": { - "description": "", - "properties": { - "catalog": { - "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog." - }, - "channel": { - "description": "DLT Release Channel that specifies which version to use." - }, - "clusters": { - "description": "Cluster settings for this pipeline deployment.", - "items": { - "description": "", - "properties": { - "apply_policy_default_values": { - "description": "Note: This field won't be persisted. Only API users will check this field." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`." 
- }, - "min_workers": { - "description": "The minimum number of workers the cluster can scale down to when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - }, - "mode": { - "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. The legacy autoscaling feature is used for `maintenance`\nclusters.\n" - } - } - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." 
- }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." 
- }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." 
- } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. 
The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. 
It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above." - }, - "enable_local_disk_encryption": { - "description": "Whether to enable local disk encryption for the cluster." 
- }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. 
If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. 
`bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. 
e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "label": { - "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." 
- }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nSee :method:clusters/create for more details.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - } - } - } - }, - "configuration": { - "description": "String-String configuration for this pipeline execution.", - "additionalproperties": { - "description": "" - } - }, - "continuous": { - "description": "Whether the pipeline is continuous or triggered. This replaces `trigger`." - }, - "deployment": { - "description": "Deployment type of this pipeline.", - "properties": { - "kind": { - "description": "The deployment method that manages the pipeline." - }, - "metadata_file_path": { - "description": "The path to the file containing metadata about the deployment." 
- } - } - }, - "development": { - "description": "Whether the pipeline is in Development mode. Defaults to false." - }, - "edition": { - "description": "Pipeline product edition." - }, - "filters": { - "description": "Filters on which Pipeline packages to include in the deployed graph.", - "properties": { - "exclude": { - "description": "Paths to exclude.", - "items": { - "description": "" - } - }, - "include": { - "description": "Paths to include.", - "items": { - "description": "" - } - } - } - }, - "gateway_definition": { - "description": "The definition of a gateway pipeline to support CDC.", - "properties": { - "connection_id": { - "description": "Immutable. The Unity Catalog connection this gateway pipeline uses to communicate with the source." - }, - "gateway_storage_catalog": { - "description": "Required, Immutable. The name of the catalog for the gateway pipeline's storage location." - }, - "gateway_storage_name": { - "description": "Optional. The Unity Catalog-compatible name for the gateway storage location.\nThis is the destination to use for the data that is extracted by the gateway.\nDelta Live Tables system will automatically create the storage location under the catalog and schema.\n" - }, - "gateway_storage_schema": { - "description": "Required, Immutable. The name of the schema for the gateway pipelines's storage location." - } - } - }, - "id": { - "description": "Unique identifier for this pipeline." - }, - "ingestion_definition": { - "description": "The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings.", - "properties": { - "connection_name": { - "description": "Immutable. The Unity Catalog connection this ingestion pipeline uses to communicate with the source. Specify either ingestion_gateway_id or connection_name." - }, - "ingestion_gateway_id": { - "description": "Immutable. 
Identifier for the ingestion gateway used by this ingestion pipeline to communicate with the source. Specify either ingestion_gateway_id or connection_name." - }, - "objects": { - "description": "Required. Settings specifying tables to replicate and the destination for the replicated tables.", - "items": { - "description": "", - "properties": { - "schema": { - "description": "Select tables from a specific source schema.", - "properties": { - "destination_catalog": { - "description": "Required. Destination catalog to store tables." - }, - "destination_schema": { - "description": "Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists." - }, - "source_catalog": { - "description": "The source catalog name. Might be optional depending on the type of source." - }, - "source_schema": { - "description": "Required. Schema name in the source database." - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - }, - "table": { - "description": "Select tables from a specific source table.", - "properties": { - "destination_catalog": { - "description": "Required. Destination catalog to store table." - }, - "destination_schema": { - "description": "Required. Destination schema to store table." 
- }, - "destination_table": { - "description": "Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used." - }, - "source_catalog": { - "description": "Source catalog name. Might be optional depending on the type of source." - }, - "source_schema": { - "description": "Schema name in the source database. Might be optional depending on the type of source." - }, - "source_table": { - "description": "Required. Table name in the source database." - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - } - } - } - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." 
- } - } - } - } - }, - "libraries": { - "description": "Libraries or code needed by this deployment.", - "items": { - "description": "", - "properties": { - "file": { - "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", - "properties": { - "path": { - "description": "The absolute path of the file." - } - } - }, - "jar": { - "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" - }, - "maven": { - "description": "Specification of a maven library to be installed.\n", - "properties": { - "coordinates": { - "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." - }, - "exclusions": { - "description": "List of dependences to exclude. For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", - "items": { - "description": "" - } - }, - "repo": { - "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched." - } - } - }, - "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\n", - "properties": { - "path": { - "description": "The absolute path of the notebook." - } - } - }, - "whl": { - "description": "URI of the whl to be installed." - } - } - } - }, - "name": { - "description": "Friendly identifier for this pipeline." - }, - "notifications": { - "description": "List of notification settings for this pipeline.", - "items": { - "description": "", - "properties": { - "alerts": { - "description": "A list of alerts that trigger the sending of notifications to the configured\ndestinations. 
The supported alerts are:\n\n* `on-update-success`: A pipeline update completes successfully.\n* `on-update-failure`: Each time a pipeline update fails.\n* `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error.\n* `on-flow-failure`: A single data flow fails.\n", - "items": { - "description": "" - } - }, - "email_recipients": { - "description": "A list of email addresses notified when a configured alert is triggered.\n", - "items": { - "description": "" - } - } - } - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "photon": { - "description": "Whether Photon is enabled for this pipeline." - }, - "serverless": { - "description": "Whether serverless compute is enabled for this pipeline." - }, - "storage": { - "description": "DBFS root directory for storing checkpoints and tables." - }, - "target": { - "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." - }, - "trigger": { - "description": "Which pipeline trigger to use. 
Deprecated: Use `continuous` instead.", - "properties": { - "cron": { - "description": "", - "properties": { - "quartz_cron_schedule": { - "description": "" - }, - "timezone_id": { - "description": "" - } - } - }, - "manual": { - "description": "" - } - } - } - } - } - }, - "quality_monitors": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "assets_dir": { - "description": "" - }, - "baseline_table_name": { - "description": "" - }, - "custom_metrics": { - "description": "", - "items": { - "description": "", - "properties": { - "definition": { - "description": "" - }, - "input_columns": { - "description": "", - "items": { - "description": "" - } - }, - "name": { - "description": "" - }, - "output_data_type": { - "description": "" - }, - "type": { - "description": "" - } - } - } - }, - "data_classification_config": { - "description": "", - "properties": { - "enabled": { - "description": "" - } - } - }, - "inference_log": { - "description": "", - "properties": { - "granularities": { - "description": "", - "items": { - "description": "" - } - }, - "label_col": { - "description": "" - }, - "model_id_col": { - "description": "" - }, - "prediction_col": { - "description": "" - }, - "prediction_proba_col": { - "description": "" - }, - "problem_type": { - "description": "" - }, - "timestamp_col": { - "description": "" - } - } - }, - "notifications": { - "description": "", - "properties": { - "on_failure": { - "description": "", - "properties": { - "email_addresses": { - "description": "", - "items": { - "description": "" - } - } - } - }, - "on_new_classification_tag_detected": { - "description": "", - "properties": { - "email_addresses": { - "description": "", - "items": { - "description": "" - } - } - } - } - } - }, - "output_schema_name": { - "description": "" - }, - "schedule": { - "description": "", - "properties": { - "pause_status": { - "description": "" - }, - "quartz_cron_expression": { - "description": "" - }, - 
"timezone_id": { - "description": "" - } - } - }, - "skip_builtin_dashboard": { - "description": "" - }, - "slicing_exprs": { - "description": "", - "items": { - "description": "" - } - }, - "snapshot": { - "description": "" - }, - "time_series": { - "description": "", - "properties": { - "granularities": { - "description": "", - "items": { - "description": "" - } - }, - "timestamp_col": { - "description": "" - } - } - }, - "warehouse_id": { - "description": "" - } - } - } - }, - "registered_models": { - "description": "List of Registered Models", - "additionalproperties": { - "description": "", - "properties": { - "catalog_name": { - "description": "The name of the catalog where the schema and the registered model reside" - }, - "comment": { - "description": "The comment attached to the registered model" - }, - "grants": { - "description": "", - "items": { - "description": "", - "properties": { - "principal": { - "description": "" - }, - "privileges": { - "description": "", - "items": { - "description": "" - } - } - } - } - }, - "name": { - "description": "The name of the registered model" - }, - "schema_name": { - "description": "The name of the schema where the registered model resides" - }, - "storage_location": { - "description": "The storage location on the cloud under which model version data files are stored" - } - } - } - }, - "schemas": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "catalog_name": { - "description": "" - }, - "comment": { - "description": "" - }, - "grants": { - "description": "", - "items": { - "description": "", - "properties": { - "principal": { - "description": "" - }, - "privileges": { - "description": "", - "items": { - "description": "" - } - } - } - } - }, - "name": { - "description": "" - }, - "properties": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "storage_root": { - "description": "" - } - } - } - } - } - }, - "run_as": { - "description": "", 
- "properties": { - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - }, - "sync": { - "description": "", - "properties": { - "exclude": { - "description": "", - "items": { - "description": "" - } - }, - "include": { - "description": "", - "items": { - "description": "" - } - }, - "paths": { - "description": "", - "items": { - "description": "" - } - } - } - }, - "targets": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "artifacts": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "build": { - "description": "" - }, - "executable": { - "description": "" - }, - "files": { - "description": "", - "items": { - "description": "", - "properties": { - "source": { - "description": "" - } - } - } - }, - "path": { - "description": "" - }, - "type": { - "description": "" - } - } - } - }, - "bundle": { - "description": "", - "properties": { - "compute_id": { - "description": "" - }, - "databricks_cli_version": { - "description": "" - }, - "deployment": { - "description": "", - "properties": { - "fail_on_active_runs": { - "description": "" - }, - "lock": { - "description": "", - "properties": { - "enabled": { - "description": "" - }, - "force": { - "description": "" - } - } - } - } - }, - "git": { - "description": "", - "properties": { - "branch": { - "description": "" - }, - "origin_url": { - "description": "" - } - } - }, - "name": { - "description": "" - } - } - }, - "compute_id": { - "description": "" - }, - "default": { - "description": "" - }, - "git": { - "description": "", - "properties": { - "branch": { - "description": "" - }, - "origin_url": { - "description": "" - } - } - }, - "mode": { - "description": "" - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" 
- }, - "user_name": { - "description": "" - } - } - } - }, - "presets": { - "description": "", - "properties": { - "jobs_max_concurrent_runs": { - "description": "" - }, - "name_prefix": { - "description": "" - }, - "pipelines_development": { - "description": "" - }, - "tags": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "trigger_pause_status": { - "description": "" - } - } - }, - "resources": { - "description": "Collection of Databricks resources to deploy.", - "properties": { - "experiments": { - "description": "List of MLflow experiments", - "additionalproperties": { - "description": "", - "properties": { - "artifact_location": { - "description": "Location where artifacts for the experiment are stored." - }, - "creation_time": { - "description": "Creation time" - }, - "experiment_id": { - "description": "Unique identifier for the experiment." - }, - "last_update_time": { - "description": "Last update time" - }, - "lifecycle_stage": { - "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." - }, - "name": { - "description": "Human readable name that identifies the experiment." - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - } - } - } - }, - "jobs": { - "description": "List of Databricks jobs", - "additionalproperties": { - "description": "", - "properties": { - "continuous": { - "description": "An optional continuous property for this job. 
The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", - "properties": { - "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." - } - } - }, - "deployment": { - "description": "Deployment information for jobs managed by external sources.", - "properties": { - "kind": { - "description": "The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle." - }, - "metadata_file_path": { - "description": "Path of the file that contains deployment metadata." - } - } - }, - "description": { - "description": "An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding." - }, - "edit_mode": { - "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified." - }, - "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted.", - "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, - "on_duration_warning_threshold_exceeded": { - "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", - "items": { - "description": "" - } - }, - "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. 
A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_start": { - "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_streaming_backlog_exceeded": { - "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", - "items": { - "description": "" - } - }, - "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - } - } - }, - "environments": { - "description": "A list of task execution environment specifications that can be referenced by tasks of this job.", - "items": { - "description": "", - "properties": { - "environment_key": { - "description": "The key of an environment. It has to be unique within a job." 
- }, - "spec": { - "description": "", - "properties": { - "client": { - "description": "Client version used by the environment\nThe client is the user-facing environment of the runtime.\nEach client comes with a specific set of pre-installed libraries.\nThe version is a string, consisting of the major client version." - }, - "dependencies": { - "description": "List of pip dependencies, as supported by the version of pip in this environment.\nEach dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/\nAllowed dependency could be \u003crequirement specifier\u003e, \u003carchive url/path\u003e, \u003clocal project path\u003e(WSFS or Volumes in Databricks), \u003cvcs project url\u003e\nE.g. dependencies: [\"foo==0.0.1\", \"-r /Workspace/test/requirements.txt\"]", - "items": { - "description": "" - } - } - } - } - } - } - }, - "format": { - "description": "Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `\"MULTI_TASK\"`." - }, - "git_source": { - "description": "An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.", - "properties": { - "git_branch": { - "description": "Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit." - }, - "git_commit": { - "description": "Commit to be checked out and used by this job. 
This field cannot be specified in conjunction with git_branch or git_tag." - }, - "git_provider": { - "description": "Unique identifier of the service used to host the Git repository. The value is case insensitive." - }, - "git_snapshot": { - "description": "", - "properties": { - "used_commit": { - "description": "Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to." - } - } - }, - "git_tag": { - "description": "Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit." - }, - "git_url": { - "description": "URL of the repository to be cloned by this job." - }, - "job_source": { - "description": "The source of the job specification in the remote repository when the job is source controlled.", - "properties": { - "dirty_state": { - "description": "Dirty state indicates the job is not fully synced with the job specification in the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced." - }, - "import_from_git_branch": { - "description": "Name of the branch which the job is imported from." - }, - "job_config_path": { - "description": "Path of the job YAML file that contains the job specification." 
- } - } - } - } - }, - "health": { - "description": "", - "properties": { - "rules": { - "description": "", - "items": { - "description": "", - "properties": { - "metric": { - "description": "" - }, - "op": { - "description": "" - }, - "value": { - "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule." - } - } - } - } - } - }, - "job_clusters": { - "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", - "items": { - "description": "", - "properties": { - "job_cluster_key": { - "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." - }, - "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", - "properties": { - "apply_policy_default_values": { - "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation." 
- } - } - }, - "autotermination_minutes": { - "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination." - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. 
For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. 
If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. 
Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. 
e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." 
- } - } - } - } - }, - "cluster_name": { - "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n" - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "data_security_mode": { - "description": "" - }, - "docker_image": { - "description": "", - "properties": { - "basic_auth": { - "description": "", - "properties": { - "password": { - "description": "Password of the user" - }, - "username": { - "description": "Name of the user" - } - } - }, - "url": { - "description": "URL of the docker image." - } - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n" - }, - "enable_elastic_disk": { - "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details." 
- }, - "enable_local_disk_encryption": { - "description": "Whether to enable LUKS on cluster VMs' local disks" - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. 
Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. 
`bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. 
e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." 
- }, - "runtime_engine": { - "description": "" - }, - "single_user_name": { - "description": "Single user name if data_security_mode is `SINGLE_USER`" - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "spark_version": { - "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n" - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - }, - "workload_type": { - "description": "", - "properties": { - "clients": { - "description": " defined what type of clients can use the cluster. E.g. 
Notebooks, Jobs", - "properties": { - "jobs": { - "description": "With jobs set, the cluster can be used for jobs" - }, - "notebooks": { - "description": "With notebooks set, this cluster can be used for notebooks" - } - } - } - } - } - } - } - } - } - }, - "max_concurrent_runs": { - "description": "An optional maximum allowed number of concurrent runs of the job.\nSet this value if you want to be able to execute multiple runs of the same job concurrently.\nThis is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.\nThis setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs.\nHowever, from then on, new runs are skipped unless there are fewer than 3 active runs.\nThis value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped." - }, - "name": { - "description": "An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding." - }, - "notification_settings": { - "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", - "properties": { - "no_alert_for_canceled_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." - }, - "no_alert_for_skipped_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." - } - } - }, - "parameters": { - "description": "Job-level parameter definitions", - "items": { - "description": "", - "properties": { - "default": { - "description": "Default value of the parameter." - }, - "name": { - "description": "The name of the defined parameter. 
May only contain alphanumeric characters, `_`, `-`, and `.`" - } - } - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "queue": { - "description": "The queue settings of the job.", - "properties": { - "enabled": { - "description": "If true, enable queueing for the job. This is a required field." - } - } - }, - "run_as": { - "description": "", - "properties": { - "service_principal_name": { - "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." - }, - "user_name": { - "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." - } - } - }, - "schedule": { - "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", - "properties": { - "pause_status": { - "description": "Indicate whether this schedule is paused or not." - }, - "quartz_cron_expression": { - "description": "A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required." - }, - "timezone_id": { - "description": "A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required." - } - } - }, - "tags": { - "description": "A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. 
A maximum of 25 tags can be added to the job.", - "additionalproperties": { - "description": "" - } - }, - "tasks": { - "description": "A list of task specifications to be executed by this job.", - "items": { - "description": "", - "properties": { - "condition_task": { - "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", - "properties": { - "left": { - "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." - }, - "op": { - "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison." - }, - "right": { - "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." - } - } - }, - "dbt_task": { - "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", - "properties": { - "catalog": { - "description": "Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks \u003e= 1.1.1." - }, - "commands": { - "description": "A list of dbt commands to execute. 
All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided.", - "items": { - "description": "" - } - }, - "profiles_directory": { - "description": "Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used." - }, - "project_directory": { - "description": "Path to the project directory. Optional for Git sourced tasks, in which\ncase if no value is provided, the root of the Git repository is used." - }, - "schema": { - "description": "Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used." - }, - "source": { - "description": "Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Project is located in Databricks workspace.\n* `GIT`: Project is located in cloud Git provider." - }, - "warehouse_id": { - "description": "ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument." - } - } - }, - "depends_on": { - "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. 
The task will run only if the `run_if` condition is true.\nThe key is `task_key`, and the value is the name assigned to the dependent task.", - "items": { - "description": "", - "properties": { - "outcome": { - "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." - }, - "task_key": { - "description": "The name of the task this task depends on." - } - } - } - }, - "description": { - "description": "An optional description for this task." - }, - "disable_auto_optimization": { - "description": "An option to disable auto optimization in serverless" - }, - "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", - "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, - "on_duration_warning_threshold_exceeded": { - "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", - "items": { - "description": "" - } - }, - "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_start": { - "description": "A list of email addresses to be notified when a run begins. 
If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_streaming_backlog_exceeded": { - "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", - "items": { - "description": "" - } - }, - "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - } - } - }, - "environment_key": { - "description": "The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute." - }, - "existing_cluster_id": { - "description": "If existing_cluster_id, the ID of an existing cluster that is used for all runs.\nWhen running jobs or tasks on an existing cluster, you may need to manually restart\nthe cluster if it stops responding. We suggest running jobs and tasks on new clusters for\ngreater reliability" - }, - "for_each_task": { - "description": "" - }, - "health": { - "description": "", - "properties": { - "rules": { - "description": "", - "items": { - "description": "", - "properties": { - "metric": { - "description": "" - }, - "op": { - "description": "" - }, - "value": { - "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule." 
- } - } - } - } - } - }, - "job_cluster_key": { - "description": "If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`." - }, - "libraries": { - "description": "An optional list of libraries to be installed on the cluster.\nThe default value is an empty list.", - "items": { - "description": "", - "properties": { - "cran": { - "description": "Specification of a CRAN library to be installed as part of the library", - "properties": { - "package": { - "description": "The name of the CRAN package to install." - }, - "repo": { - "description": "The repository where the package can be found. If not specified, the default CRAN repo is used." - } - } - }, - "egg": { - "description": "Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above." - }, - "jar": { - "description": "URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"jar\": \"/Workspace/path/to/library.jar\" }`, `{ \"jar\" : \"/Volumes/path/to/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." - }, - "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", - "properties": { - "coordinates": { - "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." - }, - "exclusions": { - "description": "List of dependences to exclude. 
For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", - "items": { - "description": "" - } - }, - "repo": { - "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched." - } - } - }, - "pypi": { - "description": "Specification of a PyPi library to be installed. For example:\n`{ \"package\": \"simplejson\" }`", - "properties": { - "package": { - "description": "The name of the pypi package to install. An optional exact version specification is also\nsupported. Examples: \"simplejson\" and \"simplejson==3.8.0\"." - }, - "repo": { - "description": "The repository where the package can be found. If not specified, the default pip index is\nused." - } - } - }, - "requirements": { - "description": "URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported.\nFor example: `{ \"requirements\": \"/Workspace/path/to/requirements.txt\" }` or `{ \"requirements\" : \"/Volumes/path/to/requirements.txt\" }`" - }, - "whl": { - "description": "URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"whl\": \"/Workspace/path/to/library.whl\" }`, `{ \"whl\" : \"/Volumes/path/to/library.whl\" }` or\n`{ \"whl\": \"s3://my-bucket/library.whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." - } - } - } - }, - "max_retries": { - "description": "An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry." 
- }, - "min_retry_interval_millis": { - "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." - }, - "new_cluster": { - "description": "If new_cluster, a description of a new cluster that is created for each run.", - "properties": { - "apply_policy_default_values": { - "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - } - } - }, - "autotermination_minutes": { - "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination." - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. 
Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. 
If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. 
For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." 
- }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. 
`bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "cluster_name": { - "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n" - }, - "custom_tags": { - "description": "Additional tags for cluster resources. 
Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "data_security_mode": { - "description": "" - }, - "docker_image": { - "description": "", - "properties": { - "basic_auth": { - "description": "", - "properties": { - "password": { - "description": "Password of the user" - }, - "username": { - "description": "Name of the user" - } - } - }, - "url": { - "description": "URL of the docker image." - } - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n" - }, - "enable_elastic_disk": { - "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details." 
- }, - "enable_local_disk_encryption": { - "description": "Whether to enable LUKS on cluster VMs' local disks" - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. 
Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. 
`bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. 
e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." 
- }, - "runtime_engine": { - "description": "" - }, - "single_user_name": { - "description": "Single user name if data_security_mode is `SINGLE_USER`" - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "spark_version": { - "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n" - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - }, - "workload_type": { - "description": "", - "properties": { - "clients": { - "description": " defined what type of clients can use the cluster. E.g. 
Notebooks, Jobs", - "properties": { - "jobs": { - "description": "With jobs set, the cluster can be used for jobs" - }, - "notebooks": { - "description": "With notebooks set, this cluster can be used for notebooks" - } - } - } - } - } - } - }, - "notebook_task": { - "description": "If notebook_task, indicates that this task must run a notebook. This field may not be specified in conjunction with spark_jar_task.", - "properties": { - "base_parameters": { - "description": "Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run\nNow with parameters specified, the two parameters maps are merged. If the same key is specified in\n`base_parameters` and in `run-now`, the value from `run-now` is used.\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nIf the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters,\nthe default value from the notebook is used.\n\nRetrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).\n\nThe JSON representation of this field cannot exceed 1MB.", - "additionalproperties": { - "description": "" - } - }, - "notebook_path": { - "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required." - }, - "source": { - "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n* `WORKSPACE`: Notebook is located in Databricks workspace.\n* `GIT`: Notebook is located in cloud Git provider." - }, - "warehouse_id": { - "description": "Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.\n\nNote that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail." - } - } - }, - "notification_settings": { - "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task.", - "properties": { - "alert_on_last_attempt": { - "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." - }, - "no_alert_for_canceled_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." - }, - "no_alert_for_skipped_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." - } - } - }, - "pipeline_task": { - "description": "If pipeline_task, indicates that this task must execute a Pipeline.", - "properties": { - "full_refresh": { - "description": "If true, triggers a full refresh on the delta live table." - }, - "pipeline_id": { - "description": "The full name of the pipeline task to execute." 
- } - } - }, - "python_wheel_task": { - "description": "If python_wheel_task, indicates that this job must execute a PythonWheel.", - "properties": { - "entry_point": { - "description": "Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()`" - }, - "named_parameters": { - "description": "Command-line parameters passed to Python wheel task in the form of `[\"--name=task\", \"--data=dbfs:/path/to/data.json\"]`. Leave it empty if `parameters` is not null.", - "additionalproperties": { - "description": "" - } - }, - "package_name": { - "description": "Name of the package to execute" - }, - "parameters": { - "description": "Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null.", - "items": { - "description": "" - } - } - } - }, - "retry_on_timeout": { - "description": "An optional policy to specify whether to retry a job when it times out. The default behavior\nis to not retry on timeout." 
- }, - "run_if": { - "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed" - }, - "run_job_task": { - "description": "If run_job_task, indicates that this task must execute another job.", - "properties": { - "dbt_commands": { - "description": "An array of commands to execute for jobs with the dbt task, for example `\"dbt_commands\": [\"dbt deps\", \"dbt seed\", \"dbt deps\", \"dbt seed\", \"dbt run\"]`", - "items": { - "description": "" - } - }, - "jar_params": { - "description": "A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", \"35\"]`.\nThe parameters are used to invoke the main function of the main class specified in the Spark JAR task.\nIf not specified upon `run-now`, it defaults to an empty list.\njar_params cannot be specified in conjunction with notebook_params.\nThe JSON representation of this field (for example `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes.\n\nUse [Task parameter variables](/jobs.html\\\"#parameter-variables\\\") to set parameters containing information about job runs.", - "items": { - "description": "" - } - }, - "job_id": { - "description": "ID of the job to trigger." 
- }, - "job_parameters": { - "description": "Job-level parameters used to trigger the job.", - "additionalproperties": { - "description": "" - } - }, - "notebook_params": { - "description": "A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": \"john doe\", \"age\": \"35\"}`.\nThe map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function.\n\nIf not specified upon `run-now`, the triggered run uses the job’s base parameters.\n\nnotebook_params cannot be specified in conjunction with jar_params.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nThe JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes.", - "additionalproperties": { - "description": "" - } - }, - "pipeline_params": { - "description": "", - "properties": { - "full_refresh": { - "description": "If true, triggers a full refresh on the delta live table." - } - } - }, - "python_named_params": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "python_params": { - "description": "A list of parameters for jobs with Python tasks, for example `\"python_params\": [\"john doe\", \"35\"]`.\nThe parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite\nthe parameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). 
Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", - "items": { - "description": "" - } - }, - "spark_submit_params": { - "description": "A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": [\"--class\", \"org.apache.spark.examples.SparkPi\"]`.\nThe parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the\nparameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", - "items": { - "description": "" - } - }, - "sql_params": { - "description": "A map from keys to values for jobs with SQL task, for example `\"sql_params\": {\"name\": \"john doe\", \"age\": \"35\"}`. The SQL alert task does not support custom parameters.", - "additionalproperties": { - "description": "" - } - } - } - }, - "spark_jar_task": { - "description": "If spark_jar_task, indicates that this task must run a JAR.", - "properties": { - "jar_uri": { - "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create." - }, - "main_class_name": { - "description": "The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." 
- }, - "parameters": { - "description": "Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - } - } - }, - "spark_python_task": { - "description": "If spark_python_task, indicates that this task must run a Python file.", - "properties": { - "parameters": { - "description": "Command line parameters passed to the Python file.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - }, - "python_file": { - "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." - }, - "source": { - "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local\nDatabricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository." - } - } - }, - "spark_submit_task": { - "description": "If `spark_submit_task`, indicates that this task must be launched by the spark submit script. This task can run only on new clusters.\n\nIn the `new_cluster` specification, `libraries` and `spark_conf` are not supported. 
Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations.\n\n`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters.\n\nBy default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage.\n\nThe `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths.", - "properties": { - "parameters": { - "description": "Command-line parameters passed to spark submit.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - } - } - }, - "sql_task": { - "description": "If sql_task, indicates that this job must execute a SQL task.", - "properties": { - "alert": { - "description": "If alert, indicates that this job must refresh a SQL alert.", - "properties": { - "alert_id": { - "description": "The canonical identifier of the SQL alert." - }, - "pause_subscriptions": { - "description": "If true, the alert notifications are not sent to subscribers." - }, - "subscriptions": { - "description": "If specified, alert notifications are sent to subscribers.", - "items": { - "description": "", - "properties": { - "destination_id": { - "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications." - }, - "user_name": { - "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications." 
- } - } - } - } - } - }, - "dashboard": { - "description": "If dashboard, indicates that this job must refresh a SQL dashboard.", - "properties": { - "custom_subject": { - "description": "Subject of the email sent to subscribers of this task." - }, - "dashboard_id": { - "description": "The canonical identifier of the SQL dashboard." - }, - "pause_subscriptions": { - "description": "If true, the dashboard snapshot is not taken, and emails are not sent to subscribers." - }, - "subscriptions": { - "description": "If specified, dashboard snapshots are sent to subscriptions.", - "items": { - "description": "", - "properties": { - "destination_id": { - "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications." - }, - "user_name": { - "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications." - } - } - } - } - } - }, - "file": { - "description": "If file, indicates that this job runs a SQL file in a remote Git repository.", - "properties": { - "path": { - "description": "Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths." - }, - "source": { - "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider." - } - } - }, - "parameters": { - "description": "Parameters to be used for each run of this job. 
The SQL alert task does not support custom parameters.", - "additionalproperties": { - "description": "" - } - }, - "query": { - "description": "If query, indicates that this job must execute a SQL query.", - "properties": { - "query_id": { - "description": "The canonical identifier of the SQL query." - } - } - }, - "warehouse_id": { - "description": "The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs." - } - } - }, - "task_key": { - "description": "A unique name for the task. This field is used to refer to this task from other tasks.\nThis field is required and must be unique within its parent job.\nOn Update or Reset, this field is used to reference the tasks to be updated or reset." - }, - "timeout_seconds": { - "description": "An optional timeout applied to each run of this job task. A value of `0` means no timeout." - }, - "webhook_notifications": { - "description": "A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications.", - "properties": { - "on_duration_warning_threshold_exceeded": { - "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_failure": { - "description": "An optional list of system notification IDs to call when the run fails. 
A maximum of 3 destinations can be specified for the `on_failure` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_start": { - "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_streaming_backlog_exceeded": { - "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_success": { - "description": "An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - } - } - } - } - } - }, - "timeout_seconds": { - "description": "An optional timeout applied to each run of this job. A value of `0` means no timeout." - }, - "trigger": { - "description": "A configuration to trigger a run when certain conditions are met. 
The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", - "properties": { - "file_arrival": { - "description": "File arrival trigger settings.", - "properties": { - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds" - }, - "url": { - "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - }, - "pause_status": { - "description": "Whether this trigger is paused or not." - }, - "periodic": { - "description": "Periodic trigger settings.", - "properties": { - "interval": { - "description": "The interval at which the trigger should run." - }, - "unit": { - "description": "The unit of time for the interval." - } - } - }, - "table": { - "description": "Old table trigger settings name. Deprecated in favor of `table_update`.", - "properties": { - "condition": { - "description": "The table(s) condition based on which to trigger a job run." - }, - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds." - }, - "table_names": { - "description": "A list of Delta tables to monitor for changes. 
The table name must be in the format `catalog_name.schema_name.table_name`.", - "items": { - "description": "" - } - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - }, - "table_update": { - "description": "", - "properties": { - "condition": { - "description": "The table(s) condition based on which to trigger a job run." - }, - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds." - }, - "table_names": { - "description": "A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.", - "items": { - "description": "" - } - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - } - } - }, - "webhook_notifications": { - "description": "A collection of system notification IDs to notify when runs of this job begin or complete.", - "properties": { - "on_duration_warning_threshold_exceeded": { - "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. 
A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_failure": { - "description": "An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_start": { - "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_streaming_backlog_exceeded": { - "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_success": { - "description": "An optional list of system notification IDs to call when the run completes successfully. 
A maximum of 3 destinations can be specified for the `on_success` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - } - } - } - } - } - }, - "model_serving_endpoints": { - "description": "List of Model Serving Endpoints", - "additionalproperties": { - "description": "", - "properties": { - "config": { - "description": "The core config of the serving endpoint.", - "properties": { - "auto_capture_config": { - "description": "Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.", - "properties": { - "catalog_name": { - "description": "The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled." - }, - "enabled": { - "description": "Indicates whether the inference table is enabled." - }, - "schema_name": { - "description": "The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled." - }, - "table_name_prefix": { - "description": "The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled." - } - } - }, - "served_entities": { - "description": "A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities.", - "items": { - "description": "", - "properties": { - "entity_name": { - "description": "The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC),\nor a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of\n__catalog_name__.__schema_name__.__model_name__.\n" - }, - "entity_version": { - "description": "The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC." 
- }, - "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. \nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", - "additionalproperties": { - "description": "" - } - }, - "external_model": { - "description": "The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled)\ncan be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model,\nit cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.\nThe task type of all external models within an endpoint must be the same.\n", - "properties": { - "ai21labs_config": { - "description": "AI21Labs Config. Only required if the provider is 'ai21labs'.", - "properties": { - "ai21labs_api_key": { - "description": "The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`." - }, - "ai21labs_api_key_plaintext": { - "description": "An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`." - } - } - }, - "amazon_bedrock_config": { - "description": "Amazon Bedrock Config. 
Only required if the provider is 'amazon-bedrock'.", - "properties": { - "aws_access_key_id": { - "description": "The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`." - }, - "aws_access_key_id_plaintext": { - "description": "An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`." - }, - "aws_region": { - "description": "The AWS region to use. Bedrock has to be enabled there." - }, - "aws_secret_access_key": { - "description": "The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`." - }, - "aws_secret_access_key_plaintext": { - "description": "An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`." - }, - "bedrock_provider": { - "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon." - } - } - }, - "anthropic_config": { - "description": "Anthropic Config. 
Only required if the provider is 'anthropic'.", - "properties": { - "anthropic_api_key": { - "description": "The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`." - }, - "anthropic_api_key_plaintext": { - "description": "The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`." - } - } - }, - "cohere_config": { - "description": "Cohere Config. Only required if the provider is 'cohere'.", - "properties": { - "cohere_api_base": { - "description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n" - }, - "cohere_api_key": { - "description": "The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`." - }, - "cohere_api_key_plaintext": { - "description": "The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`." - } - } - }, - "databricks_model_serving_config": { - "description": "Databricks Model Serving Config. 
Only required if the provider is 'databricks-model-serving'.", - "properties": { - "databricks_api_token": { - "description": "The Databricks secret key reference for a Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model.\nIf you prefer to paste your API key directly, see `databricks_api_token_plaintext`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n" - }, - "databricks_api_token_plaintext": { - "description": "The Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `databricks_api_token`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n" - }, - "databricks_workspace_url": { - "description": "The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.\n" - } - } - }, - "google_cloud_vertex_ai_config": { - "description": "Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'.", - "properties": { - "private_key": { - "description": "The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. 
You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`" - }, - "private_key_plaintext": { - "description": "The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`." - }, - "project_id": { - "description": "This is the Google Cloud project id that the service account is associated with." - }, - "region": { - "description": "This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. Some models are only available in specific regions." - } - } - }, - "name": { - "description": "The name of the external model." - }, - "openai_config": { - "description": "OpenAI Config. 
Only required if the provider is 'openai'.", - "properties": { - "microsoft_entra_client_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.\n" - }, - "microsoft_entra_client_secret": { - "description": "The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.\nIf you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n" - }, - "microsoft_entra_client_secret_plaintext": { - "description": "The client secret used for Microsoft Entra ID authentication provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n" - }, - "microsoft_entra_tenant_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.\n" - }, - "openai_api_base": { - "description": "This is a field to provide a customized base URl for the OpenAI API.\nFor Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service\nprovided by Azure.\nFor other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used.\n" - }, - "openai_api_key": { - "description": "The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`." - }, - "openai_api_key_plaintext": { - "description": "The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. 
If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`." - }, - "openai_api_type": { - "description": "This is an optional field to specify the type of OpenAI API to use.\nFor Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security\naccess validation protocol. For access token validation, use azure. For authentication using Azure Active\nDirectory (Azure AD) use, azuread.\n" - }, - "openai_api_version": { - "description": "This is an optional field to specify the OpenAI API version.\nFor Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to\nutilize, specified by a date.\n" - }, - "openai_deployment_name": { - "description": "This field is only required for Azure OpenAI and is the name of the deployment resource for the\nAzure OpenAI service.\n" - }, - "openai_organization": { - "description": "This is an optional field to specify the organization in OpenAI or Azure OpenAI.\n" - } - } - }, - "palm_config": { - "description": "PaLM Config. Only required if the provider is 'palm'.", - "properties": { - "palm_api_key": { - "description": "The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`." - }, - "palm_api_key_plaintext": { - "description": "The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`." - } - } - }, - "provider": { - "description": "The name of the provider for the external model. 
Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n" - }, - "task": { - "description": "The task type of the external model." - } - } - }, - "instance_profile_arn": { - "description": "ARN of the instance profile that the served entity uses to access AWS resources." - }, - "max_provisioned_throughput": { - "description": "The maximum tokens per second that the endpoint can scale up to." - }, - "min_provisioned_throughput": { - "description": "The minimum tokens per second that the endpoint can scale down to." - }, - "name": { - "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores.\nIf not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other\nentities, it defaults to \u003centity-name\u003e-\u003centity-version\u003e.\n" - }, - "scale_to_zero_enabled": { - "description": "Whether the compute resources for the served entity should scale down to zero." - }, - "workload_size": { - "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.\n" - }, - "workload_type": { - "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". 
For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n" - } - } - } - }, - "served_models": { - "description": "(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models.", - "items": { - "description": "", - "properties": { - "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", - "additionalproperties": { - "description": "" - } - }, - "instance_profile_arn": { - "description": "ARN of the instance profile that the served model will use to access AWS resources." - }, - "max_provisioned_throughput": { - "description": "The maximum tokens per second that the endpoint can scale up to." - }, - "min_provisioned_throughput": { - "description": "The minimum tokens per second that the endpoint can scale down to." - }, - "model_name": { - "description": "The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,\nin the form of __catalog_name__.__schema_name__.__model_name__.\n" - }, - "model_version": { - "description": "The version of the model in Databricks Model Registry or Unity Catalog to be served." - }, - "name": { - "description": "The name of a served model. It must be unique across an endpoint. 
If not specified, this field will default to \u003cmodel-name\u003e-\u003cmodel-version\u003e.\nA served model name can consist of alphanumeric characters, dashes, and underscores.\n" - }, - "scale_to_zero_enabled": { - "description": "Whether the compute resources for the served model should scale down to zero." - }, - "workload_size": { - "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n" - }, - "workload_type": { - "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n" - } - } - } - }, - "traffic_config": { - "description": "The traffic config defining how invocations to the serving endpoint should be routed.", - "properties": { - "routes": { - "description": "The list of routes that define traffic to each served entity.", - "items": { - "description": "", - "properties": { - "served_model_name": { - "description": "The name of the served model this route configures traffic for." - }, - "traffic_percentage": { - "description": "The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive." 
- } - } - } - } - } - } - } - }, - "name": { - "description": "The name of the serving endpoint. This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.\n" - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "rate_limits": { - "description": "Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.", - "items": { - "description": "", - "properties": { - "calls": { - "description": "Used to specify how many calls are allowed for a key within the renewal_period." - }, - "key": { - "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified." - }, - "renewal_period": { - "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported." - } - } - } - }, - "route_optimized": { - "description": "Enable route optimization for the serving endpoint." - }, - "tags": { - "description": "Tags to be attached to the serving endpoint and automatically propagated to billing logs.", - "items": { - "description": "", - "properties": { - "key": { - "description": "Key field for a serving endpoint tag." - }, - "value": { - "description": "Optional value field for a serving endpoint tag." - } - } - } - } - } - } - }, - "models": { - "description": "List of MLflow models", - "additionalproperties": { - "description": "", - "properties": { - "creation_timestamp": { - "description": "Timestamp recorded when this `registered_model` was created." - }, - "description": { - "description": "Description of this `registered_model`." 
- }, - "last_updated_timestamp": { - "description": "Timestamp recorded when metadata for this `registered_model` was last updated." - }, - "latest_versions": { - "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", - "items": { - "description": "", - "properties": { - "creation_timestamp": { - "description": "Timestamp recorded when this `model_version` was created." - }, - "current_stage": { - "description": "Current stage for this `model_version`." - }, - "description": { - "description": "Description of this `model_version`." - }, - "last_updated_timestamp": { - "description": "Timestamp recorded when metadata for this `model_version` was last updated." - }, - "name": { - "description": "Unique name of the model" - }, - "run_id": { - "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." - }, - "run_link": { - "description": "Run Link: Direct link to the run that generated this version" - }, - "source": { - "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" - }, - "status": { - "description": "Current status of `model_version`" - }, - "status_message": { - "description": "Details on current `status`, if it is pending or failed." - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs for this `model_version`.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - }, - "user_id": { - "description": "User that created this `model_version`." - }, - "version": { - "description": "Model's version number." - } - } - } - }, - "name": { - "description": "Unique name for the model." 
- }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - }, - "user_id": { - "description": "User that created this `registered_model`" - } - } - } - }, - "pipelines": { - "description": "List of DLT pipelines", - "additionalproperties": { - "description": "", - "properties": { - "catalog": { - "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog." - }, - "channel": { - "description": "DLT Release Channel that specifies which version to use." - }, - "clusters": { - "description": "Cluster settings for this pipeline deployment.", - "items": { - "description": "", - "properties": { - "apply_policy_default_values": { - "description": "Note: This field won't be persisted. Only API users will check this field." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`." 
- }, - "min_workers": { - "description": "The minimum number of workers the cluster can scale down to when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - }, - "mode": { - "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. The legacy autoscaling feature is used for `maintenance`\nclusters.\n" - } - } - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." 
- }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." 
- }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." 
- } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. 
The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. 
It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above." - }, - "enable_local_disk_encryption": { - "description": "Whether to enable local disk encryption for the cluster." 
- }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. 
If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. 
`bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. 
e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "label": { - "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." 
- }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nSee :method:clusters/create for more details.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - } - } - } - }, - "configuration": { - "description": "String-String configuration for this pipeline execution.", - "additionalproperties": { - "description": "" - } - }, - "continuous": { - "description": "Whether the pipeline is continuous or triggered. This replaces `trigger`." - }, - "deployment": { - "description": "Deployment type of this pipeline.", - "properties": { - "kind": { - "description": "The deployment method that manages the pipeline." - }, - "metadata_file_path": { - "description": "The path to the file containing metadata about the deployment." 
- } - } - }, - "development": { - "description": "Whether the pipeline is in Development mode. Defaults to false." - }, - "edition": { - "description": "Pipeline product edition." - }, - "filters": { - "description": "Filters on which Pipeline packages to include in the deployed graph.", - "properties": { - "exclude": { - "description": "Paths to exclude.", - "items": { - "description": "" - } - }, - "include": { - "description": "Paths to include.", - "items": { - "description": "" - } - } - } - }, - "gateway_definition": { - "description": "The definition of a gateway pipeline to support CDC.", - "properties": { - "connection_id": { - "description": "Immutable. The Unity Catalog connection this gateway pipeline uses to communicate with the source." - }, - "gateway_storage_catalog": { - "description": "Required, Immutable. The name of the catalog for the gateway pipeline's storage location." - }, - "gateway_storage_name": { - "description": "Optional. The Unity Catalog-compatible name for the gateway storage location.\nThis is the destination to use for the data that is extracted by the gateway.\nDelta Live Tables system will automatically create the storage location under the catalog and schema.\n" - }, - "gateway_storage_schema": { - "description": "Required, Immutable. The name of the schema for the gateway pipelines's storage location." - } - } - }, - "id": { - "description": "Unique identifier for this pipeline." - }, - "ingestion_definition": { - "description": "The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings.", - "properties": { - "connection_name": { - "description": "Immutable. The Unity Catalog connection this ingestion pipeline uses to communicate with the source. Specify either ingestion_gateway_id or connection_name." - }, - "ingestion_gateway_id": { - "description": "Immutable. 
Identifier for the ingestion gateway used by this ingestion pipeline to communicate with the source. Specify either ingestion_gateway_id or connection_name." - }, - "objects": { - "description": "Required. Settings specifying tables to replicate and the destination for the replicated tables.", - "items": { - "description": "", - "properties": { - "schema": { - "description": "Select tables from a specific source schema.", - "properties": { - "destination_catalog": { - "description": "Required. Destination catalog to store tables." - }, - "destination_schema": { - "description": "Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists." - }, - "source_catalog": { - "description": "The source catalog name. Might be optional depending on the type of source." - }, - "source_schema": { - "description": "Required. Schema name in the source database." - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - }, - "table": { - "description": "Select tables from a specific source table.", - "properties": { - "destination_catalog": { - "description": "Required. Destination catalog to store table." - }, - "destination_schema": { - "description": "Required. Destination schema to store table." 
- }, - "destination_table": { - "description": "Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used." - }, - "source_catalog": { - "description": "Source catalog name. Might be optional depending on the type of source." - }, - "source_schema": { - "description": "Schema name in the source database. Might be optional depending on the type of source." - }, - "source_table": { - "description": "Required. Table name in the source database." - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - } - } - } - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." 
- } - } - } - } - }, - "libraries": { - "description": "Libraries or code needed by this deployment.", - "items": { - "description": "", - "properties": { - "file": { - "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", - "properties": { - "path": { - "description": "The absolute path of the file." - } - } - }, - "jar": { - "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" - }, - "maven": { - "description": "Specification of a maven library to be installed.\n", - "properties": { - "coordinates": { - "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." - }, - "exclusions": { - "description": "List of dependences to exclude. For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", - "items": { - "description": "" - } - }, - "repo": { - "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched." - } - } - }, - "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\n", - "properties": { - "path": { - "description": "The absolute path of the notebook." - } - } - }, - "whl": { - "description": "URI of the whl to be installed." - } - } - } - }, - "name": { - "description": "Friendly identifier for this pipeline." - }, - "notifications": { - "description": "List of notification settings for this pipeline.", - "items": { - "description": "", - "properties": { - "alerts": { - "description": "A list of alerts that trigger the sending of notifications to the configured\ndestinations. 
The supported alerts are:\n\n* `on-update-success`: A pipeline update completes successfully.\n* `on-update-failure`: Each time a pipeline update fails.\n* `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error.\n* `on-flow-failure`: A single data flow fails.\n", - "items": { - "description": "" - } - }, - "email_recipients": { - "description": "A list of email addresses notified when a configured alert is triggered.\n", - "items": { - "description": "" - } - } - } - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "photon": { - "description": "Whether Photon is enabled for this pipeline." - }, - "serverless": { - "description": "Whether serverless compute is enabled for this pipeline." - }, - "storage": { - "description": "DBFS root directory for storing checkpoints and tables." - }, - "target": { - "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." - }, - "trigger": { - "description": "Which pipeline trigger to use. 
Deprecated: Use `continuous` instead.", - "properties": { - "cron": { - "description": "", - "properties": { - "quartz_cron_schedule": { - "description": "" - }, - "timezone_id": { - "description": "" - } - } - }, - "manual": { - "description": "" - } - } - } - } - } - }, - "quality_monitors": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "assets_dir": { - "description": "" - }, - "baseline_table_name": { - "description": "" - }, - "custom_metrics": { - "description": "", - "items": { - "description": "", - "properties": { - "definition": { - "description": "" - }, - "input_columns": { - "description": "", - "items": { - "description": "" - } - }, - "name": { - "description": "" - }, - "output_data_type": { - "description": "" - }, - "type": { - "description": "" - } - } - } - }, - "data_classification_config": { - "description": "", - "properties": { - "enabled": { - "description": "" - } - } - }, - "inference_log": { - "description": "", - "properties": { - "granularities": { - "description": "", - "items": { - "description": "" - } - }, - "label_col": { - "description": "" - }, - "model_id_col": { - "description": "" - }, - "prediction_col": { - "description": "" - }, - "prediction_proba_col": { - "description": "" - }, - "problem_type": { - "description": "" - }, - "timestamp_col": { - "description": "" - } - } - }, - "notifications": { - "description": "", - "properties": { - "on_failure": { - "description": "", - "properties": { - "email_addresses": { - "description": "", - "items": { - "description": "" - } - } - } - }, - "on_new_classification_tag_detected": { - "description": "", - "properties": { - "email_addresses": { - "description": "", - "items": { - "description": "" - } - } - } - } - } - }, - "output_schema_name": { - "description": "" - }, - "schedule": { - "description": "", - "properties": { - "pause_status": { - "description": "" - }, - "quartz_cron_expression": { - "description": "" - }, - 
"timezone_id": { - "description": "" - } - } - }, - "skip_builtin_dashboard": { - "description": "" - }, - "slicing_exprs": { - "description": "", - "items": { - "description": "" - } - }, - "snapshot": { - "description": "" - }, - "time_series": { - "description": "", - "properties": { - "granularities": { - "description": "", - "items": { - "description": "" - } - }, - "timestamp_col": { - "description": "" - } - } - }, - "warehouse_id": { - "description": "" - } - } - } - }, - "registered_models": { - "description": "List of Registered Models", - "additionalproperties": { - "description": "", - "properties": { - "catalog_name": { - "description": "The name of the catalog where the schema and the registered model reside" - }, - "comment": { - "description": "The comment attached to the registered model" - }, - "grants": { - "description": "", - "items": { - "description": "", - "properties": { - "principal": { - "description": "" - }, - "privileges": { - "description": "", - "items": { - "description": "" - } - } - } - } - }, - "name": { - "description": "The name of the registered model" - }, - "schema_name": { - "description": "The name of the schema where the registered model resides" - }, - "storage_location": { - "description": "The storage location on the cloud under which model version data files are stored" - } - } - } - }, - "schemas": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "catalog_name": { - "description": "" - }, - "comment": { - "description": "" - }, - "grants": { - "description": "", - "items": { - "description": "", - "properties": { - "principal": { - "description": "" - }, - "privileges": { - "description": "", - "items": { - "description": "" - } - } - } - } - }, - "name": { - "description": "" - }, - "properties": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "storage_root": { - "description": "" - } - } - } - } - } - }, - "run_as": { - "description": "", 
- "properties": { - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - }, - "sync": { - "description": "", - "properties": { - "exclude": { - "description": "", - "items": { - "description": "" - } - }, - "include": { - "description": "", - "items": { - "description": "" - } - }, - "paths": { - "description": "", - "items": { - "description": "" - } - } - } - }, - "variables": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "default": { - "description": "" - }, - "description": { - "description": "" - }, - "lookup": { - "description": "", - "properties": { - "alert": { - "description": "" - }, - "cluster": { - "description": "" - }, - "cluster_policy": { - "description": "" - }, - "dashboard": { - "description": "" - }, - "instance_pool": { - "description": "" - }, - "job": { - "description": "" - }, - "metastore": { - "description": "" - }, - "pipeline": { - "description": "" - }, - "query": { - "description": "" - }, - "service_principal": { - "description": "" - }, - "warehouse": { - "description": "" - } - } - }, - "type": { - "description": "" - } - } - } - }, - "workspace": { - "description": "", - "properties": { - "artifact_path": { - "description": "" - }, - "auth_type": { - "description": "" - }, - "azure_client_id": { - "description": "" - }, - "azure_environment": { - "description": "" - }, - "azure_login_app_id": { - "description": "" - }, - "azure_tenant_id": { - "description": "" - }, - "azure_use_msi": { - "description": "" - }, - "azure_workspace_resource_id": { - "description": "" - }, - "client_id": { - "description": "" - }, - "file_path": { - "description": "" - }, - "google_service_account": { - "description": "" - }, - "host": { - "description": "" - }, - "profile": { - "description": "" - }, - "root_path": { - "description": "" - }, - "state_path": { - "description": "" - } - } - } - } - } - }, - "variables": { - "description": "", - 
"additionalproperties": { - "description": "", - "properties": { - "default": { - "description": "" - }, - "description": { - "description": "" - }, - "lookup": { - "description": "", - "properties": { - "alert": { - "description": "" - }, - "cluster": { - "description": "" - }, - "cluster_policy": { - "description": "" - }, - "dashboard": { - "description": "" - }, - "instance_pool": { - "description": "" - }, - "job": { - "description": "" - }, - "metastore": { - "description": "" - }, - "pipeline": { - "description": "" - }, - "query": { - "description": "" - }, - "service_principal": { - "description": "" - }, - "warehouse": { - "description": "" - } - } - }, - "type": { - "description": "" - } - } - } - }, - "workspace": { - "description": "", - "properties": { - "artifact_path": { - "description": "" - }, - "auth_type": { - "description": "" - }, - "azure_client_id": { - "description": "" - }, - "azure_environment": { - "description": "" - }, - "azure_login_app_id": { - "description": "" - }, - "azure_tenant_id": { - "description": "" - }, - "azure_use_msi": { - "description": "" - }, - "azure_workspace_resource_id": { - "description": "" - }, - "client_id": { - "description": "" - }, - "file_path": { - "description": "" - }, - "google_service_account": { - "description": "" - }, - "host": { - "description": "" - }, - "profile": { - "description": "" - }, - "root_path": { - "description": "" - }, - "state_path": { - "description": "" - } - } - } - } -} \ No newline at end of file diff --git a/bundle/schema/docs_test.go b/bundle/schema/docs_test.go deleted file mode 100644 index 83ee681b..00000000 --- a/bundle/schema/docs_test.go +++ /dev/null @@ -1,62 +0,0 @@ -package schema - -import ( - "encoding/json" - "testing" - - "github.com/databricks/cli/libs/jsonschema" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestSchemaToDocs(t *testing.T) { - jsonSchema := &jsonschema.Schema{ - Type: "object", - Description: "root 
doc", - Properties: map[string]*jsonschema.Schema{ - "foo": {Type: "number", Description: "foo doc"}, - "bar": {Type: "string"}, - "octave": { - Type: "object", - AdditionalProperties: &jsonschema.Schema{Type: "number"}, - Description: "octave docs", - }, - "scales": { - Type: "object", - Description: "scale docs", - Items: &jsonschema.Schema{Type: "string"}, - }, - }, - } - docs := schemaToDocs(jsonSchema) - docsJson, err := json.MarshalIndent(docs, " ", " ") - require.NoError(t, err) - - expected := - `{ - "description": "root doc", - "properties": { - "bar": { - "description": "" - }, - "foo": { - "description": "foo doc" - }, - "octave": { - "description": "octave docs", - "additionalproperties": { - "description": "" - } - }, - "scales": { - "description": "scale docs", - "items": { - "description": "" - } - } - } - }` - t.Log("[DEBUG] actual: ", string(docsJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(docsJson)) -} diff --git a/bundle/schema/embed.go b/bundle/schema/embed.go new file mode 100644 index 00000000..68f42a8e --- /dev/null +++ b/bundle/schema/embed.go @@ -0,0 +1,6 @@ +package schema + +import _ "embed" + +//go:embed jsonschema.json +var Bytes []byte diff --git a/bundle/schema/embed_test.go b/bundle/schema/embed_test.go new file mode 100644 index 00000000..ee0b5a61 --- /dev/null +++ b/bundle/schema/embed_test.go @@ -0,0 +1,71 @@ +package schema_test + +import ( + "encoding/json" + "testing" + + "github.com/databricks/cli/bundle/schema" + "github.com/databricks/cli/libs/jsonschema" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func walk(defs map[string]any, p ...string) jsonschema.Schema { + v, ok := defs[p[0]] + if !ok { + panic("not found: " + p[0]) + } + + if len(p) == 1 { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + res := jsonschema.Schema{} + err = json.Unmarshal(b, &res) + if err != nil { + panic(err) + } + return res + } + + return 
walk(v.(map[string]any), p[1:]...) +} + +func TestJsonSchema(t *testing.T) { + s := jsonschema.Schema{} + err := json.Unmarshal(schema.Bytes, &s) + require.NoError(t, err) + + // Assert job fields have their descriptions loaded. + resourceJob := walk(s.Definitions, "github.com", "databricks", "cli", "bundle", "config", "resources.Job") + fields := []string{"name", "continuous", "deployment", "tasks", "trigger"} + for _, field := range fields { + assert.NotEmpty(t, resourceJob.AnyOf[0].Properties[field].Description) + } + + // Assert descriptions were also loaded for a job task definition. + jobTask := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.Task") + fields = []string{"notebook_task", "spark_jar_task", "spark_python_task", "spark_submit_task", "description", "depends_on", "environment_key", "for_each_task", "existing_cluster_id"} + for _, field := range fields { + assert.NotEmpty(t, jobTask.AnyOf[0].Properties[field].Description) + } + + // Assert descriptions are loaded for pipelines + pipeline := walk(s.Definitions, "github.com", "databricks", "cli", "bundle", "config", "resources.Pipeline") + fields = []string{"name", "catalog", "clusters", "channel", "continuous", "deployment", "development"} + for _, field := range fields { + assert.NotEmpty(t, pipeline.AnyOf[0].Properties[field].Description) + } + + // Assert enum values are loaded + schedule := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "catalog.MonitorCronSchedule") + assert.Contains(t, schedule.AnyOf[0].Properties["pause_status"].Enum, "PAUSED") + assert.Contains(t, schedule.AnyOf[0].Properties["pause_status"].Enum, "UNPAUSED") + + providers := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.GitProvider") + assert.Contains(t, providers.Enum, "gitHub") + assert.Contains(t, providers.Enum, "bitbucketCloud") + assert.Contains(t, providers.Enum, "gitHubEnterprise") + assert.Contains(t, 
providers.Enum, "bitbucketServer") +} diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json new file mode 100644 index 00000000..4fe978b8 --- /dev/null +++ b/bundle/schema/jsonschema.json @@ -0,0 +1,5524 @@ +{ + "$defs": { + "bool": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string", + "pattern": "\\$\\{(resources(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(bundle(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(workspace(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(artifacts(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "float64": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "string", + "pattern": "\\$\\{(resources(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(bundle(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(workspace(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(artifacts(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "github.com": { + "databricks": { + "cli": { + "bundle": { + "config": { + "resources.Grant": { + "anyOf": [ + { + "type": "object", + "properties": { + "principal": { + "$ref": "#/$defs/string" + }, + "privileges": { + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false, + "required": [ + "privileges", + "principal" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Job": { + 
"anyOf": [ + { + "type": "object", + "properties": { + "continuous": { + "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Continuous" + }, + "deployment": { + "description": "Deployment information for jobs managed by external sources.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobDeployment" + }, + "description": { + "description": "An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding.", + "$ref": "#/$defs/string" + }, + "edit_mode": { + "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobEditMode" + }, + "email_notifications": { + "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobEmailNotifications" + }, + "environments": { + "description": "A list of task execution environment specifications that can be referenced by tasks of this job.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobEnvironment" + }, + "format": { + "description": "Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `\"MULTI_TASK\"`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Format" + }, + "git_source": { + "description": "An optional specification for a remote Git repository containing the source code used by tasks. 
Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.GitSource" + }, + "health": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" + }, + "job_clusters": { + "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobCluster" + }, + "max_concurrent_runs": { + "description": "An optional maximum allowed number of concurrent runs of the job.\nSet this value if you want to be able to execute multiple runs of the same job concurrently.\nThis is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.\nThis setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs.\nHowever, from then on, new runs are skipped unless there are fewer than 3 active runs.\nThis value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped.", + "$ref": "#/$defs/int" + }, + "name": { + "description": "An optional name for the job. 
The maximum length is 4096 bytes in UTF-8 encoding.", + "$ref": "#/$defs/string" + }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobNotificationSettings" + }, + "parameters": { + "description": "Job-level parameter definitions", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobParameterDefinition" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "queue": { + "description": "The queue settings of the job.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.QueueSettings" + }, + "run_as": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs" + }, + "schedule": { + "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.CronSchedule" + }, + "tags": { + "description": "A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job.", + "$ref": "#/$defs/map/string" + }, + "tasks": { + "description": "A list of task specifications to be executed by this job.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Task" + }, + "timeout_seconds": { + "description": "An optional timeout applied to each run of this job. A value of `0` means no timeout.", + "$ref": "#/$defs/int" + }, + "trigger": { + "description": "A configuration to trigger a run when certain conditions are met. 
The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TriggerSettings" + }, + "webhook_notifications": { + "description": "A collection of system notification IDs to notify when runs of this job begin or complete.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.WebhookNotifications" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.MlflowExperiment": { + "anyOf": [ + { + "type": "object", + "properties": { + "artifact_location": { + "description": "Location where artifacts for the experiment are stored.", + "$ref": "#/$defs/string" + }, + "creation_time": { + "description": "Creation time", + "$ref": "#/$defs/int64" + }, + "experiment_id": { + "description": "Unique identifier for the experiment.", + "$ref": "#/$defs/string" + }, + "last_update_time": { + "description": "Last update time", + "$ref": "#/$defs/int64" + }, + "lifecycle_stage": { + "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs.", + "$ref": "#/$defs/string" + }, + "name": { + "description": "Human readable name that identifies the experiment.", + "$ref": "#/$defs/string" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "tags": { + "description": "Tags: Additional metadata key-value pairs.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ExperimentTag" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.MlflowModel": { + "anyOf": [ + { + "type": "object", + "properties": { + "creation_timestamp": 
{ + "description": "Timestamp recorded when this `registered_model` was created.", + "$ref": "#/$defs/int64" + }, + "description": { + "description": "Description of this `registered_model`.", + "$ref": "#/$defs/string" + }, + "last_updated_timestamp": { + "description": "Timestamp recorded when metadata for this `registered_model` was last updated.", + "$ref": "#/$defs/int64" + }, + "latest_versions": { + "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ModelVersion" + }, + "name": { + "description": "Unique name for the model.", + "$ref": "#/$defs/string" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "tags": { + "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ModelTag" + }, + "user_id": { + "description": "User that created this `registered_model`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.ModelServingEndpoint": { + "anyOf": [ + { + "type": "object", + "properties": { + "config": { + "description": "The core config of the serving endpoint.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.EndpointCoreConfigInput" + }, + "name": { + "description": "The name of the serving endpoint. 
This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.\n", + "$ref": "#/$defs/string" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "rate_limits": { + "description": "Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.RateLimit" + }, + "route_optimized": { + "description": "Enable route optimization for the serving endpoint.", + "$ref": "#/$defs/bool" + }, + "tags": { + "description": "Tags to be attached to the serving endpoint and automatically propagated to billing logs.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.EndpointTag" + } + }, + "additionalProperties": false, + "required": [ + "config", + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Permission": { + "anyOf": [ + { + "type": "object", + "properties": { + "group_name": { + "$ref": "#/$defs/string" + }, + "level": { + "$ref": "#/$defs/string" + }, + "service_principal_name": { + "$ref": "#/$defs/string" + }, + "user_name": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "level" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Pipeline": { + "anyOf": [ + { + "type": "object", + "properties": { + "catalog": { + "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). 
If `target` is not specified, no data is published to Unity Catalog.", + "$ref": "#/$defs/string" + }, + "channel": { + "description": "DLT Release Channel that specifies which version to use.", + "$ref": "#/$defs/string" + }, + "clusters": { + "description": "Cluster settings for this pipeline deployment.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineCluster" + }, + "configuration": { + "description": "String-String configuration for this pipeline execution.", + "$ref": "#/$defs/map/string" + }, + "continuous": { + "description": "Whether the pipeline is continuous or triggered. This replaces `trigger`.", + "$ref": "#/$defs/bool" + }, + "deployment": { + "description": "Deployment type of this pipeline.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineDeployment" + }, + "development": { + "description": "Whether the pipeline is in Development mode. Defaults to false.", + "$ref": "#/$defs/bool" + }, + "edition": { + "description": "Pipeline product edition.", + "$ref": "#/$defs/string" + }, + "filters": { + "description": "Filters on which Pipeline packages to include in the deployed graph.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.Filters" + }, + "gateway_definition": { + "description": "The definition of a gateway pipeline to support CDC.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.IngestionGatewayPipelineDefinition" + }, + "id": { + "description": "Unique identifier for this pipeline.", + "$ref": "#/$defs/string" + }, + "ingestion_definition": { + "description": "The configuration for a managed ingestion pipeline. 
These settings cannot be used with the 'libraries', 'target' or 'catalog' settings.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.IngestionPipelineDefinition" + }, + "libraries": { + "description": "Libraries or code needed by this deployment.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineLibrary" + }, + "name": { + "description": "Friendly identifier for this pipeline.", + "$ref": "#/$defs/string" + }, + "notifications": { + "description": "List of notification settings for this pipeline.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.Notifications" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "photon": { + "description": "Whether Photon is enabled for this pipeline.", + "$ref": "#/$defs/bool" + }, + "serverless": { + "description": "Whether serverless compute is enabled for this pipeline.", + "$ref": "#/$defs/bool" + }, + "storage": { + "description": "DBFS root directory for storing checkpoints and tables.", + "$ref": "#/$defs/string" + }, + "target": { + "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`.", + "$ref": "#/$defs/string" + }, + "trigger": { + "description": "Which pipeline trigger to use. Deprecated: Use `continuous` instead.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.QualityMonitor": { + "anyOf": [ + { + "type": "object", + "properties": { + "assets_dir": { + "description": "The directory to store monitoring assets (e.g. 
dashboard, metric tables).", + "$ref": "#/$defs/string" + }, + "baseline_table_name": { + "description": "Name of the baseline table from which drift metrics are computed from.\nColumns in the monitored table should also be present in the baseline table.\n", + "$ref": "#/$defs/string" + }, + "custom_metrics": { + "description": "Custom metrics to compute on the monitored table. These can be aggregate metrics, derived\nmetrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time\nwindows).\n", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetric" + }, + "data_classification_config": { + "description": "The data classification config for the monitor.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorDataClassificationConfig" + }, + "inference_log": { + "description": "Configuration for monitoring inference logs.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLog" + }, + "notifications": { + "description": "The notification settings for the monitor.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorNotifications" + }, + "output_schema_name": { + "description": "Schema where output metric tables are created.", + "$ref": "#/$defs/string" + }, + "schedule": { + "description": "The schedule for automatically updating and refreshing metric tables.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedule" + }, + "skip_builtin_dashboard": { + "description": "Whether to skip creating a default dashboard summarizing data quality metrics.", + "$ref": "#/$defs/bool" + }, + "slicing_exprs": { + "description": "List of column expressions to slice data with for targeted analysis. The data is grouped by\neach expression independently, resulting in a separate slice for each predicate and its\ncomplements. 
For high-cardinality columns, only the top 100 unique values by frequency will\ngenerate slices.\n", + "$ref": "#/$defs/slice/string" + }, + "snapshot": { + "description": "Configuration for monitoring snapshot tables.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorSnapshot" + }, + "time_series": { + "description": "Configuration for monitoring time series tables.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorTimeSeries" + }, + "warehouse_id": { + "description": "Optional argument to specify the warehouse for dashboard creation. If not specified, the first running\nwarehouse will be used.\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "assets_dir", + "output_schema_name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.RegisteredModel": { + "anyOf": [ + { + "type": "object", + "properties": { + "catalog_name": { + "description": "The name of the catalog where the schema and the registered model reside", + "$ref": "#/$defs/string" + }, + "comment": { + "description": "The comment attached to the registered model", + "$ref": "#/$defs/string" + }, + "grants": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Grant" + }, + "name": { + "description": "The name of the registered model", + "$ref": "#/$defs/string" + }, + "schema_name": { + "description": "The name of the schema where the registered model resides", + "$ref": "#/$defs/string" + }, + "storage_location": { + "description": "The storage location on the cloud under which model version data files are stored", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "catalog_name", + "name", + "schema_name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Schema": 
{ + "anyOf": [ + { + "type": "object", + "properties": { + "catalog_name": { + "description": "Name of parent catalog.", + "$ref": "#/$defs/string" + }, + "comment": { + "description": "User-provided free-form text description.", + "$ref": "#/$defs/string" + }, + "grants": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Grant" + }, + "name": { + "description": "Name of schema, relative to parent catalog.", + "$ref": "#/$defs/string" + }, + "properties": { + "$ref": "#/$defs/map/string" + }, + "storage_root": { + "description": "Storage root URL for managed tables within schema.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "catalog_name", + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "variable.Lookup": { + "anyOf": [ + { + "type": "object", + "properties": { + "alert": { + "$ref": "#/$defs/string" + }, + "cluster": { + "$ref": "#/$defs/string" + }, + "cluster_policy": { + "$ref": "#/$defs/string" + }, + "dashboard": { + "$ref": "#/$defs/string" + }, + "instance_pool": { + "$ref": "#/$defs/string" + }, + "job": { + "$ref": "#/$defs/string" + }, + "metastore": { + "$ref": "#/$defs/string" + }, + "pipeline": { + "$ref": "#/$defs/string" + }, + "query": { + "$ref": "#/$defs/string" + }, + "service_principal": { + "$ref": "#/$defs/string" + }, + "warehouse": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "variable.Variable": { + "type": "object", + "properties": { + "default": { + "$ref": "#/$defs/interface" + }, + "description": { + "$ref": "#/$defs/string" + }, + "lookup": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.Lookup" + }, + "type": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.VariableType" + } + }, + 
"additionalProperties": false + }, + "variable.VariableType": { + "type": "string" + } + }, + "config.Artifact": { + "anyOf": [ + { + "type": "object", + "properties": { + "build": { + "$ref": "#/$defs/string" + }, + "executable": { + "$ref": "#/$defs/github.com/databricks/cli/libs/exec.ExecutableType" + }, + "files": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config.ArtifactFile" + }, + "path": { + "$ref": "#/$defs/string" + }, + "type": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.ArtifactType" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.ArtifactFile": { + "anyOf": [ + { + "type": "object", + "properties": { + "source": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "source" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.ArtifactType": { + "type": "string" + }, + "config.Bundle": { + "anyOf": [ + { + "type": "object", + "properties": { + "compute_id": { + "$ref": "#/$defs/string" + }, + "databricks_cli_version": { + "$ref": "#/$defs/string" + }, + "deployment": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Deployment" + }, + "git": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git" + }, + "name": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Command": { + "type": "string" + }, + "config.Deployment": { + "anyOf": [ + { + "type": "object", + "properties": { + "fail_on_active_runs": { + "$ref": "#/$defs/bool" + }, + "lock": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock" + } + }, + "additionalProperties": false + }, + 
{ + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Experimental": { + "anyOf": [ + { + "type": "object", + "properties": { + "pydabs": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.PyDABs" + }, + "python_wheel_wrapper": { + "$ref": "#/$defs/bool" + }, + "scripts": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Command" + }, + "use_legacy_run_as": { + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Git": { + "anyOf": [ + { + "type": "object", + "properties": { + "branch": { + "$ref": "#/$defs/string" + }, + "origin_url": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Lock": { + "anyOf": [ + { + "type": "object", + "properties": { + "enabled": { + "$ref": "#/$defs/bool" + }, + "force": { + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Mode": { + "type": "string" + }, + "config.Presets": { + "anyOf": [ + { + "type": "object", + "properties": { + "jobs_max_concurrent_runs": { + "$ref": "#/$defs/int" + }, + "name_prefix": { + "$ref": "#/$defs/string" + }, + "pipelines_development": { + "$ref": "#/$defs/bool" + }, + "tags": { + "$ref": "#/$defs/map/string" + }, + "trigger_pause_status": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.PyDABs": { + "anyOf": [ + { + "type": "object", + "properties": { + "enabled": { + "$ref": "#/$defs/bool" + }, + "import": { + "$ref": 
"#/$defs/slice/string" + }, + "venv_path": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Resources": { + "anyOf": [ + { + "type": "object", + "properties": { + "experiments": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.MlflowExperiment" + }, + "jobs": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Job" + }, + "model_serving_endpoints": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint" + }, + "models": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.MlflowModel" + }, + "pipelines": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Pipeline" + }, + "quality_monitors": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.QualityMonitor" + }, + "registered_models": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.RegisteredModel" + }, + "schemas": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Schema" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Sync": { + "anyOf": [ + { + "type": "object", + "properties": { + "exclude": { + "$ref": "#/$defs/slice/string" + }, + "include": { + "$ref": "#/$defs/slice/string" + }, + "paths": { + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Target": { + "anyOf": [ + { + "type": "object", + "properties": { + "artifacts": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact" + }, + "bundle": { + "$ref": 
"#/$defs/github.com/databricks/cli/bundle/config.Bundle" + }, + "compute_id": { + "$ref": "#/$defs/string" + }, + "default": { + "$ref": "#/$defs/bool" + }, + "git": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git" + }, + "mode": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Mode" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "presets": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Presets" + }, + "resources": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources" + }, + "run_as": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs" + }, + "sync": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync" + }, + "variables": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.Variable" + }, + "workspace": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Workspace": { + "anyOf": [ + { + "type": "object", + "properties": { + "artifact_path": { + "$ref": "#/$defs/string" + }, + "auth_type": { + "$ref": "#/$defs/string" + }, + "azure_client_id": { + "$ref": "#/$defs/string" + }, + "azure_environment": { + "$ref": "#/$defs/string" + }, + "azure_login_app_id": { + "$ref": "#/$defs/string" + }, + "azure_tenant_id": { + "$ref": "#/$defs/string" + }, + "azure_use_msi": { + "$ref": "#/$defs/bool" + }, + "azure_workspace_resource_id": { + "$ref": "#/$defs/string" + }, + "client_id": { + "$ref": "#/$defs/string" + }, + "file_path": { + "$ref": "#/$defs/string" + }, + "google_service_account": { + "$ref": "#/$defs/string" + }, + "host": { + "$ref": "#/$defs/string" + }, + "profile": { + "$ref": "#/$defs/string" + }, + "root_path": { + "$ref": "#/$defs/string" + }, + "state_path": { + "$ref": 
"#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "libs": { + "exec.ExecutableType": { + "type": "string" + } + } + }, + "databricks-sdk-go": { + "service": { + "catalog.MonitorCronSchedule": { + "anyOf": [ + { + "type": "object", + "properties": { + "pause_status": { + "description": "Read only field that indicates whether a schedule is paused or not.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedulePauseStatus", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + "quartz_cron_expression": { + "description": "The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html).\n", + "$ref": "#/$defs/string" + }, + "timezone_id": { + "description": "The timezone id (e.g., ``\"PST\"``) in which to evaluate the quartz expression.\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "quartz_cron_expression", + "timezone_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorCronSchedulePauseStatus": { + "type": "string" + }, + "catalog.MonitorDataClassificationConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "enabled": { + "description": "Whether data classification is enabled.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorDestination": { + "anyOf": [ + { + "type": "object", + "properties": { + "email_addresses": { + "description": "The list of email addresses to send the notification to. 
A maximum of 5 email addresses is supported.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorInferenceLog": { + "anyOf": [ + { + "type": "object", + "properties": { + "granularities": { + "description": "Granularities for aggregating data into time windows based on their timestamp. Currently the following static\ngranularities are supported:\n{``\"5 minutes\"``, ``\"30 minutes\"``, ``\"1 hour\"``, ``\"1 day\"``, ``\"\u003cn\u003e week(s)\"``, ``\"1 month\"``, ``\"1 year\"``}.\n", + "$ref": "#/$defs/slice/string" + }, + "label_col": { + "description": "Optional column that contains the ground truth for the prediction.", + "$ref": "#/$defs/string" + }, + "model_id_col": { + "description": "Column that contains the id of the model generating the predictions. Metrics will be computed per model id by\ndefault, and also across all model ids.\n", + "$ref": "#/$defs/string" + }, + "prediction_col": { + "description": "Column that contains the output/prediction from the model.", + "$ref": "#/$defs/string" + }, + "prediction_proba_col": { + "description": "Optional column that contains the prediction probabilities for each class in a classification problem type.\nThe values in this column should be a map, mapping each class label to the prediction probability for a given\nsample. The map should be of PySpark MapType().\n", + "$ref": "#/$defs/string" + }, + "problem_type": { + "description": "Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLogProblemType", + "enum": [ + "PROBLEM_TYPE_CLASSIFICATION", + "PROBLEM_TYPE_REGRESSION" + ] + }, + "timestamp_col": { + "description": "Column that contains the timestamps of requests. 
The column must be one of the following:\n- A ``TimestampType`` column\n- A column whose values can be converted to timestamps through the pyspark\n ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html).\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "granularities", + "model_id_col", + "prediction_col", + "problem_type", + "timestamp_col" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorInferenceLogProblemType": { + "type": "string" + }, + "catalog.MonitorMetric": { + "anyOf": [ + { + "type": "object", + "properties": { + "definition": { + "description": "Jinja template for a SQL expression that specifies how to compute the metric. See [create metric definition](https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition).", + "$ref": "#/$defs/string" + }, + "input_columns": { + "description": "A list of column names in the input table the metric should be computed for.\nCan use ``\":table\"`` to indicate that the metric needs information from multiple columns.\n", + "$ref": "#/$defs/slice/string" + }, + "name": { + "description": "Name of the metric in the output tables.", + "$ref": "#/$defs/string" + }, + "output_data_type": { + "description": "The output type of the custom metric.", + "$ref": "#/$defs/string" + }, + "type": { + "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your 
table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetricType", + "enum": [ + "CUSTOM_METRIC_TYPE_AGGREGATE", + "CUSTOM_METRIC_TYPE_DERIVED", + "CUSTOM_METRIC_TYPE_DRIFT" + ] + } + }, + "additionalProperties": false, + "required": [ + "definition", + "input_columns", + "name", + "output_data_type", + "type" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorMetricType": { + "type": "string" + }, + "catalog.MonitorNotifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "on_failure": { + "description": "Who to send notifications to on monitor failure.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorDestination" + }, + "on_new_classification_tag_detected": { + "description": "Who to send notifications to when new data classification tags are detected.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorDestination" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorSnapshot": { + "anyOf": [ + { + "type": "object", + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorTimeSeries": { + "anyOf": [ + { + "type": "object", + "properties": { + "granularities": { + "description": "Granularities for aggregating data into time windows based on their timestamp. 
Currently the following static\ngranularities are supported:\n{``\"5 minutes\"``, ``\"30 minutes\"``, ``\"1 hour\"``, ``\"1 day\"``, ``\"\u003cn\u003e week(s)\"``, ``\"1 month\"``, ``\"1 year\"``}.\n", + "$ref": "#/$defs/slice/string" + }, + "timestamp_col": { + "description": "Column that contains the timestamps of requests. The column must be one of the following:\n- A ``TimestampType`` column\n- A column whose values can be converted to timestamps through the pyspark\n ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html).\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "granularities", + "timestamp_col" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.Adlsgen2Info": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "abfss destination, e.g. 
`abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.AutoScale": { + "anyOf": [ + { + "type": "object", + "properties": { + "max_workers": { + "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`.", + "$ref": "#/$defs/int" + }, + "min_workers": { + "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation.", + "$ref": "#/$defs/int" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.AwsAttributes": { + "anyOf": [ + { + "type": "object", + "properties": { + "availability": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AwsAvailability" + }, + "ebs_volume_count": { + "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. 
If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden.", + "$ref": "#/$defs/int" + }, + "ebs_volume_iops": { + "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.", + "$ref": "#/$defs/int" + }, + "ebs_volume_size": { + "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096.", + "$ref": "#/$defs/int" + }, + "ebs_volume_throughput": { + "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.", + "$ref": "#/$defs/int" + }, + "ebs_volume_type": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.EbsVolumeType" + }, + "first_on_demand": { + "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.", + "$ref": "#/$defs/int" + }, + "instance_profile_arn": { + "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. 
If\nomitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is omitted, we will pull in the default from the conf if it exists.", + "$ref": "#/$defs/string" + }, + "spot_bid_price_percent": { + "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.", + "$ref": "#/$defs/int" + }, + "zone_id": { + "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. 
For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.AwsAvailability": { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.\n", + "enum": [ + "SPOT", + "ON_DEMAND", + "SPOT_WITH_FALLBACK" + ] + }, + "compute.AzureAttributes": { + "anyOf": [ + { + "type": "object", + "properties": { + "availability": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AzureAvailability" + }, + "first_on_demand": { + "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. 
Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.", + "$ref": "#/$defs/int" + }, + "log_analytics_info": { + "description": "Defines values necessary to configure and run Azure Log Analytics agent", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.LogAnalyticsInfo" + }, + "spot_bid_max_price": { + "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should be \u003e 0 or -1.", + "$ref": "#/$defs/float64" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.AzureAvailability": { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero (which only happens on pool clusters), this availability\ntype will be used for the entire cluster.", + "enum": [ + "SPOT_AZURE", + "ON_DEMAND_AZURE", + "SPOT_WITH_FALLBACK_AZURE" + ] + }, + "compute.ClientsTypes": { + "anyOf": [ + { + "type": "object", + "properties": { + "jobs": { + "description": "With jobs set, the cluster can be used for jobs", + "$ref": "#/$defs/bool" + }, + "notebooks": { + "description": "With notebooks set, this cluster can be used for notebooks", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.ClusterLogConf": { + "anyOf": [ + { + "type": "object", + "properties": { + "dbfs": { + "description": "destination needs to be provided. 
e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DbfsStorageInfo" + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.S3StorageInfo" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.ClusterSpec": { + "anyOf": [ + { + "type": "object", + "properties": { + "apply_policy_default_values": { + "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied.", + "$ref": "#/$defs/bool" + }, + "autoscale": { + "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AutoScale" + }, + "autotermination_minutes": { + "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. 
If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination.", + "$ref": "#/$defs/int" + }, + "aws_attributes": { + "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes" + }, + "azure_attributes": { + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes" + }, + "cluster_log_conf": { + "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterLogConf" + }, + "cluster_name": { + "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n", + "$ref": "#/$defs/string" + }, + "custom_tags": { + "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. 
Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", + "$ref": "#/$defs/map/string" + }, + "data_security_mode": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DataSecurityMode" + }, + "docker_image": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DockerImage" + }, + "driver_instance_pool_id": { + "description": "The optional ID of the instance pool to which the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned.", + "$ref": "#/$defs/string" + }, + "driver_node_type_id": { + "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n", + "$ref": "#/$defs/string" + }, + "enable_elastic_disk": { + "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details.", + "$ref": "#/$defs/bool" + }, + "enable_local_disk_encryption": { + "description": "Whether to enable LUKS on cluster VMs' local disks", + "$ref": "#/$defs/bool" + }, + "gcp_attributes": { + "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.GcpAttributes" + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. 
If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo" + }, + "instance_pool_id": { + "description": "The optional ID of the instance pool to which the cluster belongs.", + "$ref": "#/$defs/string" + }, + "node_type_id": { + "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n", + "$ref": "#/$defs/string" + }, + "num_workers": { + "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. 
For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.", + "$ref": "#/$defs/int" + }, + "policy_id": { + "description": "The ID of the cluster policy used to create the cluster if applicable.", + "$ref": "#/$defs/string" + }, + "runtime_engine": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.RuntimeEngine" + }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`", + "$ref": "#/$defs/string" + }, + "spark_conf": { + "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", + "$ref": "#/$defs/map/string" + }, + "spark_env_vars": { + "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", + "$ref": "#/$defs/map/string" + }, + "spark_version": { + "description": "The Spark version of the cluster, e.g. 
`3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n", + "$ref": "#/$defs/string" + }, + "ssh_public_keys": { + "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", + "$ref": "#/$defs/slice/string" + }, + "workload_type": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.WorkloadType" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.DataSecurityMode": { + "type": "string", + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. 
But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.\n", + "enum": [ + "NONE", + "SINGLE_USER", + "USER_ISOLATION", + "LEGACY_TABLE_ACL", + "LEGACY_PASSTHROUGH", + "LEGACY_SINGLE_USER", + "LEGACY_SINGLE_USER_STANDARD" + ] + }, + "compute.DbfsStorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.DockerBasicAuth": { + "anyOf": [ + { + "type": "object", + "properties": { + "password": { + "description": "Password of the user", + "$ref": "#/$defs/string" + }, + "username": { + "description": "Name of the user", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.DockerImage": { + "anyOf": [ + { + "type": "object", + "properties": { + "basic_auth": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DockerBasicAuth" + }, + "url": { + "description": "URL of the docker image.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": 
"\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.EbsVolumeType": { + "type": "string", + "description": "The type of EBS volumes that will be launched with this cluster.", + "enum": [ + "GENERAL_PURPOSE_SSD", + "THROUGHPUT_OPTIMIZED_HDD" + ] + }, + "compute.Environment": { + "anyOf": [ + { + "type": "object", + "description": "The environment entity used to preserve serverless environment side panel and jobs' environment for non-notebook task.\nIn this minimal environment spec, only pip dependencies are supported.", + "properties": { + "client": { + "description": "Client version used by the environment\nThe client is the user-facing environment of the runtime.\nEach client comes with a specific set of pre-installed libraries.\nThe version is a string, consisting of the major client version.", + "$ref": "#/$defs/string" + }, + "dependencies": { + "description": "List of pip dependencies, as supported by the version of pip in this environment.\nEach dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/\nAllowed dependency could be \u003crequirement specifier\u003e, \u003carchive url/path\u003e, \u003clocal project path\u003e(WSFS or Volumes in Databricks), \u003cvcs project url\u003e\nE.g. 
dependencies: [\"foo==0.0.1\", \"-r /Workspace/test/requirements.txt\"]", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false, + "required": [ + "client" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.GcpAttributes": { + "anyOf": [ + { + "type": "object", + "properties": { + "availability": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.GcpAvailability" + }, + "boot_disk_size": { + "description": "boot disk size in GB", + "$ref": "#/$defs/int" + }, + "google_service_account": { + "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator.", + "$ref": "#/$defs/string" + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type.", + "$ref": "#/$defs/int" + }, + "use_preemptible_executors": { + "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead.", + "$ref": "#/$defs/bool" + }, + "zone_id": { + "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.GcpAvailability": { + "type": "string", + "description": "This field determines whether the instance pool will contain preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", + "enum": [ + "PREEMPTIBLE_GCP", + "ON_DEMAND_GCP", + "PREEMPTIBLE_WITH_FALLBACK_GCP" + ] + }, + "compute.GcsStorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "GCS destination/URI, e.g. 
`gs://my-bucket/some-prefix`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.InitScriptInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "abfss": { + "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.Adlsgen2Info" + }, + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DbfsStorageInfo" + }, + "file": { + "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.LocalFileInfo" + }, + "gcs": { + "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.GcsStorageInfo" + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.S3StorageInfo" + }, + "volumes": { + "description": "destination needs to be provided. 
e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.VolumesStorageInfo" + }, + "workspace": { + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.WorkspaceStorageInfo" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.Library": { + "anyOf": [ + { + "type": "object", + "properties": { + "cran": { + "description": "Specification of a CRAN library to be installed as part of the library", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.RCranLibrary" + }, + "egg": { + "description": "Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above.", + "$ref": "#/$defs/string" + }, + "jar": { + "description": "URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"jar\": \"/Workspace/path/to/library.jar\" }`, `{ \"jar\" : \"/Volumes/path/to/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.", + "$ref": "#/$defs/string" + }, + "maven": { + "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.MavenLibrary" + }, + "pypi": { + "description": "Specification of a PyPi library to be installed. 
For example:\n`{ \"package\": \"simplejson\" }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.PythonPyPiLibrary" + }, + "requirements": { + "description": "URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported.\nFor example: `{ \"requirements\": \"/Workspace/path/to/requirements.txt\" }` or `{ \"requirements\" : \"/Volumes/path/to/requirements.txt\" }`", + "$ref": "#/$defs/string" + }, + "whl": { + "description": "URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"whl\": \"/Workspace/path/to/library.whl\" }`, `{ \"whl\" : \"/Volumes/path/to/library.whl\" }` or\n`{ \"whl\": \"s3://my-bucket/library.whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.LocalFileInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "local file destination, e.g. 
`file:/my/local/file.sh`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.LogAnalyticsInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "log_analytics_primary_key": { + "description": "\u003cneeds content added\u003e", + "$ref": "#/$defs/string" + }, + "log_analytics_workspace_id": { + "description": "\u003cneeds content added\u003e", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.MavenLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "coordinates": { + "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\".", + "$ref": "#/$defs/string" + }, + "exclusions": { + "description": "List of dependences to exclude. For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", + "$ref": "#/$defs/slice/string" + }, + "repo": { + "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "coordinates" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.PythonPyPiLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "package": { + "description": "The name of the pypi package to install. An optional exact version specification is also\nsupported. 
Examples: \"simplejson\" and \"simplejson==3.8.0\".", + "$ref": "#/$defs/string" + }, + "repo": { + "description": "The repository where the package can be found. If not specified, the default pip index is\nused.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "package" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.RCranLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "package": { + "description": "The name of the CRAN package to install.", + "$ref": "#/$defs/string" + }, + "repo": { + "description": "The repository where the package can be found. If not specified, the default CRAN repo is used.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "package" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.RuntimeEngine": { + "type": "string", + "description": "Decides which runtime engine to be use, e.g. Standard vs. Photon. If unspecified, the runtime\nengine is inferred from spark_version.", + "enum": [ + "NULL", + "STANDARD", + "PHOTON" + ] + }, + "compute.S3StorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. 
If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs.", + "$ref": "#/$defs/string" + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs.", + "$ref": "#/$defs/string" + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default.", + "$ref": "#/$defs/bool" + }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`.", + "$ref": "#/$defs/string" + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used.", + "$ref": "#/$defs/string" + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`.", + "$ref": "#/$defs/string" + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.VolumesStorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "Unity Catalog Volumes file destination, e.g. 
`/Volumes/my-init.sh`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.WorkloadType": { + "anyOf": [ + { + "type": "object", + "properties": { + "clients": { + "description": " defined what type of clients can use the cluster. E.g. Notebooks, Jobs", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClientsTypes" + } + }, + "additionalProperties": false, + "required": [ + "clients" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.WorkspaceStorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Condition": { + "type": "string", + "enum": [ + "ANY_UPDATED", + "ALL_UPDATED" + ] + }, + "jobs.ConditionTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "left": { + "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference.", + "$ref": "#/$defs/string" + }, + "op": { + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. 
`“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.ConditionTaskOp" + }, + "right": { + "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "left", + "op", + "right" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.ConditionTaskOp": { + "type": "string", + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", + "enum": [ + "EQUAL_TO", + "GREATER_THAN", + "GREATER_THAN_OR_EQUAL", + "LESS_THAN", + "LESS_THAN_OR_EQUAL", + "NOT_EQUAL" + ] + }, + "jobs.Continuous": { + "anyOf": [ + { + "type": "object", + "properties": { + "pause_status": { + "description": "Indicate whether the continuous execution of the job is paused or not. 
Defaults to UNPAUSED.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PauseStatus" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.CronSchedule": { + "anyOf": [ + { + "type": "object", + "properties": { + "pause_status": { + "description": "Indicate whether this schedule is paused or not.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PauseStatus" + }, + "quartz_cron_expression": { + "description": "A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required.", + "$ref": "#/$defs/string" + }, + "timezone_id": { + "description": "A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "quartz_cron_expression", + "timezone_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.DbtTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "catalog": { + "description": "Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks \u003e= 1.1.1.", + "$ref": "#/$defs/string" + }, + "commands": { + "description": "A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. 
A maximum of up to 10 commands can be provided.", + "$ref": "#/$defs/slice/string" + }, + "profiles_directory": { + "description": "Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used.", + "$ref": "#/$defs/string" + }, + "project_directory": { + "description": "Path to the project directory. Optional for Git sourced tasks, in which\ncase if no value is provided, the root of the Git repository is used.", + "$ref": "#/$defs/string" + }, + "schema": { + "description": "Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used.", + "$ref": "#/$defs/string" + }, + "source": { + "description": "Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Project is located in Databricks workspace.\n* `GIT`: Project is located in cloud Git provider.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Source" + }, + "warehouse_id": { + "description": "ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. 
It can be overridden on a per-command basis by using the `--profiles-dir` command line argument.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "commands" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.FileArrivalTriggerConfiguration": { + "anyOf": [ + { + "type": "object", + "properties": { + "min_time_between_triggers_seconds": { + "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds", + "$ref": "#/$defs/int" + }, + "url": { + "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location.", + "$ref": "#/$defs/string" + }, + "wait_after_last_change_seconds": { + "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds.", + "$ref": "#/$defs/int" + } + }, + "additionalProperties": false, + "required": [ + "url" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.ForEachTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "concurrency": { + "description": "An optional maximum allowed number of concurrent runs of the task.\nSet this value if you want to be able to execute multiple runs of the task concurrently.", + "$ref": "#/$defs/int" + }, + "inputs": { + "description": "Array for task to iterate on. 
This can be a JSON string or a reference to\nan array parameter.", + "$ref": "#/$defs/string" + }, + "task": { + "description": "Configuration for the task that will be run for each element in the array", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Task" + } + }, + "additionalProperties": false, + "required": [ + "inputs", + "task" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Format": { + "type": "string", + "enum": [ + "SINGLE_TASK", + "MULTI_TASK" + ] + }, + "jobs.GitProvider": { + "type": "string", + "enum": [ + "gitHub", + "bitbucketCloud", + "azureDevOpsServices", + "gitHubEnterprise", + "bitbucketServer", + "gitLab", + "gitLabEnterpriseEdition", + "awsCodeCommit" + ] + }, + "jobs.GitSnapshot": { + "anyOf": [ + { + "type": "object", + "description": "Read-only state of the remote repository at the time the job was run. This field is only included on job runs.", + "properties": { + "used_commit": { + "description": "Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.GitSource": { + "anyOf": [ + { + "type": "object", + "description": "An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. 
If dbt or SQL File tasks are used, `git_source` must be defined on the job.", + "properties": { + "git_branch": { + "description": "Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit.", + "$ref": "#/$defs/string" + }, + "git_commit": { + "description": "Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag.", + "$ref": "#/$defs/string" + }, + "git_provider": { + "description": "Unique identifier of the service used to host the Git repository. The value is case insensitive.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.GitProvider" + }, + "git_snapshot": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.GitSnapshot" + }, + "git_tag": { + "description": "Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit.", + "$ref": "#/$defs/string" + }, + "git_url": { + "description": "URL of the repository to be cloned by this job.", + "$ref": "#/$defs/string" + }, + "job_source": { + "description": "The source of the job specification in the remote repository when the job is source controlled.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobSource" + } + }, + "additionalProperties": false, + "required": [ + "git_provider", + "git_url" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobCluster": { + "anyOf": [ + { + "type": "object", + "properties": { + "job_cluster_key": { + "description": "A unique name for the job cluster. 
This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution.", + "$ref": "#/$defs/string" + }, + "new_cluster": { + "description": "If new_cluster, a description of a cluster that is created for each task.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec" + } + }, + "additionalProperties": false, + "required": [ + "job_cluster_key", + "new_cluster" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobDeployment": { + "anyOf": [ + { + "type": "object", + "properties": { + "kind": { + "description": "The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobDeploymentKind" + }, + "metadata_file_path": { + "description": "Path of the file that contains deployment metadata.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "kind" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobDeploymentKind": { + "type": "string", + "description": "* `BUNDLE`: The job is managed by Databricks Asset Bundle.", + "enum": [ + "BUNDLE" + ] + }, + "jobs.JobEditMode": { + "type": "string", + "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", + "enum": [ + "UI_LOCKED", + "EDITABLE" + ] + }, + "jobs.JobEmailNotifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "no_alert_for_skipped_runs": { + "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped.", + "$ref": "#/$defs/bool" + }, + "on_duration_warning_threshold_exceeded": { + 
"description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_failure": { + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_start": { + "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_streaming_backlog_exceeded": { + "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", + "$ref": "#/$defs/slice/string" + }, + "on_success": { + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. 
If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobEnvironment": { + "anyOf": [ + { + "type": "object", + "properties": { + "environment_key": { + "description": "The key of an environment. It has to be unique within a job.", + "$ref": "#/$defs/string" + }, + "spec": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.Environment" + } + }, + "additionalProperties": false, + "required": [ + "environment_key" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobNotificationSettings": { + "anyOf": [ + { + "type": "object", + "properties": { + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.", + "$ref": "#/$defs/bool" + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobParameterDefinition": { + "anyOf": [ + { + "type": "object", + "properties": { + "default": { + "description": "Default value of the parameter.", + "$ref": "#/$defs/string" + }, + "name": { + "description": "The name of the defined parameter. 
May only contain alphanumeric characters, `_`, `-`, and `.`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "default", + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobRunAs": { + "anyOf": [ + { + "type": "object", + "description": "Write-only setting, available only in Create/Update/Reset and Submit calls. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job.\n\nOnly `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown.", + "properties": { + "service_principal_name": { + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", + "$ref": "#/$defs/string" + }, + "user_name": { + "description": "The email of an active workspace user. Non-admin users can only set this field to their own email.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobSource": { + "anyOf": [ + { + "type": "object", + "description": "The source of the job specification in the remote repository when the job is source controlled.", + "properties": { + "dirty_state": { + "description": "Dirty state indicates the job is not fully synced with the job specification in the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporarily disconnected from the remote job specification and is allowed for live edit. 
Import the remote job specification again from UI to make the job fully synced.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobSourceDirtyState" + }, + "import_from_git_branch": { + "description": "Name of the branch which the job is imported from.", + "$ref": "#/$defs/string" + }, + "job_config_path": { + "description": "Path of the job YAML file that contains the job specification.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "import_from_git_branch", + "job_config_path" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobSourceDirtyState": { + "type": "string", + "description": "Dirty state indicates the job is not fully synced with the job specification\nin the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporarily disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.", + "enum": [ + "NOT_SYNCED", + "DISCONNECTED" + ] + }, + "jobs.JobsHealthMetric": { + "type": "string", + "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Private Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Private Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. 
This metric is in Private Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Private Preview.", + "enum": [ + "RUN_DURATION_SECONDS", + "STREAMING_BACKLOG_BYTES", + "STREAMING_BACKLOG_RECORDS", + "STREAMING_BACKLOG_SECONDS", + "STREAMING_BACKLOG_FILES" + ] + }, + "jobs.JobsHealthOperator": { + "type": "string", + "description": "Specifies the operator used to compare the health metric value with the specified threshold.", + "enum": [ + "GREATER_THAN" + ] + }, + "jobs.JobsHealthRule": { + "anyOf": [ + { + "type": "object", + "properties": { + "metric": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthMetric" + }, + "op": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthOperator" + }, + "value": { + "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule.", + "$ref": "#/$defs/int64" + } + }, + "additionalProperties": false, + "required": [ + "metric", + "op", + "value" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobsHealthRules": { + "anyOf": [ + { + "type": "object", + "description": "An optional set of health rules that can be defined for this job.", + "properties": { + "rules": { + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRule" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.NotebookTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "base_parameters": { + "description": "Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/runNow\nwith parameters specified, the two parameter maps are merged. 
If the same key is specified in\n`base_parameters` and in `run-now`, the value from `run-now` is used.\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nIf the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters,\nthe default value from the notebook is used.\n\nRetrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).\n\nThe JSON representation of this field cannot exceed 1MB.", + "$ref": "#/$defs/map/string" + }, + "notebook_path": { + "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.", + "$ref": "#/$defs/string" + }, + "source": { + "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n* `WORKSPACE`: Notebook is located in Databricks workspace.\n* `GIT`: Notebook is located in cloud Git provider.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Source" + }, + "warehouse_id": { + "description": "Optional `warehouse_id` to run the notebook on a SQL warehouse. 
Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.\n\nNote that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "notebook_path" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.PauseStatus": { + "type": "string", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + "jobs.PeriodicTriggerConfiguration": { + "anyOf": [ + { + "type": "object", + "properties": { + "interval": { + "description": "The interval at which the trigger should run.", + "$ref": "#/$defs/int" + }, + "unit": { + "description": "The unit of time for the interval.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PeriodicTriggerConfigurationTimeUnit" + } + }, + "additionalProperties": false, + "required": [ + "interval", + "unit" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.PeriodicTriggerConfigurationTimeUnit": { + "type": "string", + "enum": [ + "HOURS", + "DAYS", + "WEEKS" + ] + }, + "jobs.PipelineParams": { + "anyOf": [ + { + "type": "object", + "properties": { + "full_refresh": { + "description": "If true, triggers a full refresh on the delta live table.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.PipelineTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "full_refresh": { + "description": "If true, triggers a full refresh on the delta live table.", + "$ref": "#/$defs/bool" + }, + "pipeline_id": { + "description": "The full name of the pipeline task to execute.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "pipeline_id" + ] + 
}, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.PythonWheelTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "entry_point": { + "description": "Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()`", + "$ref": "#/$defs/string" + }, + "named_parameters": { + "description": "Command-line parameters passed to Python wheel task in the form of `[\"--name=task\", \"--data=dbfs:/path/to/data.json\"]`. Leave it empty if `parameters` is not null.", + "$ref": "#/$defs/map/string" + }, + "package_name": { + "description": "Name of the package to execute", + "$ref": "#/$defs/string" + }, + "parameters": { + "description": "Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false, + "required": [ + "entry_point", + "package_name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.QueueSettings": { + "anyOf": [ + { + "type": "object", + "properties": { + "enabled": { + "description": "If true, enable queueing for the job. This is a required field.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false, + "required": [ + "enabled" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.RunIf": { + "type": "string", + "description": "An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. 
When omitted, defaults to `ALL_SUCCESS`.\n\nPossible values are:\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: All dependencies have failed", + "enum": [ + "ALL_SUCCESS", + "ALL_DONE", + "NONE_FAILED", + "AT_LEAST_ONE_SUCCESS", + "ALL_FAILED", + "AT_LEAST_ONE_FAILED" + ] + }, + "jobs.RunJobTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "dbt_commands": { + "description": "An array of commands to execute for jobs with the dbt task, for example `\"dbt_commands\": [\"dbt deps\", \"dbt seed\", \"dbt deps\", \"dbt seed\", \"dbt run\"]`", + "$ref": "#/$defs/slice/string" + }, + "jar_params": { + "description": "A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", \"35\"]`.\nThe parameters are used to invoke the main function of the main class specified in the Spark JAR task.\nIf not specified upon `run-now`, it defaults to an empty list.\njar_params cannot be specified in conjunction with notebook_params.\nThe JSON representation of this field (for example `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "$ref": "#/$defs/slice/string" + }, + "job_id": { + "description": "ID of the job to trigger.", + "$ref": "#/$defs/int64" + }, + "job_parameters": { + "description": "Job-level parameters used to trigger the job.", + "$ref": "#/$defs/map/string" + }, + "notebook_params": { + "description": "A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": \"john doe\", \"age\": \"35\"}`.\nThe map is passed to the notebook and is 
accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function.\n\nIf not specified upon `run-now`, the triggered run uses the job’s base parameters.\n\nnotebook_params cannot be specified in conjunction with jar_params.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nThe JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes.", + "$ref": "#/$defs/map/string" + }, + "pipeline_params": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PipelineParams" + }, + "python_named_params": { + "$ref": "#/$defs/map/string" + }, + "python_params": { + "description": "A list of parameters for jobs with Python tasks, for example `\"python_params\": [\"john doe\", \"35\"]`.\nThe parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite\nthe parameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", + "$ref": "#/$defs/slice/string" + }, + "spark_submit_params": { + "description": "A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": [\"--class\", \"org.apache.spark.examples.SparkPi\"]`.\nThe parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the\nparameters specified in job setting. 
The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", + "$ref": "#/$defs/slice/string" + }, + "sql_params": { + "description": "A map from keys to values for jobs with SQL task, for example `\"sql_params\": {\"name\": \"john doe\", \"age\": \"35\"}`. The SQL alert task does not support custom parameters.", + "$ref": "#/$defs/map/string" + } + }, + "additionalProperties": false, + "required": [ + "job_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Source": { + "type": "string", + "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", + "enum": [ + "WORKSPACE", + "GIT" + ] + }, + "jobs.SparkJarTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "jar_uri": { + "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.", + "$ref": "#/$defs/string" + }, + "main_class_name": { + "description": "The full name of the class containing the main method to be executed. 
This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail.", + "$ref": "#/$defs/string" + }, + "parameters": { + "description": "Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SparkPythonTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "parameters": { + "description": "Command line parameters passed to the Python file.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "$ref": "#/$defs/slice/string" + }, + "python_file": { + "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required.", + "$ref": "#/$defs/string" + }, + "source": { + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local\nDatabricks workspace or cloud location (if the `python_file` has a URI format). 
When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Source" + } + }, + "additionalProperties": false, + "required": [ + "python_file" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SparkSubmitTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "parameters": { + "description": "Command-line parameters passed to spark submit.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "alert": { + "description": "If alert, indicates that this job must refresh a SQL alert.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskAlert" + }, + "dashboard": { + "description": "If dashboard, indicates that this job must refresh a SQL dashboard.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskDashboard" + }, + "file": { + "description": "If file, indicates that this job runs a SQL file in a remote Git repository.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskFile" + }, + "parameters": { + "description": "Parameters to be used for each run of this job. 
The SQL alert task does not support custom parameters.", + "$ref": "#/$defs/map/string" + }, + "query": { + "description": "If query, indicates that this job must execute a SQL query.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskQuery" + }, + "warehouse_id": { + "description": "The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "warehouse_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskAlert": { + "anyOf": [ + { + "type": "object", + "properties": { + "alert_id": { + "description": "The canonical identifier of the SQL alert.", + "$ref": "#/$defs/string" + }, + "pause_subscriptions": { + "description": "If true, the alert notifications are not sent to subscribers.", + "$ref": "#/$defs/bool" + }, + "subscriptions": { + "description": "If specified, alert notifications are sent to subscribers.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskSubscription" + } + }, + "additionalProperties": false, + "required": [ + "alert_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskDashboard": { + "anyOf": [ + { + "type": "object", + "properties": { + "custom_subject": { + "description": "Subject of the email sent to subscribers of this task.", + "$ref": "#/$defs/string" + }, + "dashboard_id": { + "description": "The canonical identifier of the SQL dashboard.", + "$ref": "#/$defs/string" + }, + "pause_subscriptions": { + "description": "If true, the dashboard snapshot is not taken, and emails are not sent to subscribers.", + "$ref": "#/$defs/bool" + 
}, + "subscriptions": { + "description": "If specified, dashboard snapshots are sent to subscriptions.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskSubscription" + } + }, + "additionalProperties": false, + "required": [ + "dashboard_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskFile": { + "anyOf": [ + { + "type": "object", + "properties": { + "path": { + "description": "Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths.", + "$ref": "#/$defs/string" + }, + "source": { + "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Source" + } + }, + "additionalProperties": false, + "required": [ + "path" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskQuery": { + "anyOf": [ + { + "type": "object", + "properties": { + "query_id": { + "description": "The canonical identifier of the SQL query.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "query_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskSubscription": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination_id": { + "description": "The canonical identifier of the destination to receive email 
notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications.", + "$ref": "#/$defs/string" + }, + "user_name": { + "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TableUpdateTriggerConfiguration": { + "anyOf": [ + { + "type": "object", + "properties": { + "condition": { + "description": "The table(s) condition based on which to trigger a job run.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Condition" + }, + "min_time_between_triggers_seconds": { + "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds.", + "$ref": "#/$defs/int" + }, + "table_names": { + "description": "A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.", + "$ref": "#/$defs/slice/string" + }, + "wait_after_last_change_seconds": { + "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. 
The\nminimum allowed value is 60 seconds.", + "$ref": "#/$defs/int" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Task": { + "anyOf": [ + { + "type": "object", + "properties": { + "condition_task": { + "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.ConditionTask" + }, + "dbt_task": { + "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.DbtTask" + }, + "depends_on": { + "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true.\nThe key is `task_key`, and the value is the name assigned to the dependent task.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.TaskDependency" + }, + "description": { + "description": "An optional description for this task.", + "$ref": "#/$defs/string" + }, + "disable_auto_optimization": { + "description": "An option to disable auto optimization in serverless", + "$ref": "#/$defs/bool" + }, + "email_notifications": { + "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. 
The default behavior is to not send any emails.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TaskEmailNotifications" + }, + "environment_key": { + "description": "The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute.", + "$ref": "#/$defs/string" + }, + "existing_cluster_id": { + "description": "If existing_cluster_id, the ID of an existing cluster that is used for all runs.\nWhen running jobs or tasks on an existing cluster, you may need to manually restart\nthe cluster if it stops responding. We suggest running jobs and tasks on new clusters for\ngreater reliability", + "$ref": "#/$defs/string" + }, + "for_each_task": { + "description": "If for_each_task, indicates that this task must execute the nested task within it.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.ForEachTask" + }, + "health": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" + }, + "job_cluster_key": { + "description": "If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`.", + "$ref": "#/$defs/string" + }, + "libraries": { + "description": "An optional list of libraries to be installed on the cluster.\nThe default value is an empty list.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/compute.Library" + }, + "max_retries": { + "description": "An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry.", + "$ref": "#/$defs/int" + }, + "min_retry_interval_millis": { + "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. 
The default behavior is that unsuccessful runs are immediately retried.", + "$ref": "#/$defs/int" + }, + "new_cluster": { + "description": "If new_cluster, a description of a new cluster that is created for each run.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec" + }, + "notebook_task": { + "description": "If notebook_task, indicates that this task must run a notebook. This field may not be specified in conjunction with spark_jar_task.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.NotebookTask" + }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TaskNotificationSettings" + }, + "pipeline_task": { + "description": "If pipeline_task, indicates that this task must execute a Pipeline.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PipelineTask" + }, + "python_wheel_task": { + "description": "If python_wheel_task, indicates that this job must execute a PythonWheel.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PythonWheelTask" + }, + "retry_on_timeout": { + "description": "An optional policy to specify whether to retry a job when it times out. 
The default behavior\nis to not retry on timeout.", + "$ref": "#/$defs/bool" + }, + "run_if": { + "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: All dependencies have failed", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.RunIf" + }, + "run_job_task": { + "description": "If run_job_task, indicates that this task must execute another job.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask" + }, + "spark_jar_task": { + "description": "If spark_jar_task, indicates that this task must run a JAR.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SparkJarTask" + }, + "spark_python_task": { + "description": "If spark_python_task, indicates that this task must run a Python file.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SparkPythonTask" + }, + "spark_submit_task": { + "description": "If `spark_submit_task`, indicates that this task must be launched by the spark submit script. This task can run only on new clusters.\n\nIn the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations.\n\n`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters.\n\nBy default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). 
You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage.\n\nThe `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SparkSubmitTask" + }, + "sql_task": { + "description": "If sql_task, indicates that this job must execute a SQL task.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTask" + }, + "task_key": { + "description": "A unique name for the task. This field is used to refer to this task from other tasks.\nThis field is required and must be unique within its parent job.\nOn Update or Reset, this field is used to reference the tasks to be updated or reset.", + "$ref": "#/$defs/string" + }, + "timeout_seconds": { + "description": "An optional timeout applied to each run of this job task. A value of `0` means no timeout.", + "$ref": "#/$defs/int" + }, + "webhook_notifications": { + "description": "A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.WebhookNotifications" + } + }, + "additionalProperties": false, + "required": [ + "task_key" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TaskDependency": { + "anyOf": [ + { + "type": "object", + "properties": { + "outcome": { + "description": "Can only be specified on condition task dependencies. 
The outcome of the dependent task that must be met for this task to run.", + "$ref": "#/$defs/string" + }, + "task_key": { + "description": "The name of the task this task depends on.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "task_key" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TaskEmailNotifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "no_alert_for_skipped_runs": { + "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped.", + "$ref": "#/$defs/bool" + }, + "on_duration_warning_threshold_exceeded": { + "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_failure": { + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_start": { + "description": "A list of email addresses to be notified when a run begins. 
If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_streaming_backlog_exceeded": { + "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", + "$ref": "#/$defs/slice/string" + }, + "on_success": { + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TaskNotificationSettings": { + "anyOf": [ + { + "type": "object", + "properties": { + "alert_on_last_attempt": { + "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run.", + "$ref": "#/$defs/bool" + }, + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.", + "$ref": "#/$defs/bool" + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + 
"type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TriggerSettings": { + "anyOf": [ + { + "type": "object", + "properties": { + "file_arrival": { + "description": "File arrival trigger settings.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.FileArrivalTriggerConfiguration" + }, + "pause_status": { + "description": "Whether this trigger is paused or not.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PauseStatus" + }, + "periodic": { + "description": "Periodic trigger settings.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PeriodicTriggerConfiguration" + }, + "table": { + "description": "Old table trigger settings name. Deprecated in favor of `table_update`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TableUpdateTriggerConfiguration" + }, + "table_update": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TableUpdateTriggerConfiguration" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Webhook": { + "anyOf": [ + { + "type": "object", + "properties": { + "id": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.WebhookNotifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "on_duration_warning_threshold_exceeded": { + "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. 
A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + }, + "on_failure": { + "description": "An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + }, + "on_start": { + "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + }, + "on_streaming_backlog_exceeded": { + "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + }, + "on_success": { + "description": "An optional list of system notification IDs to call when the run completes successfully. 
A maximum of 3 destinations can be specified for the `on_success` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ExperimentTag": { + "anyOf": [ + { + "type": "object", + "properties": { + "key": { + "description": "The tag key.", + "$ref": "#/$defs/string" + }, + "value": { + "description": "The tag value.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelTag": { + "anyOf": [ + { + "type": "object", + "properties": { + "key": { + "description": "The tag key.", + "$ref": "#/$defs/string" + }, + "value": { + "description": "The tag value.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelVersion": { + "anyOf": [ + { + "type": "object", + "properties": { + "creation_timestamp": { + "description": "Timestamp recorded when this `model_version` was created.", + "$ref": "#/$defs/int64" + }, + "current_stage": { + "description": "Current stage for this `model_version`.", + "$ref": "#/$defs/string" + }, + "description": { + "description": "Description of this `model_version`.", + "$ref": "#/$defs/string" + }, + "last_updated_timestamp": { + "description": "Timestamp recorded when metadata for this `model_version` was last updated.", + "$ref": "#/$defs/int64" + }, + "name": { + "description": "Unique name of the model", + "$ref": "#/$defs/string" + }, + "run_id": { + "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server.", + "$ref": "#/$defs/string" + }, + 
"run_link": { + "description": "Run Link: Direct link to the run that generated this version", + "$ref": "#/$defs/string" + }, + "source": { + "description": "URI indicating the location of the source model artifacts, used when creating `model_version`", + "$ref": "#/$defs/string" + }, + "status": { + "description": "Current status of `model_version`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ModelVersionStatus", + "enum": [ + "PENDING_REGISTRATION", + "FAILED_REGISTRATION", + "READY" + ] + }, + "status_message": { + "description": "Details on current `status`, if it is pending or failed.", + "$ref": "#/$defs/string" + }, + "tags": { + "description": "Tags: Additional metadata key-value pairs for this `model_version`.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ModelVersionTag" + }, + "user_id": { + "description": "User that created this `model_version`.", + "$ref": "#/$defs/string" + }, + "version": { + "description": "Model's version number.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelVersionStatus": { + "type": "string" + }, + "ml.ModelVersionTag": { + "anyOf": [ + { + "type": "object", + "properties": { + "key": { + "description": "The tag key.", + "$ref": "#/$defs/string" + }, + "value": { + "description": "The tag value.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.CronTrigger": { + "anyOf": [ + { + "type": "object", + "properties": { + "quartz_cron_schedule": { + "$ref": "#/$defs/string" + }, + "timezone_id": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": 
"\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.DeploymentKind": { + "type": "string", + "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.\n", + "enum": [ + "BUNDLE" + ] + }, + "pipelines.FileLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "path": { + "description": "The absolute path of the file.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.Filters": { + "anyOf": [ + { + "type": "object", + "properties": { + "exclude": { + "description": "Paths to exclude.", + "$ref": "#/$defs/slice/string" + }, + "include": { + "description": "Paths to include.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.IngestionConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "schema": { + "description": "Select tables from a specific source schema.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec" + }, + "table": { + "description": "Select tables from a specific source table.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpec" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.IngestionGatewayPipelineDefinition": { + "anyOf": [ + { + "type": "object", + "properties": { + "connection_id": { + "description": "Immutable. The Unity Catalog connection this gateway pipeline uses to communicate with the source.", + "$ref": "#/$defs/string" + }, + "gateway_storage_catalog": { + "description": "Required, Immutable. 
The name of the catalog for the gateway pipeline's storage location.", + "$ref": "#/$defs/string" + }, + "gateway_storage_name": { + "description": "Optional. The Unity Catalog-compatible name for the gateway storage location.\nThis is the destination to use for the data that is extracted by the gateway.\nDelta Live Tables system will automatically create the storage location under the catalog and schema.\n", + "$ref": "#/$defs/string" + }, + "gateway_storage_schema": { + "description": "Required, Immutable. The name of the schema for the gateway pipeline's storage location.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.IngestionPipelineDefinition": { + "anyOf": [ + { + "type": "object", + "properties": { + "connection_name": { + "description": "Immutable. The Unity Catalog connection this ingestion pipeline uses to communicate with the source. Specify either ingestion_gateway_id or connection_name.", + "$ref": "#/$defs/string" + }, + "ingestion_gateway_id": { + "description": "Immutable. Identifier for the ingestion gateway used by this ingestion pipeline to communicate with the source. Specify either ingestion_gateway_id or connection_name.", + "$ref": "#/$defs/string" + }, + "objects": { + "description": "Required. Settings specifying tables to replicate and the destination for the replicated tables.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.IngestionConfig" + }, + "table_configuration": { + "description": "Configuration settings to control the ingestion of tables. 
These settings are applied to all tables in the pipeline.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.ManualTrigger": { + "anyOf": [ + { + "type": "object", + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.NotebookLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "path": { + "description": "The absolute path of the notebook.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.Notifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "alerts": { + "description": "A list of alerts that trigger the sending of notifications to the configured\ndestinations. The supported alerts are:\n\n* `on-update-success`: A pipeline update completes successfully.\n* `on-update-failure`: Each time a pipeline update fails.\n* `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error.\n* `on-flow-failure`: A single data flow fails.\n", + "$ref": "#/$defs/slice/string" + }, + "email_recipients": { + "description": "A list of email addresses notified when a configured alert is triggered.\n", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineCluster": { + "anyOf": [ + { + "type": "object", + "properties": { + "apply_policy_default_values": { + "description": "Note: This field won't be persisted. 
Only API users will check this field.", + "$ref": "#/$defs/bool" + }, + "autoscale": { + "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineClusterAutoscale" + }, + "aws_attributes": { + "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes" + }, + "azure_attributes": { + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes" + }, + "cluster_log_conf": { + "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterLogConf" + }, + "custom_tags": { + "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. 
Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", + "$ref": "#/$defs/map/string" + }, + "driver_instance_pool_id": { + "description": "The optional ID of the instance pool to which the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned.", + "$ref": "#/$defs/string" + }, + "driver_node_type_id": { + "description": "The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above.", + "$ref": "#/$defs/string" + }, + "enable_local_disk_encryption": { + "description": "Whether to enable local disk encryption for the cluster.", + "$ref": "#/$defs/bool" + }, + "gcp_attributes": { + "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.GcpAttributes" + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo" + }, + "instance_pool_id": { + "description": "The optional ID of the instance pool to which the cluster belongs.", + "$ref": "#/$defs/string" + }, + "label": { + "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. 
The default value is `default`.", + "$ref": "#/$defs/string" + }, + "node_type_id": { + "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n", + "$ref": "#/$defs/string" + }, + "num_workers": { + "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.", + "$ref": "#/$defs/int" + }, + "policy_id": { + "description": "The ID of the cluster policy used to create the cluster if applicable.", + "$ref": "#/$defs/string" + }, + "spark_conf": { + "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nSee :method:clusters/create for more details.\n", + "$ref": "#/$defs/map/string" + }, + "spark_env_vars": { + "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. 
This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", + "$ref": "#/$defs/map/string" + }, + "ssh_public_keys": { + "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineClusterAutoscale": { + "anyOf": [ + { + "type": "object", + "properties": { + "max_workers": { + "description": "The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`.", + "$ref": "#/$defs/int" + }, + "min_workers": { + "description": "The minimum number of workers the cluster can scale down to when underutilized.\nIt is also the initial number of workers the cluster will have after creation.", + "$ref": "#/$defs/int" + }, + "mode": { + "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. 
The legacy autoscaling feature is used for `maintenance`\nclusters.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineClusterAutoscaleMode", + "enum": [ + "ENHANCED", + "LEGACY" + ] + } + }, + "additionalProperties": false, + "required": [ + "max_workers", + "min_workers" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineClusterAutoscaleMode": { + "type": "string" + }, + "pipelines.PipelineDeployment": { + "anyOf": [ + { + "type": "object", + "properties": { + "kind": { + "description": "The deployment method that manages the pipeline.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.DeploymentKind" + }, + "metadata_file_path": { + "description": "The path to the file containing metadata about the deployment.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "file": { + "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.FileLibrary" + }, + "jar": { + "description": "URI of the jar to be installed. 
Currently only DBFS is supported.\n", + "$ref": "#/$defs/string" + }, + "maven": { + "description": "Specification of a maven library to be installed.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.MavenLibrary" + }, + "notebook": { + "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.NotebookLibrary" + }, + "whl": { + "description": "URI of the whl to be installed.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineTrigger": { + "anyOf": [ + { + "type": "object", + "properties": { + "cron": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.CronTrigger" + }, + "manual": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.ManualTrigger" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.SchemaSpec": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination_catalog": { + "description": "Required. Destination catalog to store tables.", + "$ref": "#/$defs/string" + }, + "destination_schema": { + "description": "Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails if a table with the same name already exists.", + "$ref": "#/$defs/string" + }, + "source_catalog": { + "description": "The source catalog name. Might be optional depending on the type of source.", + "$ref": "#/$defs/string" + }, + "source_schema": { + "description": "Required. 
Schema name in the source database.", + "$ref": "#/$defs/string" + }, + "table_configuration": { + "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.TableSpec": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination_catalog": { + "description": "Required. Destination catalog to store table.", + "$ref": "#/$defs/string" + }, + "destination_schema": { + "description": "Required. Destination schema to store table.", + "$ref": "#/$defs/string" + }, + "destination_table": { + "description": "Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used.", + "$ref": "#/$defs/string" + }, + "source_catalog": { + "description": "Source catalog name. Might be optional depending on the type of source.", + "$ref": "#/$defs/string" + }, + "source_schema": { + "description": "Schema name in the source database. Might be optional depending on the type of source.", + "$ref": "#/$defs/string" + }, + "source_table": { + "description": "Required. Table name in the source database.", + "$ref": "#/$defs/string" + }, + "table_configuration": { + "description": "Configuration settings to control the ingestion of tables. 
These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.TableSpecificConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "primary_keys": { + "description": "The primary key of the table used to apply changes.", + "$ref": "#/$defs/slice/string" + }, + "salesforce_include_formula_fields": { + "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector", + "$ref": "#/$defs/bool" + }, + "scd_type": { + "description": "The SCD type to use to ingest the table.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfigScdType", + "enum": [ + "SCD_TYPE_1", + "SCD_TYPE_2" + ] + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.TableSpecificConfigScdType": { + "type": "string" + }, + "serving.Ai21LabsConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "ai21labs_api_key": { + "$ref": "#/$defs/string" + }, + "ai21labs_api_key_plaintext": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.AmazonBedrockConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "aws_access_key_id": { + "description": "The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. 
You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.", + "$ref": "#/$defs/string" + }, + "aws_access_key_id_plaintext": { + "description": "An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.", + "$ref": "#/$defs/string" + }, + "aws_region": { + "description": "The AWS region to use. Bedrock has to be enabled there.", + "$ref": "#/$defs/string" + }, + "aws_secret_access_key": { + "description": "The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`.", + "$ref": "#/$defs/string" + }, + "aws_secret_access_key_plaintext": { + "description": "An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`.", + "$ref": "#/$defs/string" + }, + "bedrock_provider": { + "description": "The underlying provider in Amazon Bedrock. 
Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfigBedrockProvider", + "enum": [ + "anthropic", + "cohere", + "ai21labs", + "amazon" + ] + } + }, + "additionalProperties": false, + "required": [ + "aws_region", + "bedrock_provider" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.AmazonBedrockConfigBedrockProvider": { + "type": "string" + }, + "serving.AnthropicConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "anthropic_api_key": { + "description": "The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`.", + "$ref": "#/$defs/string" + }, + "anthropic_api_key_plaintext": { + "description": "The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.AutoCaptureConfigInput": { + "anyOf": [ + { + "type": "object", + "properties": { + "catalog_name": { + "description": "The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled.", + "$ref": "#/$defs/string" + }, + "enabled": { + "description": "Indicates whether the inference table is enabled.", + "$ref": "#/$defs/bool" + }, + "schema_name": { + "description": "The name of the schema in Unity Catalog. 
NOTE: On update, you cannot change the schema name if the inference table is already enabled.", + "$ref": "#/$defs/string" + }, + "table_name_prefix": { + "description": "The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.CohereConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "cohere_api_base": { + "description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n", + "$ref": "#/$defs/string" + }, + "cohere_api_key": { + "description": "The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.", + "$ref": "#/$defs/string" + }, + "cohere_api_key_plaintext": { + "description": "The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. 
You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.DatabricksModelServingConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "databricks_api_token": { + "description": "The Databricks secret key reference for a Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model.\nIf you prefer to paste your API key directly, see `databricks_api_token_plaintext`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n", + "$ref": "#/$defs/string" + }, + "databricks_api_token_plaintext": { + "description": "The Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `databricks_api_token`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n", + "$ref": "#/$defs/string" + }, + "databricks_workspace_url": { + "description": "The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "databricks_workspace_url" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.EndpointCoreConfigInput": { + "anyOf": [ + { + "type": "object", + "properties": { + "auto_capture_config": { + "description": "Configuration for Inference Tables which automatically logs 
requests and responses to Unity Catalog.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AutoCaptureConfigInput" + }, + "served_entities": { + "description": "A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput" + }, + "served_models": { + "description": "(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput" + }, + "traffic_config": { + "description": "The traffic config defining how invocations to the serving endpoint should be routed.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.TrafficConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.EndpointTag": { + "anyOf": [ + { + "type": "object", + "properties": { + "key": { + "description": "Key field for a serving endpoint tag.", + "$ref": "#/$defs/string" + }, + "value": { + "description": "Optional value field for a serving endpoint tag.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "key" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ExternalModel": { + "anyOf": [ + { + "type": "object", + "properties": { + "ai21labs_config": { + "description": "AI21Labs Config. Only required if the provider is 'ai21labs'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig" + }, + "amazon_bedrock_config": { + "description": "Amazon Bedrock Config. 
Only required if the provider is 'amazon-bedrock'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfig" + }, + "anthropic_config": { + "description": "Anthropic Config. Only required if the provider is 'anthropic'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AnthropicConfig" + }, + "cohere_config": { + "description": "Cohere Config. Only required if the provider is 'cohere'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.CohereConfig" + }, + "databricks_model_serving_config": { + "description": "Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.DatabricksModelServingConfig" + }, + "google_cloud_vertex_ai_config": { + "description": "Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig" + }, + "name": { + "description": "The name of the external model.", + "$ref": "#/$defs/string" + }, + "openai_config": { + "description": "OpenAI Config. Only required if the provider is 'openai'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.OpenAiConfig" + }, + "palm_config": { + "description": "PaLM Config. Only required if the provider is 'palm'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig" + }, + "provider": { + "description": "The name of the provider for the external model. 
Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ExternalModelProvider", + "enum": [ + "ai21labs", + "anthropic", + "amazon-bedrock", + "cohere", + "databricks-model-serving", + "google-cloud-vertex-ai", + "openai", + "palm" + ] + }, + "task": { + "description": "The task type of the external model.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "name", + "provider", + "task" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ExternalModelProvider": { + "type": "string" + }, + "serving.GoogleCloudVertexAiConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "private_key": { + "$ref": "#/$defs/string" + }, + "private_key_plaintext": { + "$ref": "#/$defs/string" + }, + "project_id": { + "$ref": "#/$defs/string" + }, + "region": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.OpenAiConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "microsoft_entra_client_id": { + "$ref": "#/$defs/string" + }, + "microsoft_entra_client_secret": { + "$ref": "#/$defs/string" + }, + "microsoft_entra_client_secret_plaintext": { + "$ref": "#/$defs/string" + }, + "microsoft_entra_tenant_id": { + "$ref": "#/$defs/string" + }, + "openai_api_base": { + "$ref": "#/$defs/string" + }, + "openai_api_key": { + "$ref": "#/$defs/string" + }, + "openai_api_key_plaintext": { + "$ref": "#/$defs/string" + }, + "openai_api_type": { + "$ref": "#/$defs/string" + }, + "openai_api_version": { + "$ref": "#/$defs/string" + }, + "openai_deployment_name": { + "$ref": "#/$defs/string" + }, + 
"openai_organization": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.PaLmConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "palm_api_key": { + "$ref": "#/$defs/string" + }, + "palm_api_key_plaintext": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.RateLimit": { + "anyOf": [ + { + "type": "object", + "properties": { + "calls": { + "description": "Used to specify how many calls are allowed for a key within the renewal_period.", + "$ref": "#/$defs/int" + }, + "key": { + "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.RateLimitKey", + "enum": [ + "user", + "endpoint" + ] + }, + "renewal_period": { + "description": "Renewal period field for a serving endpoint rate limit. 
Currently, only 'minute' is supported.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.RateLimitRenewalPeriod", + "enum": [ + "minute" + ] + } + }, + "additionalProperties": false, + "required": [ + "calls", + "renewal_period" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.RateLimitKey": { + "type": "string" + }, + "serving.RateLimitRenewalPeriod": { + "type": "string" + }, + "serving.Route": { + "anyOf": [ + { + "type": "object", + "properties": { + "served_model_name": { + "description": "The name of the served model this route configures traffic for.", + "$ref": "#/$defs/string" + }, + "traffic_percentage": { + "description": "The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.", + "$ref": "#/$defs/int" + } + }, + "additionalProperties": false, + "required": [ + "served_model_name", + "traffic_percentage" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedEntityInput": { + "anyOf": [ + { + "type": "object", + "properties": { + "entity_name": { + "description": "The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC),\nor a function of type FEATURE_SPEC in the UC. 
If it is a UC object, the full name of the object should be given in the form of\n__catalog_name__.__schema_name__.__model_name__.\n", + "$ref": "#/$defs/string" + }, + "entity_version": { + "description": "The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.", + "$ref": "#/$defs/string" + }, + "environment_vars": { + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. \nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", + "$ref": "#/$defs/map/string" + }, + "external_model": { + "description": "The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled)\ncan be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model,\nit cannot be updated to an endpoint without external_model. 
If the endpoint is created without external_model, users cannot update it to add external_model later.\nThe task type of all external models within an endpoint must be the same.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ExternalModel" + }, + "instance_profile_arn": { + "description": "ARN of the instance profile that the served entity uses to access AWS resources.", + "$ref": "#/$defs/string" + }, + "max_provisioned_throughput": { + "description": "The maximum tokens per second that the endpoint can scale up to.", + "$ref": "#/$defs/int" + }, + "min_provisioned_throughput": { + "description": "The minimum tokens per second that the endpoint can scale down to.", + "$ref": "#/$defs/int" + }, + "name": { + "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores.\nIf not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other\nentities, it defaults to \u003centity-name\u003e-\u003centity-version\u003e.\n", + "$ref": "#/$defs/string" + }, + "scale_to_zero_enabled": { + "description": "Whether the compute resources for the served entity should scale down to zero.", + "$ref": "#/$defs/bool" + }, + "workload_size": { + "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.\n", + "$ref": "#/$defs/string" + }, + "workload_type": { + "description": "The workload type of the served entity. 
The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedModelInput": { + "anyOf": [ + { + "type": "object", + "properties": { + "environment_vars": { + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", + "$ref": "#/$defs/map/string" + }, + "instance_profile_arn": { + "description": "ARN of the instance profile that the served model will use to access AWS resources.", + "$ref": "#/$defs/string" + }, + "max_provisioned_throughput": { + "description": "The maximum tokens per second that the endpoint can scale up to.", + "$ref": "#/$defs/int" + }, + "min_provisioned_throughput": { + "description": "The minimum tokens per second that the endpoint can scale down to.", + "$ref": "#/$defs/int" + }, + "model_name": { + "description": "The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,\nin the form of __catalog_name__.__schema_name__.__model_name__.\n", + "$ref": "#/$defs/string" + }, + "model_version": { + "description": "The version of the model in Databricks Model Registry or Unity Catalog to be served.", + "$ref": "#/$defs/string" + }, 
+ "name": { + "description": "The name of a served model. It must be unique across an endpoint. If not specified, this field will default to \u003cmodel-name\u003e-\u003cmodel-version\u003e.\nA served model name can consist of alphanumeric characters, dashes, and underscores.\n", + "$ref": "#/$defs/string" + }, + "scale_to_zero_enabled": { + "description": "Whether the compute resources for the served model should scale down to zero.", + "$ref": "#/$defs/bool" + }, + "workload_size": { + "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadSize", + "enum": [ + "Small", + "Medium", + "Large" + ] + }, + "workload_type": { + "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". 
For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadType", + "enum": [ + "CPU", + "GPU_SMALL", + "GPU_MEDIUM", + "GPU_LARGE", + "MULTIGPU_MEDIUM" + ] + } + }, + "additionalProperties": false, + "required": [ + "model_name", + "model_version", + "scale_to_zero_enabled" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedModelInputWorkloadSize": { + "type": "string" + }, + "serving.ServedModelInputWorkloadType": { + "type": "string" + }, + "serving.TrafficConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "routes": { + "description": "The list of routes that define traffic to each served entity.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.Route" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + } + } + } + }, + "int": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string", + "pattern": "\\$\\{(resources(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(bundle(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(workspace(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(artifacts(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "int64": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string", + 
"pattern": "\\$\\{(resources(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(bundle(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(workspace(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(artifacts(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "interface": {}, + "map": { + "github.com": { + "databricks": { + "cli": { + "bundle": { + "config": { + "resources.Job": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Job" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.MlflowExperiment": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.MlflowExperiment" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.MlflowModel": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.MlflowModel" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.ModelServingEndpoint": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Pipeline": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": 
"#/$defs/github.com/databricks/cli/bundle/config/resources.Pipeline" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.QualityMonitor": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.QualityMonitor" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.RegisteredModel": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.RegisteredModel" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Schema": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Schema" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "variable.Variable": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.Variable" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "config.Artifact": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Artifact" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Command": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Command" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Target": { + "anyOf": 
[ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Target" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + } + } + } + }, + "string": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/string" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "slice": { + "github.com": { + "databricks": { + "cli": { + "bundle": { + "config": { + "resources.Grant": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Grant" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Permission": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Permission" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "config.ArtifactFile": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.ArtifactFile" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + } + }, + "databricks-sdk-go": { + "service": { + "catalog.MonitorMetric": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetric" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.InitScriptInfo": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo" + } + }, + { + "type": "string", + "pattern": 
"\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.Library": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.Library" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobCluster": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobCluster" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobEnvironment": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobEnvironment" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobParameterDefinition": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobParameterDefinition" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobsHealthRule": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRule" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskSubscription": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskSubscription" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Task": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Task" + } + }, + { + "type": 
"string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TaskDependency": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TaskDependency" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Webhook": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ExperimentTag": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ExperimentTag" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelTag": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ModelTag" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelVersion": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ModelVersion" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelVersionTag": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ModelVersionTag" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.IngestionConfig": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.IngestionConfig" + } + }, + { + "type": 
"string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.Notifications": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.Notifications" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineCluster": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineCluster" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineLibrary": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineLibrary" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.EndpointTag": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.EndpointTag" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.RateLimit": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.RateLimit" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.Route": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.Route" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedEntityInput": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": 
"#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedModelInput": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + } + } + } + }, + "string": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/string" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "string": { + "type": "string" + } + }, + "type": "object", + "properties": { + "artifacts": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact" + }, + "bundle": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Bundle" + }, + "experimental": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Experimental" + }, + "include": { + "$ref": "#/$defs/slice/string" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "presets": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Presets" + }, + "resources": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources" + }, + "run_as": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs" + }, + "sync": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync" + }, + "targets": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Target" + }, + "variables": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.Variable" + }, + "workspace": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git 
a/bundle/schema/openapi.go b/bundle/schema/openapi.go deleted file mode 100644 index 0d896b87..00000000 --- a/bundle/schema/openapi.go +++ /dev/null @@ -1,293 +0,0 @@ -package schema - -import ( - "encoding/json" - "fmt" - "strings" - - "github.com/databricks/cli/libs/jsonschema" -) - -type OpenapiReader struct { - // OpenAPI spec to read schemas from. - OpenapiSpec *Specification - - // In-memory cache of schemas read from the OpenAPI spec. - memo map[string]jsonschema.Schema -} - -const SchemaPathPrefix = "#/components/schemas/" - -// Read a schema directly from the OpenAPI spec. -func (reader *OpenapiReader) readOpenapiSchema(path string) (jsonschema.Schema, error) { - schemaKey := strings.TrimPrefix(path, SchemaPathPrefix) - - // return early if we already have a computed schema - memoSchema, ok := reader.memo[schemaKey] - if ok { - return memoSchema, nil - } - - // check path is present in openapi spec - openapiSchema, ok := reader.OpenapiSpec.Components.Schemas[schemaKey] - if !ok { - return jsonschema.Schema{}, fmt.Errorf("schema with path %s not found in openapi spec", path) - } - - // convert openapi schema to the native schema struct - bytes, err := json.Marshal(*openapiSchema) - if err != nil { - return jsonschema.Schema{}, err - } - jsonSchema := jsonschema.Schema{} - err = json.Unmarshal(bytes, &jsonSchema) - if err != nil { - return jsonschema.Schema{}, err - } - - // A hack to convert a map[string]interface{} to *Schema - // We rely on the type of a AdditionalProperties in downstream functions - // to do reference interpolation - _, ok = jsonSchema.AdditionalProperties.(map[string]interface{}) - if ok { - b, err := json.Marshal(jsonSchema.AdditionalProperties) - if err != nil { - return jsonschema.Schema{}, err - } - additionalProperties := &jsonschema.Schema{} - err = json.Unmarshal(b, additionalProperties) - if err != nil { - return jsonschema.Schema{}, err - } - jsonSchema.AdditionalProperties = additionalProperties - } - - // store read schema 
into memo - reader.memo[schemaKey] = jsonSchema - - return jsonSchema, nil -} - -// Resolve all nested "$ref" references in the schema. This function unrolls a single -// level of "$ref" in the schema and calls into traverseSchema to resolve nested references. -// Thus this function and traverseSchema are mutually recursive. -// -// This function is safe against reference loops. If a reference loop is detected, an error -// is returned. -func (reader *OpenapiReader) safeResolveRefs(root *jsonschema.Schema, tracker *tracker) (*jsonschema.Schema, error) { - if root.Reference == nil { - return reader.traverseSchema(root, tracker) - } - key := *root.Reference - - // HACK to unblock CLI release (13th Feb 2024). This is temporary until proper - // support for recursive types is added to the docs generator. PR: https://github.com/databricks/cli/pull/1204 - if strings.Contains(key, "ForEachTask") { - return root, nil - } - - if tracker.hasCycle(key) { - // self reference loops can be supported however the logic is non-trivial because - // cross refernce loops are not allowed (see: http://json-schema.org/understanding-json-schema/structuring.html#recursion) - return nil, fmt.Errorf("references loop detected") - } - ref := *root.Reference - description := root.Description - tracker.push(ref, ref) - - // Mark reference nil, so we do not traverse this again. This is tracked - // in the memo - root.Reference = nil - - // unroll one level of reference. - selfRef, err := reader.readOpenapiSchema(ref) - if err != nil { - return nil, err - } - root = &selfRef - root.Description = description - - // traverse again to find new references - root, err = reader.traverseSchema(root, tracker) - if err != nil { - return nil, err - } - tracker.pop(ref) - return root, err -} - -// Traverse the nested properties of the schema to resolve "$ref" references. This function -// and safeResolveRefs are mutually recursive. 
-func (reader *OpenapiReader) traverseSchema(root *jsonschema.Schema, tracker *tracker) (*jsonschema.Schema, error) { - // case primitive (or invalid) - if root.Type != jsonschema.ObjectType && root.Type != jsonschema.ArrayType { - return root, nil - } - // only root references are resolved - if root.Reference != nil { - return reader.safeResolveRefs(root, tracker) - } - // case struct - if len(root.Properties) > 0 { - for k, v := range root.Properties { - childSchema, err := reader.safeResolveRefs(v, tracker) - if err != nil { - return nil, err - } - root.Properties[k] = childSchema - } - } - // case array - if root.Items != nil { - itemsSchema, err := reader.safeResolveRefs(root.Items, tracker) - if err != nil { - return nil, err - } - root.Items = itemsSchema - } - // case map - additionalProperties, ok := root.AdditionalProperties.(*jsonschema.Schema) - if ok && additionalProperties != nil { - valueSchema, err := reader.safeResolveRefs(additionalProperties, tracker) - if err != nil { - return nil, err - } - root.AdditionalProperties = valueSchema - } - return root, nil -} - -func (reader *OpenapiReader) readResolvedSchema(path string) (*jsonschema.Schema, error) { - root, err := reader.readOpenapiSchema(path) - if err != nil { - return nil, err - } - tracker := newTracker() - tracker.push(path, path) - resolvedRoot, err := reader.safeResolveRefs(&root, tracker) - if err != nil { - return nil, tracker.errWithTrace(err.Error(), "") - } - return resolvedRoot, nil -} - -func (reader *OpenapiReader) jobsDocs() (*Docs, error) { - jobSettingsSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "jobs.JobSettings") - if err != nil { - return nil, err - } - jobDocs := schemaToDocs(jobSettingsSchema) - // TODO: add description for id if needed. 
- // Tracked in https://github.com/databricks/cli/issues/242 - jobsDocs := &Docs{ - Description: "List of Databricks jobs", - AdditionalProperties: jobDocs, - } - return jobsDocs, nil -} - -func (reader *OpenapiReader) pipelinesDocs() (*Docs, error) { - pipelineSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "pipelines.PipelineSpec") - if err != nil { - return nil, err - } - pipelineDocs := schemaToDocs(pipelineSpecSchema) - // TODO: Two fields in resources.Pipeline have the json tag id. Clarify the - // semantics and then add a description if needed. (https://github.com/databricks/cli/issues/242) - pipelinesDocs := &Docs{ - Description: "List of DLT pipelines", - AdditionalProperties: pipelineDocs, - } - return pipelinesDocs, nil -} - -func (reader *OpenapiReader) experimentsDocs() (*Docs, error) { - experimentSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Experiment") - if err != nil { - return nil, err - } - experimentDocs := schemaToDocs(experimentSpecSchema) - experimentsDocs := &Docs{ - Description: "List of MLflow experiments", - AdditionalProperties: experimentDocs, - } - return experimentsDocs, nil -} - -func (reader *OpenapiReader) modelsDocs() (*Docs, error) { - modelSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Model") - if err != nil { - return nil, err - } - modelDocs := schemaToDocs(modelSpecSchema) - modelsDocs := &Docs{ - Description: "List of MLflow models", - AdditionalProperties: modelDocs, - } - return modelsDocs, nil -} - -func (reader *OpenapiReader) modelServingEndpointsDocs() (*Docs, error) { - modelServingEndpointsSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "serving.CreateServingEndpoint") - if err != nil { - return nil, err - } - modelServingEndpointsDocs := schemaToDocs(modelServingEndpointsSpecSchema) - modelServingEndpointsAllDocs := &Docs{ - Description: "List of Model Serving Endpoints", - AdditionalProperties: modelServingEndpointsDocs, - } - return 
modelServingEndpointsAllDocs, nil -} - -func (reader *OpenapiReader) registeredModelDocs() (*Docs, error) { - registeredModelsSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "catalog.CreateRegisteredModelRequest") - if err != nil { - return nil, err - } - registeredModelsDocs := schemaToDocs(registeredModelsSpecSchema) - registeredModelsAllDocs := &Docs{ - Description: "List of Registered Models", - AdditionalProperties: registeredModelsDocs, - } - return registeredModelsAllDocs, nil -} - -func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { - jobsDocs, err := reader.jobsDocs() - if err != nil { - return nil, err - } - pipelinesDocs, err := reader.pipelinesDocs() - if err != nil { - return nil, err - } - experimentsDocs, err := reader.experimentsDocs() - if err != nil { - return nil, err - } - modelsDocs, err := reader.modelsDocs() - if err != nil { - return nil, err - } - modelServingEndpointsDocs, err := reader.modelServingEndpointsDocs() - if err != nil { - return nil, err - } - registeredModelsDocs, err := reader.registeredModelDocs() - if err != nil { - return nil, err - } - - return &Docs{ - Description: "Collection of Databricks resources to deploy.", - Properties: map[string]*Docs{ - "jobs": jobsDocs, - "pipelines": pipelinesDocs, - "experiments": experimentsDocs, - "models": modelsDocs, - "model_serving_endpoints": modelServingEndpointsDocs, - "registered_models": registeredModelsDocs, - }, - }, nil -} diff --git a/bundle/schema/openapi_test.go b/bundle/schema/openapi_test.go deleted file mode 100644 index 4d393cf3..00000000 --- a/bundle/schema/openapi_test.go +++ /dev/null @@ -1,493 +0,0 @@ -package schema - -import ( - "encoding/json" - "testing" - - "github.com/databricks/cli/libs/jsonschema" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestReadSchemaForObject(t *testing.T) { - specString := ` - { - "components": { - "schemas": { - "foo": { - "type": "number" - }, - "fruits": { - 
"type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mango": { - "type": "object", - "description": "a mango for my schema", - "$ref": "#/components/schemas/mango" - } - } - }, - "mango": { - "type": "object", - "properties": { - "foo": { - "$ref": "#/components/schemas/foo" - } - } - } - } - } - } - ` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mango": { - "type": "object", - "description": "a mango for my schema", - "properties": { - "foo": { - "type": "number" - } - } - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestReadSchemaForArray(t *testing.T) { - specString := ` - { - "components": { - "schemas": { - "fruits": { - "type": "object", - "description": "fruits that are cool", - "items": { - "description": "some papayas, because papayas are fruits too", - "$ref": "#/components/schemas/papaya" - } - }, - "papaya": { - "type": "number" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := 
json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are cool", - "items": { - "type": "number", - "description": "some papayas, because papayas are fruits too" - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestReadSchemaForMap(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "fruits": { - "type": "object", - "description": "fruits that are meh", - "additionalProperties": { - "description": "watermelons. watermelons.", - "$ref": "#/components/schemas/watermelon" - } - }, - "watermelon": { - "type": "number" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are meh", - "additionalProperties": { - "type": "number", - "description": "watermelons. watermelons." 
- } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestRootReferenceIsResolved(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "foo": { - "type": "object", - "description": "this description is ignored", - "properties": { - "abc": { - "type": "string" - } - } - }, - "fruits": { - "type": "object", - "description": "foo fighters fighting fruits", - "$ref": "#/components/schemas/foo" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - schema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - fruitsSchemaJson, err := json.MarshalIndent(schema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "foo fighters fighting fruits", - "properties": { - "abc": { - "type": "string" - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestSelfReferenceLoopErrors(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "foo": { - "type": "object", - "description": "this description is ignored", - "properties": { - "bar": { - "type": "object", - "$ref": "#/components/schemas/foo" - } - } - }, - "fruits": { - "type": "object", - "description": "foo fighters fighting fruits", - "$ref": "#/components/schemas/foo" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - _, err = reader.readResolvedSchema("#/components/schemas/fruits") - assert.ErrorContains(t, err, "references loop detected. 
traversal trace: -> #/components/schemas/fruits -> #/components/schemas/foo") -} - -func TestCrossReferenceLoopErrors(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "foo": { - "type": "object", - "description": "this description is ignored", - "properties": { - "bar": { - "type": "object", - "$ref": "#/components/schemas/fruits" - } - } - }, - "fruits": { - "type": "object", - "description": "foo fighters fighting fruits", - "$ref": "#/components/schemas/foo" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - _, err = reader.readResolvedSchema("#/components/schemas/fruits") - assert.ErrorContains(t, err, "references loop detected. traversal trace: -> #/components/schemas/fruits -> #/components/schemas/foo") -} - -func TestReferenceResolutionForMapInObject(t *testing.T) { - specString := ` - { - "components": { - "schemas": { - "foo": { - "type": "number" - }, - "fruits": { - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mangos": { - "type": "object", - "description": "multiple mangos", - "$ref": "#/components/schemas/mango" - } - } - }, - "mango": { - "type": "object", - "additionalProperties": { - "description": "a single mango", - "$ref": "#/components/schemas/foo" - } - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are cool", 
- "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mangos": { - "type": "object", - "description": "multiple mangos", - "additionalProperties": { - "type": "number", - "description": "a single mango" - } - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestReferenceResolutionForArrayInObject(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "foo": { - "type": "number" - }, - "fruits": { - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mangos": { - "type": "object", - "description": "multiple mangos", - "$ref": "#/components/schemas/mango" - } - } - }, - "mango": { - "type": "object", - "items": { - "description": "a single mango", - "$ref": "#/components/schemas/foo" - } - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mangos": { - "type": "object", - "description": "multiple mangos", - "items": { - "type": "number", - "description": "a single mango" - } - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestReferenceResolutionDoesNotOverwriteDescriptions(t *testing.T) { - specString := `{ - 
"components": { - "schemas": { - "foo": { - "type": "number" - }, - "fruits": { - "type": "object", - "properties": { - "guava": { - "type": "object", - "description": "Guava is a fruit", - "$ref": "#/components/schemas/foo" - }, - "mango": { - "type": "object", - "description": "What is a mango?", - "$ref": "#/components/schemas/foo" - } - } - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "properties": { - "guava": { - "type": "number", - "description": "Guava is a fruit" - }, - "mango": { - "type": "number", - "description": "What is a mango?" - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} diff --git a/bundle/schema/schema.go b/bundle/schema/schema.go deleted file mode 100644 index ac0b4f2e..00000000 --- a/bundle/schema/schema.go +++ /dev/null @@ -1,287 +0,0 @@ -package schema - -import ( - "container/list" - "fmt" - "reflect" - "strings" - - "github.com/databricks/cli/libs/dyn/dynvar" - "github.com/databricks/cli/libs/jsonschema" -) - -// Fields tagged "readonly" should not be emitted in the schema as they are -// computed at runtime, and should not be assigned a value by the bundle author. -const readonlyTag = "readonly" - -// Annotation for internal bundle fields that should not be exposed to customers. -// Fields can be tagged as "internal" to remove them from the generated schema. -const internalTag = "internal" - -// Annotation for bundle fields that have been deprecated. 
-// Fields tagged as "deprecated" are removed/omitted from the generated schema. -const deprecatedTag = "deprecated" - -// This function translates golang types into json schema. Here is the mapping -// between json schema types and golang types -// -// - GolangType -> Javascript type / Json Schema2 -// -// - bool -> boolean -// -// - string -> string -// -// - int (all variants) -> number -// -// - float (all variants) -> number -// -// - map[string]MyStruct -> { type: object, additionalProperties: {}} -// for details visit: https://json-schema.org/understanding-json-schema/reference/object.html#additional-properties -// -// - []MyStruct -> {type: array, items: {}} -// for details visit: https://json-schema.org/understanding-json-schema/reference/array.html#items -// -// - []MyStruct -> {type: object, properties: {}, additionalProperties: false} -// for details visit: https://json-schema.org/understanding-json-schema/reference/object.html#properties -func New(golangType reflect.Type, docs *Docs) (*jsonschema.Schema, error) { - tracker := newTracker() - schema, err := safeToSchema(golangType, docs, "", tracker) - if err != nil { - return nil, tracker.errWithTrace(err.Error(), "root") - } - return schema, nil -} - -func jsonSchemaType(golangType reflect.Type) (jsonschema.Type, error) { - switch golangType.Kind() { - case reflect.Bool: - return jsonschema.BooleanType, nil - case reflect.String: - return jsonschema.StringType, nil - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, - reflect.Float32, reflect.Float64: - - return jsonschema.NumberType, nil - case reflect.Struct: - return jsonschema.ObjectType, nil - case reflect.Map: - if golangType.Key().Kind() != reflect.String { - return jsonschema.InvalidType, fmt.Errorf("only strings map keys are valid. 
key type: %v", golangType.Key().Kind()) - } - return jsonschema.ObjectType, nil - case reflect.Array, reflect.Slice: - return jsonschema.ArrayType, nil - default: - return jsonschema.InvalidType, fmt.Errorf("unhandled golang type: %s", golangType) - } -} - -// A wrapper over toSchema function to: -// 1. Detect cycles in the bundle config struct. -// 2. Update tracker -// -// params: -// -// - golangType: Golang type to generate json schema for -// -// - docs: Contains documentation to be injected into the generated json schema -// -// - traceId: An identifier for the current type, to trace recursive traversal. -// Its value is the first json tag in case of struct fields and "" in other cases -// like array, map or no json tags -// -// - tracker: Keeps track of types / traceIds seen during recursive traversal -func safeToSchema(golangType reflect.Type, docs *Docs, traceId string, tracker *tracker) (*jsonschema.Schema, error) { - // HACK to unblock CLI release (13th Feb 2024). This is temporary until proper - // support for recursive types is added to the schema generator. PR: https://github.com/databricks/cli/pull/1204 - if traceId == "for_each_task" { - return &jsonschema.Schema{ - Type: jsonschema.ObjectType, - }, nil - } - - // WE ERROR OUT IF THERE ARE CYCLES IN THE JSON SCHEMA - // There are mechanisms to deal with cycles though recursive identifiers in json - // schema. However if we use them, we would need to make sure we are able to detect - // cycles where two properties (directly or indirectly) pointing to each other - // - // see: https://json-schema.org/understanding-json-schema/structuring.html#recursion - // for details - if tracker.hasCycle(golangType) { - return nil, fmt.Errorf("cycle detected") - } - - tracker.push(golangType, traceId) - props, err := toSchema(golangType, docs, tracker) - if err != nil { - return nil, err - } - tracker.pop(golangType) - return props, nil -} - -// This function returns all member fields of the provided type. 
-// If the type has embedded (aka anonymous) fields, this function traverses -// those in a breadth first manner -func getStructFields(golangType reflect.Type) []reflect.StructField { - fields := []reflect.StructField{} - bfsQueue := list.New() - - for i := 0; i < golangType.NumField(); i++ { - bfsQueue.PushBack(golangType.Field(i)) - } - for bfsQueue.Len() > 0 { - front := bfsQueue.Front() - field := front.Value.(reflect.StructField) - bfsQueue.Remove(front) - - if !field.Anonymous { - fields = append(fields, field) - continue - } - - fieldType := field.Type - if fieldType.Kind() == reflect.Pointer { - fieldType = fieldType.Elem() - } - - for i := 0; i < fieldType.NumField(); i++ { - bfsQueue.PushBack(fieldType.Field(i)) - } - } - return fields -} - -func toSchema(golangType reflect.Type, docs *Docs, tracker *tracker) (*jsonschema.Schema, error) { - // *Struct and Struct generate identical json schemas - if golangType.Kind() == reflect.Pointer { - return safeToSchema(golangType.Elem(), docs, "", tracker) - } - if golangType.Kind() == reflect.Interface { - return &jsonschema.Schema{}, nil - } - - rootJavascriptType, err := jsonSchemaType(golangType) - if err != nil { - return nil, err - } - jsonSchema := &jsonschema.Schema{Type: rootJavascriptType} - - // If the type is a non-string primitive, then we allow it to be a string - // provided it's a pure variable reference (ie only a single variable reference). 
- if rootJavascriptType == jsonschema.BooleanType || rootJavascriptType == jsonschema.NumberType { - jsonSchema = &jsonschema.Schema{ - AnyOf: []*jsonschema.Schema{ - { - Type: rootJavascriptType, - }, - { - Type: jsonschema.StringType, - Pattern: dynvar.VariableRegex, - }, - }, - } - } - - if docs != nil { - jsonSchema.Description = docs.Description - } - - // case array/slice - if golangType.Kind() == reflect.Array || golangType.Kind() == reflect.Slice { - elemGolangType := golangType.Elem() - elemJavascriptType, err := jsonSchemaType(elemGolangType) - if err != nil { - return nil, err - } - var childDocs *Docs - if docs != nil { - childDocs = docs.Items - } - elemProps, err := safeToSchema(elemGolangType, childDocs, "", tracker) - if err != nil { - return nil, err - } - jsonSchema.Items = &jsonschema.Schema{ - Type: elemJavascriptType, - Properties: elemProps.Properties, - AdditionalProperties: elemProps.AdditionalProperties, - Items: elemProps.Items, - Required: elemProps.Required, - } - } - - // case map - if golangType.Kind() == reflect.Map { - if golangType.Key().Kind() != reflect.String { - return nil, fmt.Errorf("only string keyed maps allowed") - } - var childDocs *Docs - if docs != nil { - childDocs = docs.AdditionalProperties - } - jsonSchema.AdditionalProperties, err = safeToSchema(golangType.Elem(), childDocs, "", tracker) - if err != nil { - return nil, err - } - } - - // case struct - if golangType.Kind() == reflect.Struct { - children := getStructFields(golangType) - properties := map[string]*jsonschema.Schema{} - required := []string{} - for _, child := range children { - bundleTag := child.Tag.Get("bundle") - // Fields marked as "readonly", "internal" or "deprecated" are skipped - // while generating the schema - if bundleTag == readonlyTag || bundleTag == internalTag || bundleTag == deprecatedTag { - continue - } - - // get child json tags - childJsonTag := strings.Split(child.Tag.Get("json"), ",") - childName := childJsonTag[0] - - // skip 
children that have no json tags, the first json tag is "" - // or the first json tag is "-" - if childName == "" || childName == "-" { - continue - } - - // get docs for the child if they exist - var childDocs *Docs - if docs != nil { - if val, ok := docs.Properties[childName]; ok { - childDocs = val - } - } - - // compute if the child is a required field. Determined by the - // presence of "omitempty" in the json tags - hasOmitEmptyTag := false - for i := 1; i < len(childJsonTag); i++ { - if childJsonTag[i] == "omitempty" { - hasOmitEmptyTag = true - } - } - if !hasOmitEmptyTag { - required = append(required, childName) - } - - // compute Schema.Properties for the child recursively - fieldProps, err := safeToSchema(child.Type, childDocs, childName, tracker) - if err != nil { - return nil, err - } - properties[childName] = fieldProps - } - - jsonSchema.AdditionalProperties = false - jsonSchema.Properties = properties - jsonSchema.Required = required - } - - return jsonSchema, nil -} diff --git a/bundle/schema/schema_test.go b/bundle/schema/schema_test.go deleted file mode 100644 index 6d9df0cc..00000000 --- a/bundle/schema/schema_test.go +++ /dev/null @@ -1,1900 +0,0 @@ -package schema - -import ( - "encoding/json" - "reflect" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestIntSchema(t *testing.T) { - var elemInt int - - expected := - `{ - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }` - - schema, err := New(reflect.TypeOf(elemInt), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestBooleanSchema(t *testing.T) { - var elem bool - - expected := - 
`{ - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }` - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStringSchema(t *testing.T) { - var elem string - - expected := - `{ - "type": "string" - }` - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStructOfPrimitivesSchema(t *testing.T) { - type Foo struct { - IntVal int `json:"int_val"` - Int8Val int8 `json:"int8_val"` - Int16Val int16 `json:"int16_val"` - Int32Val int32 `json:"int32_val"` - Int64Val int64 `json:"int64_val"` - - UIntVal uint `json:"uint_val"` - Uint8Val uint8 `json:"uint8_val"` - Uint16Val uint16 `json:"uint16_val"` - Uint32Val uint32 `json:"uint32_val"` - Uint64Val uint64 `json:"uint64_val"` - - Float32Val float32 `json:"float32_val"` - Float64Val float64 `json:"float64_val"` - - StringVal string `json:"string_val"` - - BoolVal bool `json:"bool_val"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "bool_val": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "float32_val": { - "anyOf": [ - { - 
"type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "float64_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int16_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int32_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int64_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int8_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "string_val": { - "type": "string" - }, - "uint16_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "uint32_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "uint64_val": { - "anyOf": [ - { - "type": 
"number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "uint8_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "uint_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "int_val", - "int8_val", - "int16_val", - "int32_val", - "int64_val", - "uint_val", - "uint8_val", - "uint16_val", - "uint32_val", - "uint64_val", - "float32_val", - "float64_val", - "string_val", - "bool_val" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStructOfStructsSchema(t *testing.T) { - type Bar struct { - A int `json:"a"` - B string `json:"b,string"` - } - - type Foo struct { - Bar Bar `json:"bar"` - } - - type MyStruct struct { - Foo Foo `json:"foo"` - } - - elem := MyStruct{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "foo": { - "type": "object", - "properties": { - "bar": { - "type": "object", - "properties": { - "a": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "b": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "a", - "b" - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - } - }, - "additionalProperties": 
false, - "required": [ - "foo" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStructOfMapsSchema(t *testing.T) { - type Bar struct { - MyMap map[string]int `json:"my_map"` - } - - type Foo struct { - Bar Bar `json:"bar"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "bar": { - "type": "object", - "properties": { - "my_map": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "my_map" - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStructOfSliceSchema(t *testing.T) { - type Bar struct { - MySlice []string `json:"my_slice"` - } - - type Foo struct { - Bar Bar `json:"bar"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "bar": { - "type": "object", - "properties": { - "my_slice": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false, - "required": [ - "my_slice" - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func 
TestMapOfPrimitivesSchema(t *testing.T) { - var elem map[string]int - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestMapOfStructSchema(t *testing.T) { - type Foo struct { - MyInt int `json:"my_int"` - } - - var elem map[string]Foo - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "additionalProperties": { - "type": "object", - "properties": { - "my_int": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "my_int" - ] - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestMapOfMapSchema(t *testing.T) { - var elem map[string]map[string]int - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "additionalProperties": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": 
"\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestMapOfSliceSchema(t *testing.T) { - var elem map[string][]string - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "additionalProperties": { - "type": "array", - "items": { - "type": "string" - } - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestSliceOfPrimitivesSchema(t *testing.T) { - var elem []float32 - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "array", - "items": { - "type": "number" - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestSliceOfSliceSchema(t *testing.T) { - var elem [][]string - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "array", - "items": { - "type": "array", - "items": { - "type": "string" - } - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestSliceOfMapSchema(t *testing.T) { - var elem []map[string]int - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "array", - 
"items": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestSliceOfStructSchema(t *testing.T) { - type Foo struct { - MyInt int `json:"my_int"` - } - - var elem []Foo - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "array", - "items": { - "type": "object", - "properties": { - "my_int": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "my_int" - ] - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestEmbeddedStructSchema(t *testing.T) { - type Location struct { - Country string `json:"country"` - State string `json:"state,omitempty"` - } - - type Person struct { - Name string `json:"name"` - Age int `json:"age,omitempty"` - Home Location `json:"home"` - } - - type Plot struct { - Events map[string]Person `json:"events"` - } - - type Story struct { - Plot Plot `json:"plot"` - *Person - Location - } - - elem := Story{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": 
"\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "country": { - "type": "string" - }, - "home": { - "type": "object", - "properties": { - "country": { - "type": "string" - }, - "state": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "country" - ] - }, - "name": { - "type": "string" - }, - "plot": { - "type": "object", - "properties": { - "events": { - "type": "object", - "additionalProperties": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "home": { - "type": "object", - "properties": { - "country": { - "type": "string" - }, - "state": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "country" - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name", - "home" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "events" - ] - }, - "state": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "plot", - "name", - "home", - "country" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestErrorWithTrace(t *testing.T) { - tracker := newTracker() - dummyType := reflect.TypeOf(struct{}{}) - err := tracker.errWithTrace("with empty trace", "root") - assert.ErrorContains(t, err, "with empty trace. traversal trace: root") - - tracker.push(dummyType, "resources") - err = tracker.errWithTrace("with depth = 1", "root") - assert.ErrorContains(t, err, "with depth = 1. 
traversal trace: root -> resources") - - tracker.push(dummyType, "pipelines") - tracker.push(dummyType, "datasets") - err = tracker.errWithTrace("with depth = 4", "root") - assert.ErrorContains(t, err, "with depth = 4. traversal trace: root -> resources -> pipelines -> datasets") -} - -func TestNonAnnotatedFieldsAreSkipped(t *testing.T) { - type MyStruct struct { - Foo string - Bar int `json:"bar"` - } - - elem := MyStruct{} - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "properties": { - "bar": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDashFieldsAreSkipped(t *testing.T) { - type MyStruct struct { - Foo string `json:"-"` - Bar int `json:"bar"` - } - - elem := MyStruct{} - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "properties": { - "bar": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestPointerInStructSchema(t *testing.T) { - - type Bar struct { - PtrVal2 *int 
`json:"ptr_val2"` - } - - type Foo struct { - PtrInt *int `json:"ptr_int"` - PtrString *string `json:"ptr_string"` - FloatVal float32 `json:"float_val"` - PtrBar *Bar `json:"ptr_bar"` - Bar *Bar `json:"bar"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "properties": { - "bar": { - "type": "object", - "properties": { - "ptr_val2": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "ptr_val2" - ] - }, - "float_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "ptr_bar": { - "type": "object", - "properties": { - "ptr_val2": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "ptr_val2" - ] - }, - "ptr_int": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "ptr_string": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "ptr_int", - "ptr_string", - "float_val", - "ptr_bar", - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestGenericSchema(t *testing.T) { - type Person struct { - Name string `json:"name"` - Age int 
`json:"age,omitempty"` - } - - type Plot struct { - Stakes []string `json:"stakes"` - Deaths []Person `json:"deaths"` - Murders map[string]Person `json:"murders"` - } - - type Wedding struct { - Hidden string `json:","` - Groom Person `json:"groom"` - Bride Person `json:"bride"` - Plots []Plot `json:"plots"` - } - - type Story struct { - Hero *Person `json:"hero"` - Villian Person `json:"villian,omitempty"` - Weddings []Wedding `json:"weddings"` - } - - elem := Story{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "hero": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - }, - "villian": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - }, - "weddings": { - "type": "array", - "items": { - "type": "object", - "properties": { - "bride": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - }, - "groom": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - 
"type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - }, - "plots": { - "type": "array", - "items": { - "type": "object", - "properties": { - "deaths": { - "type": "array", - "items": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - } - }, - "murders": { - "type": "object", - "additionalProperties": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - } - }, - "stakes": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false, - "required": [ - "stakes", - "deaths", - "murders" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "groom", - "bride", - "plots" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "hero", - "weddings" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestFieldsWithoutOmitEmptyAreRequired(t *testing.T) { - - type Papaya struct { - A int `json:"a,string,omitempty"` - B string `json:"b"` - } - - type MyStruct struct { - Foo string `json:"-,omitempty"` - Bar int `json:"bar"` - Apple int `json:"apple,omitempty"` - Mango int `json:",omitempty"` - Guava int `json:","` - 
Papaya *Papaya `json:"papaya,"` - } - - elem := MyStruct{} - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "properties": { - "apple": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "bar": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "papaya": { - "type": "object", - "properties": { - "a": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "b": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "b" - ] - } - }, - "additionalProperties": false, - "required": [ - "bar", - "papaya" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDocIngestionForObject(t *testing.T) { - docs := &Docs{ - Description: "docs for root", - Properties: map[string]*Docs{ - "my_struct": { - Description: "docs for my struct", - Properties: map[string]*Docs{ - "a": { - Description: "docs for a", - }, - "c": { - Description: "docs for c which does not exist on my_struct", - }, - }, - }, - }, - } - - type MyStruct struct { - A string `json:"a"` - B int `json:"b"` - } - - type Root struct { - MyStruct *MyStruct `json:"my_struct"` - } - - elem := Root{} - - schema, err := New(reflect.TypeOf(elem), docs) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema 
:= - `{ - "type": "object", - "description": "docs for root", - "properties": { - "my_struct": { - "type": "object", - "description": "docs for my struct", - "properties": { - "a": { - "type": "string", - "description": "docs for a" - }, - "b": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "a", - "b" - ] - } - }, - "additionalProperties": false, - "required": [ - "my_struct" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDocIngestionForSlice(t *testing.T) { - docs := &Docs{ - Description: "docs for root", - Properties: map[string]*Docs{ - "my_slice": { - Description: "docs for my slice", - Items: &Docs{ - Properties: map[string]*Docs{ - "guava": { - Description: "docs for guava", - }, - "pineapple": { - Description: "docs for pineapple", - }, - "watermelon": { - Description: "docs for watermelon which does not exist in schema", - }, - }, - }, - }, - }, - } - - type Bar struct { - Guava int `json:"guava"` - Pineapple int `json:"pineapple"` - } - - type Root struct { - MySlice []Bar `json:"my_slice"` - } - - elem := Root{} - - schema, err := New(reflect.TypeOf(elem), docs) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "description": "docs for root", - "properties": { - "my_slice": { - "type": "array", - "description": "docs for my slice", - "items": { - "type": "object", - "properties": { - "guava": { - "description": "docs for guava", - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } 
- ] - }, - "pineapple": { - "description": "docs for pineapple", - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "guava", - "pineapple" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "my_slice" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDocIngestionForMap(t *testing.T) { - docs := &Docs{ - Description: "docs for root", - Properties: map[string]*Docs{ - "my_map": { - Description: "docs for my map", - AdditionalProperties: &Docs{ - Properties: map[string]*Docs{ - "apple": { - Description: "docs for apple", - }, - "mango": { - Description: "docs for mango", - }, - "watermelon": { - Description: "docs for watermelon which does not exist in schema", - }, - "papaya": { - Description: "docs for papaya which does not exist in schema", - }, - }, - }, - }, - }, - } - - type Foo struct { - Apple int `json:"apple"` - Mango int `json:"mango"` - } - - type Root struct { - MyMap map[string]*Foo `json:"my_map"` - } - - elem := Root{} - - schema, err := New(reflect.TypeOf(elem), docs) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "description": "docs for root", - "properties": { - "my_map": { - "type": "object", - "description": "docs for my map", - "additionalProperties": { - "type": "object", - "properties": { - "apple": { - "description": "docs for apple", - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "mango": { - "description": "docs for mango", - "anyOf": [ - 
{ - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "apple", - "mango" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "my_map" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDocIngestionForTopLevelPrimitive(t *testing.T) { - docs := &Docs{ - Description: "docs for root", - Properties: map[string]*Docs{ - "my_val": { - Description: "docs for my val", - }, - }, - } - - type Root struct { - MyVal int `json:"my_val"` - } - - elem := Root{} - - schema, err := New(reflect.TypeOf(elem), docs) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "description": "docs for root", - "properties": { - "my_val": { - "description": "docs for my val", - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "my_val" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestErrorOnMapWithoutStringKey(t *testing.T) { - type Foo struct { - Bar map[int]string `json:"bar"` - } - elem := Foo{} - _, err := New(reflect.TypeOf(elem), nil) - assert.ErrorContains(t, err, "only strings map keys are valid. key type: int") -} - -func TestErrorIfStructRefersToItself(t *testing.T) { - type Foo struct { - MyFoo *Foo `json:"my_foo"` - } - - elem := Foo{} - _, err := New(reflect.TypeOf(elem), nil) - assert.ErrorContains(t, err, "cycle detected. 
traversal trace: root -> my_foo") -} - -func TestErrorIfStructHasLoop(t *testing.T) { - type Apple struct { - MyVal int `json:"my_val"` - MyMango struct { - MyGuava struct { - MyPapaya struct { - MyApple *Apple `json:"my_apple"` - } `json:"my_papaya"` - } `json:"my_guava"` - } `json:"my_mango"` - } - - elem := Apple{} - _, err := New(reflect.TypeOf(elem), nil) - assert.ErrorContains(t, err, "cycle detected. traversal trace: root -> my_mango -> my_guava -> my_papaya -> my_apple") -} - -func TestInterfaceGeneratesEmptySchema(t *testing.T) { - type Foo struct { - Apple int `json:"apple"` - Mango interface{} `json:"mango"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "apple": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "mango": {} - }, - "additionalProperties": false, - "required": [ - "apple", - "mango" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestBundleReadOnlytag(t *testing.T) { - type Pokemon struct { - Pikachu string `json:"pikachu" bundle:"readonly"` - Raichu string `json:"raichu"` - } - - type Foo struct { - Pokemon *Pokemon `json:"pokemon"` - Apple int `json:"apple"` - Mango string `json:"mango" bundle:"readonly"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "apple": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": 
"\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "pokemon": { - "type": "object", - "properties": { - "raichu": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "raichu" - ] - } - }, - "additionalProperties": false, - "required": [ - "pokemon", - "apple" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestBundleInternalTag(t *testing.T) { - type Pokemon struct { - Pikachu string `json:"pikachu" bundle:"internal"` - Raichu string `json:"raichu"` - } - - type Foo struct { - Pokemon *Pokemon `json:"pokemon"` - Apple int `json:"apple"` - Mango string `json:"mango" bundle:"internal"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "apple": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "pokemon": { - "type": "object", - "properties": { - "raichu": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "raichu" - ] - } - }, - "additionalProperties": false, - "required": [ - "pokemon", - "apple" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} diff --git a/bundle/schema/spec.go b/bundle/schema/spec.go deleted file mode 100644 index fdc31a4c..00000000 --- a/bundle/schema/spec.go +++ /dev/null @@ -1,11 +0,0 @@ -package schema - -import "github.com/databricks/cli/libs/jsonschema" - -type Specification struct { - Components *Components `json:"components"` -} - -type Components struct { - Schemas 
map[string]*jsonschema.Schema `json:"schemas,omitempty"` -} diff --git a/bundle/schema/tracker.go b/bundle/schema/tracker.go deleted file mode 100644 index ace6559b..00000000 --- a/bundle/schema/tracker.go +++ /dev/null @@ -1,53 +0,0 @@ -package schema - -import ( - "container/list" - "fmt" -) - -type tracker struct { - // Nodes encountered in current path during the recursive traversal. Used to - // check for cycles - seenNodes map[interface{}]struct{} - - // List of node names encountered in order in current path during the recursive traversal. - // Used to hydrate errors with path to the exact node where error occured. - // - // NOTE: node and node names can be the same - listOfNodes *list.List -} - -func newTracker() *tracker { - return &tracker{ - seenNodes: map[interface{}]struct{}{}, - listOfNodes: list.New(), - } -} - -func (t *tracker) errWithTrace(prefix string, initTrace string) error { - traceString := initTrace - curr := t.listOfNodes.Front() - for curr != nil { - if curr.Value.(string) != "" { - traceString += " -> " + curr.Value.(string) - } - curr = curr.Next() - } - return fmt.Errorf(prefix + ". 
traversal trace: " + traceString) -} - -func (t *tracker) hasCycle(node interface{}) bool { - _, ok := t.seenNodes[node] - return ok -} - -func (t *tracker) push(node interface{}, name string) { - t.seenNodes[node] = struct{}{} - t.listOfNodes.PushBack(name) -} - -func (t *tracker) pop(nodeType interface{}) { - back := t.listOfNodes.Back() - t.listOfNodes.Remove(back) - delete(t.seenNodes, nodeType) -} diff --git a/cmd/bundle/schema.go b/cmd/bundle/schema.go index 813aebba..480618ed 100644 --- a/cmd/bundle/schema.go +++ b/cmd/bundle/schema.go @@ -1,13 +1,8 @@ package bundle import ( - "encoding/json" - "reflect" - - "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/schema" "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/libs/jsonschema" "github.com/spf13/cobra" ) @@ -19,32 +14,8 @@ func newSchemaCommand() *cobra.Command { } cmd.RunE = func(cmd *cobra.Command, args []string) error { - // Load embedded schema descriptions. - docs, err := schema.LoadBundleDescriptions() - if err != nil { - return err - } - - // Generate the JSON schema from the bundle configuration struct in Go. - schema, err := schema.New(reflect.TypeOf(config.Root{}), docs) - if err != nil { - return err - } - - // Target variable value overrides can be primitives, maps or sequences. - // Set an empty schema for them. - err = schema.SetByPath("targets.*.variables.*", jsonschema.Schema{}) - if err != nil { - return err - } - - // Print the JSON schema to stdout. 
- result, err := json.MarshalIndent(schema, "", " ") - if err != nil { - return err - } - cmd.OutOrStdout().Write(result) - return nil + _, err := cmd.OutOrStdout().Write(schema.Bytes) + return err } return cmd diff --git a/libs/dyn/dynvar/ref.go b/libs/dyn/dynvar/ref.go index bf160fa8..338ac8ce 100644 --- a/libs/dyn/dynvar/ref.go +++ b/libs/dyn/dynvar/ref.go @@ -6,9 +6,7 @@ import ( "github.com/databricks/cli/libs/dyn" ) -const VariableRegex = `\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)*(\[[0-9]+\])*)\}` - -var re = regexp.MustCompile(VariableRegex) +var re = regexp.MustCompile(`\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`) // ref represents a variable reference. // It is a string [dyn.Value] contained in a larger [dyn.Value]. diff --git a/libs/jsonschema/from_type.go b/libs/jsonschema/from_type.go new file mode 100644 index 00000000..18a2b3ba --- /dev/null +++ b/libs/jsonschema/from_type.go @@ -0,0 +1,356 @@ +package jsonschema + +import ( + "container/list" + "fmt" + "maps" + "path" + "reflect" + "slices" + "strings" +) + +var skipTags = []string{ + // Fields tagged "readonly" should not be emitted in the schema as they are + // computed at runtime, and should not be assigned a value by the bundle author. + "readonly", + + // Annotation for internal bundle fields that should not be exposed to customers. + // Fields can be tagged as "internal" to remove them from the generated schema. + "internal", + + // Annotation for bundle fields that have been deprecated. + // Fields tagged as "deprecated" are omitted from the generated schema. + "deprecated", +} + +type constructor struct { + // Map of typ.PkgPath() + "." + typ.Name() to the schema for that type. + // Example key: github.com/databricks/databricks-sdk-go/service/jobs.JobSettings + definitions map[string]Schema + + // Map of typ.PkgPath() + "." + typ.Name() to the corresponding type. 
Used to + // track types that have been seen to avoid infinite recursion. + seen map[string]reflect.Type + + // The root type for which the schema is being generated. + root reflect.Type +} + +// JSON pointers use "/" as a delimiter to represent nested objects. This means +// we would instead need to use "~1" to represent "/" if we wish to refer to a +// key in a JSON object with a "/" in it. Instead of doing that we replace "/" with an +// additional level of nesting in the output map. Thus the $refs in the generated +// JSON schema can contain "/" without any issues. +// see: https://datatracker.ietf.org/doc/html/rfc6901 +// +// For example: +// {"a/b/c": "value"} is converted to {"a": {"b": {"c": "value"}}} +// the $ref for "value" would be "#/$defs/a/b/c" in the generated JSON schema. +func (c *constructor) Definitions() map[string]any { + defs := maps.Clone(c.definitions) + + // Remove the root type from the definitions. We don't need to include it in + // the definitions because it will be inlined as the root of the generated JSON schema. + delete(defs, typePath(c.root)) + + if len(defs) == 0 { + return nil + } + + res := make(map[string]any) + for k, v := range defs { + parts := strings.Split(k, "/") + cur := res + for i, p := range parts { + // Set the value for the last part. + if i == len(parts)-1 { + cur[p] = v + break + } + + // For all but the last part, create a new map value to add a level + // of nesting. + if _, ok := cur[p]; !ok { + cur[p] = make(map[string]any) + } + cur = cur[p].(map[string]any) + } + } + + return res +} + +// FromType converts a [reflect.Type] to a [Schema]. Nodes in the final JSON +// schema are guaranteed to be one level deep, which is done using defining $defs +// for every Go type and referring them using $ref in the corresponding node in +// the JSON schema. +// +// fns is a list of transformation functions that will be applied in order to all $defs +// in the schema. 
+func FromType(typ reflect.Type, fns []func(typ reflect.Type, s Schema) Schema) (Schema, error) { + c := constructor{ + definitions: make(map[string]Schema), + seen: make(map[string]reflect.Type), + root: typ, + } + + _, err := c.walk(typ) + if err != nil { + return Schema{}, err + } + + for _, fn := range fns { + for k := range c.definitions { + c.definitions[k] = fn(c.seen[k], c.definitions[k]) + } + } + + res := c.definitions[typePath(typ)] + res.Definitions = c.Definitions() + return res, nil +} + +// typePath computes a unique string representation of the type. $ref in the generated +// JSON schema will refer to this path. See TestTypePath for examples outputs. +func typePath(typ reflect.Type) string { + // Pointers have a typ.Name() of "". Dereference them to get the underlying type. + for typ.Kind() == reflect.Ptr { + typ = typ.Elem() + } + + if typ.Kind() == reflect.Interface { + return "interface" + } + + // Recursively call typePath, to handle slices of slices / maps. + if typ.Kind() == reflect.Slice { + return path.Join("slice", typePath(typ.Elem())) + } + + if typ.Kind() == reflect.Map { + if typ.Key().Kind() != reflect.String { + panic(fmt.Sprintf("found map with non-string key: %v", typ.Key())) + } + + // Recursively call typePath, to handle maps of maps / slices. + return path.Join("map", typePath(typ.Elem())) + } + + switch { + case typ.PkgPath() != "" && typ.Name() != "": + return typ.PkgPath() + "." + typ.Name() + case typ.Name() != "": + return typ.Name() + default: + // Invariant. This function should return a non-empty string + // for all types. + panic("unexpected empty type name for type: " + typ.String()) + } +} + +// Walk the Go type, generating $defs for every type encountered, and populating +// the corresponding $ref in the JSON schema. +func (c *constructor) walk(typ reflect.Type) (string, error) { + // Dereference pointers if necessary. 
+ for typ.Kind() == reflect.Ptr { + typ = typ.Elem() + } + + typPath := typePath(typ) + + // Return early if the type has already been seen, to avoid infinite recursion. + if _, ok := c.seen[typPath]; ok { + return typPath, nil + } + c.seen[typPath] = typ + + var s Schema + var err error + + switch typ.Kind() { + case reflect.Struct: + s, err = c.fromTypeStruct(typ) + case reflect.Slice: + s, err = c.fromTypeSlice(typ) + case reflect.Map: + s, err = c.fromTypeMap(typ) + case reflect.String: + s = Schema{Type: StringType} + case reflect.Bool: + s = Schema{Type: BooleanType} + case reflect.Int, reflect.Int32, reflect.Int64: + s = Schema{Type: IntegerType} + case reflect.Float32, reflect.Float64: + s = Schema{Type: NumberType} + case reflect.Interface: + // We cannot determine the schema for fields of interface type just based + // on the type information. Thus we'll set the empty schema here and allow + // arbitrary values. + s = Schema{} + default: + return "", fmt.Errorf("unsupported type: %s", typ.Kind()) + } + if err != nil { + return "", err + } + + // Store the computed JSON schema for the type. + c.definitions[typPath] = s + return typPath, nil +} + +// This function returns all member fields of the provided type. +// If the type has embedded (aka anonymous) fields, this function traverses +// those in a breadth first manner +// +// BFS is important because we want the a field defined at a higher level embedded +// struct to be given preference over a field with the same name defined at a lower +// level embedded struct. 
For example see: TestHigherLevelEmbeddedFieldIsInSchema +func getStructFields(typ reflect.Type) []reflect.StructField { + fields := []reflect.StructField{} + bfsQueue := list.New() + + for i := 0; i < typ.NumField(); i++ { + bfsQueue.PushBack(typ.Field(i)) + } + for bfsQueue.Len() > 0 { + front := bfsQueue.Front() + field := front.Value.(reflect.StructField) + bfsQueue.Remove(front) + + if !field.Anonymous { + fields = append(fields, field) + continue + } + + fieldType := field.Type + + // Embedded types can only be struct{} or pointer to struct{}. Multiple + // levels of pointers are not allowed by the Go compiler. So we only + // dereference pointers once. + if fieldType.Kind() == reflect.Pointer { + fieldType = fieldType.Elem() + } + + for i := 0; i < fieldType.NumField(); i++ { + bfsQueue.PushBack(fieldType.Field(i)) + } + } + return fields +} + +func (c *constructor) fromTypeStruct(typ reflect.Type) (Schema, error) { + if typ.Kind() != reflect.Struct { + return Schema{}, fmt.Errorf("expected struct, got %s", typ.Kind()) + } + + res := Schema{ + Type: ObjectType, + Properties: make(map[string]*Schema), + Required: []string{}, + AdditionalProperties: false, + } + + structFields := getStructFields(typ) + for _, structField := range structFields { + bundleTags := strings.Split(structField.Tag.Get("bundle"), ",") + // Fields marked as "readonly", "internal" or "deprecated" are skipped + // while generating the schema + skip := false + for _, tag := range skipTags { + if slices.Contains(bundleTags, tag) { + skip = true + break + } + } + if skip { + continue + } + + jsonTags := strings.Split(structField.Tag.Get("json"), ",") + fieldName := jsonTags[0] + // Do not include fields in the schema that will not be serialized during + // JSON marshalling. + if fieldName == "" || fieldName == "-" || !structField.IsExported() { + continue + } + + // Skip property if it is already present in the schema. 
+ // This can happen if the same field is defined multiple times across + // a tree of embedded structs. For example see: TestHigherLevelEmbeddedFieldIsInSchema + if _, ok := res.Properties[fieldName]; ok { + continue + } + + // "omitempty" tags in the Go SDK structs represent fields that not are + // required to be present in the API payload. Thus its absence in the + // tags list indicates that the field is required. + if !slices.Contains(jsonTags, "omitempty") { + res.Required = append(res.Required, fieldName) + } + + // Walk the fields of the struct. + typPath, err := c.walk(structField.Type) + if err != nil { + return Schema{}, err + } + + // For every property in the struct, add a $ref to the corresponding + // $defs block. + refPath := path.Join("#/$defs", typPath) + res.Properties[fieldName] = &Schema{ + Reference: &refPath, + } + } + + return res, nil +} + +func (c *constructor) fromTypeSlice(typ reflect.Type) (Schema, error) { + if typ.Kind() != reflect.Slice { + return Schema{}, fmt.Errorf("expected slice, got %s", typ.Kind()) + } + + res := Schema{ + Type: ArrayType, + } + + // Walk the slice element type. + typPath, err := c.walk(typ.Elem()) + if err != nil { + return Schema{}, err + } + + refPath := path.Join("#/$defs", typPath) + + // Add a $ref to the corresponding $defs block for the slice element type. + res.Items = &Schema{ + Reference: &refPath, + } + return res, nil +} + +func (c *constructor) fromTypeMap(typ reflect.Type) (Schema, error) { + if typ.Kind() != reflect.Map { + return Schema{}, fmt.Errorf("expected map, got %s", typ.Kind()) + } + + res := Schema{ + Type: ObjectType, + } + + // Walk the map value type. + typPath, err := c.walk(typ.Elem()) + if err != nil { + return Schema{}, err + } + + refPath := path.Join("#/$defs", typPath) + + // Add a $ref to the corresponding $defs block for the map value type. 
+ res.AdditionalProperties = &Schema{ + Reference: &refPath, + } + return res, nil +} diff --git a/libs/jsonschema/from_type_test.go b/libs/jsonschema/from_type_test.go new file mode 100644 index 00000000..174ffad8 --- /dev/null +++ b/libs/jsonschema/from_type_test.go @@ -0,0 +1,521 @@ +package jsonschema + +import ( + "reflect" + "testing" + + "github.com/databricks/cli/libs/jsonschema/test_types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFromTypeBasic(t *testing.T) { + type myStruct struct { + S string `json:"s"` + I *int `json:"i,omitempty"` + V interface{} `json:"v,omitempty"` + TriplePointer ***int `json:"triple_pointer,omitempty"` + + // These fields should be ignored in the resulting schema. + NotAnnotated string + DashedTag string `json:"-"` + InternalTagged string `json:"internal_tagged" bundle:"internal"` + DeprecatedTagged string `json:"deprecated_tagged" bundle:"deprecated"` + ReadOnlyTagged string `json:"readonly_tagged" bundle:"readonly"` + } + + strRef := "#/$defs/string" + boolRef := "#/$defs/bool" + intRef := "#/$defs/int" + interfaceRef := "#/$defs/interface" + + tcases := []struct { + name string + typ reflect.Type + expected Schema + }{ + { + name: "int", + typ: reflect.TypeOf(int(0)), + expected: Schema{ + Type: "integer", + }, + }, + { + name: "string", + typ: reflect.TypeOf(string("")), + expected: Schema{ + Type: "string", + }, + }, + { + name: "bool", + typ: reflect.TypeOf(bool(true)), + expected: Schema{ + Type: "boolean", + }, + }, + { + name: "float64", + typ: reflect.TypeOf(float64(0)), + expected: Schema{ + Type: "number", + }, + }, + { + name: "struct", + typ: reflect.TypeOf(myStruct{}), + expected: Schema{ + Type: "object", + Definitions: map[string]any{ + "interface": Schema{}, + "string": Schema{ + Type: "string", + }, + "int": Schema{ + Type: "integer", + }, + }, + Properties: map[string]*Schema{ + "s": { + Reference: &strRef, + }, + "i": { + Reference: &intRef, + }, + "v": { + 
Reference: &interfaceRef, + }, + "triple_pointer": { + Reference: &intRef, + }, + }, + AdditionalProperties: false, + Required: []string{"s"}, + }, + }, + { + name: "slice", + typ: reflect.TypeOf([]bool{}), + expected: Schema{ + Type: "array", + Definitions: map[string]any{ + "bool": Schema{ + Type: "boolean", + }, + }, + Items: &Schema{ + Reference: &boolRef, + }, + }, + }, + { + name: "map", + typ: reflect.TypeOf(map[string]int{}), + expected: Schema{ + Type: "object", + Definitions: map[string]any{ + "int": Schema{ + Type: "integer", + }, + }, + AdditionalProperties: &Schema{ + Reference: &intRef, + }, + }, + }, + } + + for _, tc := range tcases { + t.Run(tc.name, func(t *testing.T) { + s, err := FromType(tc.typ, nil) + assert.NoError(t, err) + assert.Equal(t, tc.expected, s) + }) + } +} + +func TestGetStructFields(t *testing.T) { + type InnerEmbeddedStruct struct { + InnerField float64 + } + + type EmbeddedStructOne struct { + FieldOne int + + *InnerEmbeddedStruct + } + + type EmbeddedStructTwo struct { + FieldTwo bool + } + + type MyStruct struct { + *EmbeddedStructOne + EmbeddedStructTwo + + OuterField string + } + + fields := getStructFields(reflect.TypeOf(MyStruct{})) + assert.Len(t, fields, 4) + assert.Equal(t, "OuterField", fields[0].Name) + assert.Equal(t, "FieldOne", fields[1].Name) + + // InnerField occurring after FieldTwo ensures BFS as opposed to DFS traversal. 
+ assert.Equal(t, "FieldTwo", fields[2].Name) + assert.Equal(t, "InnerField", fields[3].Name) +} + +func TestHigherLevelEmbeddedFieldIsInSchema(t *testing.T) { + type Inner struct { + Override string `json:"override,omitempty"` + } + + type EmbeddedOne struct { + Inner + } + + type EmbeddedTwo struct { + Override int `json:"override,omitempty"` + } + + type Outer struct { + EmbeddedOne + EmbeddedTwo + } + + intRef := "#/$defs/int" + expected := Schema{ + Type: "object", + Definitions: map[string]any{ + "int": Schema{ + Type: "integer", + }, + }, + Properties: map[string]*Schema{ + "override": { + Reference: &intRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + } + + s, err := FromType(reflect.TypeOf(Outer{}), nil) + require.NoError(t, err) + assert.Equal(t, expected, s) +} + +func TestFromTypeNested(t *testing.T) { + type Inner struct { + S string `json:"s"` + } + + type Outer struct { + I string `json:"i"` + Inner Inner `json:"inner"` + } + + innerRef := "#/$defs/github.com/databricks/cli/libs/jsonschema.Inner" + strRef := "#/$defs/string" + + expectedDefinitions := map[string]any{ + "github.com": map[string]any{ + "databricks": map[string]any{ + "cli": map[string]any{ + "libs": map[string]any{ + "jsonschema.Inner": Schema{ + Type: "object", + Properties: map[string]*Schema{ + "s": { + Reference: &strRef, + }, + }, + AdditionalProperties: false, + Required: []string{"s"}, + }, + }, + }, + }, + }, + "string": Schema{ + Type: "string", + }, + } + + tcases := []struct { + name string + typ reflect.Type + expected Schema + }{ + { + name: "struct in struct", + typ: reflect.TypeOf(Outer{}), + expected: Schema{ + Type: "object", + Definitions: expectedDefinitions, + Properties: map[string]*Schema{ + "i": { + Reference: &strRef, + }, + "inner": { + Reference: &innerRef, + }, + }, + AdditionalProperties: false, + Required: []string{"i", "inner"}, + }, + }, + { + name: "struct as a map value", + typ: reflect.TypeOf(map[string]*Inner{}), + expected: 
Schema{ + Type: "object", + Definitions: expectedDefinitions, + AdditionalProperties: &Schema{ + Reference: &innerRef, + }, + }, + }, + { + name: "struct as a slice element", + typ: reflect.TypeOf([]Inner{}), + expected: Schema{ + Type: "array", + Definitions: expectedDefinitions, + Items: &Schema{ + Reference: &innerRef, + }, + }, + }, + } + for _, tc := range tcases { + t.Run(tc.name, func(t *testing.T) { + s, err := FromType(tc.typ, nil) + assert.NoError(t, err) + assert.Equal(t, tc.expected, s) + }) + } +} + +func TestFromTypeRecursive(t *testing.T) { + fooRef := "#/$defs/github.com/databricks/cli/libs/jsonschema/test_types.Foo" + barRef := "#/$defs/github.com/databricks/cli/libs/jsonschema/test_types.Bar" + + expected := Schema{ + Type: "object", + Definitions: map[string]any{ + "github.com": map[string]any{ + "databricks": map[string]any{ + "cli": map[string]any{ + "libs": map[string]any{ + "jsonschema": map[string]any{ + "test_types.Bar": Schema{ + Type: "object", + Properties: map[string]*Schema{ + "foo": { + Reference: &fooRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + }, + "test_types.Foo": Schema{ + Type: "object", + Properties: map[string]*Schema{ + "bar": { + Reference: &barRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + }, + }, + }, + }, + }, + }, + }, + Properties: map[string]*Schema{ + "foo": { + Reference: &fooRef, + }, + }, + AdditionalProperties: false, + Required: []string{"foo"}, + } + + s, err := FromType(reflect.TypeOf(test_types.Outer{}), nil) + assert.NoError(t, err) + assert.Equal(t, expected, s) +} + +func TestFromTypeSelfReferential(t *testing.T) { + selfRef := "#/$defs/github.com/databricks/cli/libs/jsonschema/test_types.Self" + stringRef := "#/$defs/string" + + expected := Schema{ + Type: "object", + Definitions: map[string]any{ + "github.com": map[string]any{ + "databricks": map[string]any{ + "cli": map[string]any{ + "libs": map[string]any{ + "jsonschema": map[string]any{ + 
"test_types.Self": Schema{ + Type: "object", + Properties: map[string]*Schema{ + "self": { + Reference: &selfRef, + }, + "s": { + Reference: &stringRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + }, + }, + }, + }, + }, + }, + "string": Schema{ + Type: "string", + }, + }, + Properties: map[string]*Schema{ + "self": { + Reference: &selfRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + } + + s, err := FromType(reflect.TypeOf(test_types.OuterSelf{}), nil) + assert.NoError(t, err) + assert.Equal(t, expected, s) +} + +func TestFromTypeError(t *testing.T) { + // Maps with non-string keys should panic. + type mapOfInts map[int]int + assert.PanicsWithValue(t, "found map with non-string key: int", func() { + FromType(reflect.TypeOf(mapOfInts{}), nil) + }) + + // Unsupported types should return an error. + _, err := FromType(reflect.TypeOf(complex64(0)), nil) + assert.EqualError(t, err, "unsupported type: complex64") +} + +func TestFromTypeFunctionsArg(t *testing.T) { + type myStruct struct { + S string `json:"s"` + } + + strRef := "#/$defs/string" + expected := Schema{ + Type: "object", + Definitions: map[string]any{ + "string": Schema{ + Type: "string", + Description: "a string", + Enum: []any{"a", "b", "c"}, + }, + }, + Properties: map[string]*Schema{ + "s": { + Reference: &strRef, + }, + }, + AdditionalProperties: false, + Required: []string{"s"}, + } + + addDescription := func(typ reflect.Type, s Schema) Schema { + if typ.Kind() != reflect.String { + return s + } + s.Description = "a string" + return s + } + + addEnums := func(typ reflect.Type, s Schema) Schema { + if typ.Kind() != reflect.String { + return s + } + s.Enum = []any{"a", "b", "c"} + return s + } + + s, err := FromType(reflect.TypeOf(myStruct{}), []func(reflect.Type, Schema) Schema{ + addDescription, + addEnums, + }) + assert.NoError(t, err) + assert.Equal(t, expected, s) +} + +func TestTypePath(t *testing.T) { + type myStruct struct{} + + tcases := []struct { + 
typ reflect.Type + path string + }{ + { + typ: reflect.TypeOf(""), + path: "string", + }, + { + typ: reflect.TypeOf(int(0)), + path: "int", + }, + { + typ: reflect.TypeOf(true), + path: "bool", + }, + { + typ: reflect.TypeOf(float64(0)), + path: "float64", + }, + { + typ: reflect.TypeOf(myStruct{}), + path: "github.com/databricks/cli/libs/jsonschema.myStruct", + }, + { + typ: reflect.TypeOf([]int{}), + path: "slice/int", + }, + { + typ: reflect.TypeOf(map[string]int{}), + path: "map/int", + }, + { + typ: reflect.TypeOf([]myStruct{}), + path: "slice/github.com/databricks/cli/libs/jsonschema.myStruct", + }, + { + typ: reflect.TypeOf([][]map[string]map[string]myStruct{}), + path: "slice/slice/map/map/github.com/databricks/cli/libs/jsonschema.myStruct", + }, + { + typ: reflect.TypeOf(map[string]myStruct{}), + path: "map/github.com/databricks/cli/libs/jsonschema.myStruct", + }, + } + + for _, tc := range tcases { + t.Run(tc.typ.String(), func(t *testing.T) { + assert.Equal(t, tc.path, typePath(tc.typ)) + }) + } + + // Maps with non-string keys should panic. + assert.PanicsWithValue(t, "found map with non-string key: int", func() { + typePath(reflect.TypeOf(map[int]int{})) + }) +} diff --git a/libs/jsonschema/schema.go b/libs/jsonschema/schema.go index f1e223ec..7690ec2f 100644 --- a/libs/jsonschema/schema.go +++ b/libs/jsonschema/schema.go @@ -6,7 +6,6 @@ import ( "os" "regexp" "slices" - "strings" "github.com/databricks/cli/internal/build" "golang.org/x/mod/semver" @@ -14,6 +13,10 @@ import ( // defines schema for a json object type Schema struct { + // Definitions that can be reused and referenced throughout the schema. 
The + // syntax for a reference is $ref: #/$defs/ + Definitions map[string]any `json:"$defs,omitempty"` + // Type of the object Type Type `json:"type,omitempty"` @@ -63,7 +66,7 @@ type Schema struct { Extension // Schema that must match any of the schemas in the array - AnyOf []*Schema `json:"anyOf,omitempty"` + AnyOf []Schema `json:"anyOf,omitempty"` } // Default value defined in a JSON Schema, represented as a string. @@ -82,41 +85,6 @@ func (s *Schema) ParseString(v string) (any, error) { return fromString(v, s.Type) } -func (s *Schema) getByPath(path string) (*Schema, error) { - p := strings.Split(path, ".") - - res := s - for _, node := range p { - if node == "*" { - res = res.AdditionalProperties.(*Schema) - continue - } - var ok bool - res, ok = res.Properties[node] - if !ok { - return nil, fmt.Errorf("property %q not found in schema. Query path: %s", node, path) - } - } - return res, nil -} - -func (s *Schema) GetByPath(path string) (Schema, error) { - v, err := s.getByPath(path) - if err != nil { - return Schema{}, err - } - return *v, nil -} - -func (s *Schema) SetByPath(path string, v Schema) error { - dst, err := s.getByPath(path) - if err != nil { - return err - } - *dst = v - return nil -} - type Type string const ( diff --git a/libs/jsonschema/schema_test.go b/libs/jsonschema/schema_test.go index c365cf23..cf1f1276 100644 --- a/libs/jsonschema/schema_test.go +++ b/libs/jsonschema/schema_test.go @@ -4,7 +4,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestSchemaValidateTypeNames(t *testing.T) { @@ -306,92 +305,3 @@ func TestValidateSchemaSkippedPropertiesHaveDefaults(t *testing.T) { err = s.validate() assert.NoError(t, err) } - -func testSchema() *Schema { - return &Schema{ - Type: "object", - Properties: map[string]*Schema{ - "int_val": { - Type: "integer", - Default: int64(123), - }, - "string_val": { - Type: "string", - }, - "object_val": { - Type: "object", - Properties: 
map[string]*Schema{ - "bar": { - Type: "string", - Default: "baz", - }, - }, - AdditionalProperties: &Schema{ - Type: "object", - Properties: map[string]*Schema{ - "foo": { - Type: "string", - Default: "zab", - }, - }, - }, - }, - }, - } - -} - -func TestSchemaGetByPath(t *testing.T) { - s := testSchema() - - ss, err := s.GetByPath("int_val") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: IntegerType, - Default: int64(123), - }, ss) - - ss, err = s.GetByPath("string_val") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: StringType, - }, ss) - - ss, err = s.GetByPath("object_val.bar") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: StringType, - Default: "baz", - }, ss) - - ss, err = s.GetByPath("object_val.*.foo") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: StringType, - Default: "zab", - }, ss) -} - -func TestSchemaSetByPath(t *testing.T) { - s := testSchema() - - err := s.SetByPath("int_val", Schema{ - Type: IntegerType, - Default: int64(456), - }) - require.NoError(t, err) - assert.Equal(t, int64(456), s.Properties["int_val"].Default) - - err = s.SetByPath("object_val.*.foo", Schema{ - Type: StringType, - Default: "zooby", - }) - require.NoError(t, err) - - ns, err := s.GetByPath("object_val.*.foo") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: StringType, - Default: "zooby", - }, ns) -} diff --git a/libs/jsonschema/test_types/test_types.go b/libs/jsonschema/test_types/test_types.go new file mode 100644 index 00000000..75e81595 --- /dev/null +++ b/libs/jsonschema/test_types/test_types.go @@ -0,0 +1,25 @@ +package test_types + +// Recursive types cannot be defined inline without making them anonymous, +// so we define them here instead. 
+type Foo struct { + Bar *Bar `json:"bar,omitempty"` +} + +type Bar struct { + Foo Foo `json:"foo,omitempty"` +} + +type Outer struct { + Foo Foo `json:"foo"` +} + +type Self struct { + Self *Self `json:"self,omitempty"` + + S string `json:"s,omitempty"` +} + +type OuterSelf struct { + Self Self `json:"self,omitempty"` +} diff --git a/libs/template/config_test.go b/libs/template/config_test.go index 73b47f28..ab9dbeb5 100644 --- a/libs/template/config_test.go +++ b/libs/template/config_test.go @@ -461,7 +461,7 @@ func TestPromptIsSkippedAnyOf(t *testing.T) { Default: "hello-world", Extension: jsonschema.Extension{ SkipPromptIf: &jsonschema.Schema{ - AnyOf: []*jsonschema.Schema{ + AnyOf: []jsonschema.Schema{ { Properties: map[string]*jsonschema.Schema{ "abc": { From 5d2c0e3885070359c22310dd79fa5fe594c8e0f5 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 10 Sep 2024 20:19:34 +0530 Subject: [PATCH 07/15] Alias variables block in the `Target` struct (#1748) ## Changes This PR aliases and overrides the schema associated with the variables block in `target` to allow for directly specifying a variable value in the JSON schema (without any levels of nesting). This is needed because this direct value is resolved by dynamically parsing the configuration tree. https://github.com/databricks/cli/blob/ca6332a5a4325aff1be848536f45d13bd74d93b3/bundle/config/root.go#L424 ## Tests Existing unit tests.
--- bundle/config/root_test.go | 2 +- bundle/config/target.go | 21 +++++++++++++++- bundle/config/variable/variable.go | 5 ++++ bundle/internal/schema/main.go | 16 ++++++++++++ bundle/schema/jsonschema.json | 39 +++++++++++++++++++++++++++++- 5 files changed, 80 insertions(+), 3 deletions(-) diff --git a/bundle/config/root_test.go b/bundle/config/root_test.go index c95e6e86..d2c7a9b1 100644 --- a/bundle/config/root_test.go +++ b/bundle/config/root_test.go @@ -139,7 +139,7 @@ func TestRootMergeTargetOverridesWithVariables(t *testing.T) { }, Targets: map[string]*Target{ "development": { - Variables: map[string]*variable.Variable{ + Variables: map[string]*variable.TargetVariable{ "foo": { Default: "bar", Description: "wrong", diff --git a/bundle/config/target.go b/bundle/config/target.go index a2ef4d73..fc6ba7b5 100644 --- a/bundle/config/target.go +++ b/bundle/config/target.go @@ -38,7 +38,26 @@ type Target struct { // Override default values or lookup name for defined variables // Does not permit defining new variables or redefining existing ones // in the scope of an target - Variables map[string]*variable.Variable `json:"variables,omitempty"` + // + // There are two valid ways to define a variable override in a target: + // 1. Direct value override. We normalize this to the variable.Variable + // struct format when loading the configuration YAML: + // + // variables: + // foo: "value" + // + // 2. Override matching the variable.Variable struct. 
+ // + // variables: + // foo: + // default: "value" + // + // OR + // + // variables: + // foo: + // lookup: "resource_name" + Variables map[string]*variable.TargetVariable `json:"variables,omitempty"` Git Git `json:"git,omitempty"` diff --git a/bundle/config/variable/variable.go b/bundle/config/variable/variable.go index ba94f9c8..2362ad10 100644 --- a/bundle/config/variable/variable.go +++ b/bundle/config/variable/variable.go @@ -16,6 +16,11 @@ const ( VariableTypeComplex VariableType = "complex" ) +// We alias it here to override the JSON schema associated with a variable value +// in a target override. This is because we allow for directly specifying the value +// in addition to the variable.Variable struct format in a target override. +type TargetVariable Variable + // An input variable for the bundle config type Variable struct { // A type of the variable. This is used to validate the value of the variable diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go index 3c1fb5da..4a237147 100644 --- a/bundle/internal/schema/main.go +++ b/bundle/internal/schema/main.go @@ -21,6 +21,22 @@ func addInterpolationPatterns(typ reflect.Type, s jsonschema.Schema) jsonschema. return s } + // The variables block in a target override allows for directly specifying + // the value of the variable. + if typ == reflect.TypeOf(variable.TargetVariable{}) { + return jsonschema.Schema{ + AnyOf: []jsonschema.Schema{ + // We keep the original schema so that autocomplete suggestions + // continue to work. + s, + // All values are valid for a variable value, be it primitive types + // like string/bool or complex ones like objects/arrays. Thus we override + // the schema to allow all valid JSON values. + {}, + }, + } + } + switch s.Type { case jsonschema.ArrayType, jsonschema.ObjectType: // arrays and objects can have complex variable values specified. 
diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 4fe978b8..2db1a5ab 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -642,6 +642,29 @@ } ] }, + "variable.TargetVariable": { + "anyOf": [ + { + "type": "object", + "properties": { + "default": { + "$ref": "#/$defs/interface" + }, + "description": { + "$ref": "#/$defs/string" + }, + "lookup": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.Lookup" + }, + "type": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.VariableType" + } + }, + "additionalProperties": false + }, + {} + ] + }, "variable.Variable": { "type": "object", "properties": { @@ -995,7 +1018,7 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync" }, "variables": { - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.Variable" + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.TargetVariable" }, "workspace": { "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" @@ -4993,6 +5016,20 @@ } ] }, + "variable.TargetVariable": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.TargetVariable" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "variable.Variable": { "anyOf": [ { From c61358407fade291678ff9e68f9ccd982ea99124 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 11 Sep 2024 14:45:56 +0530 Subject: [PATCH 08/15] Add end to end integration tests for bundle JSON schema (#1726) --- .github/workflows/push.yml | 11 ++- .../internal/schema/testdata/fail/basic.yml | 3 + .../fail/invalid_enum_value_in_job.yml | 4 ++ .../fail/invalid_enum_value_in_model.yml | 6 ++ .../fail/invalid_reference_in_job.yml | 8 +++ .../fail/invalid_reference_in_model.yml | 5 ++ 
.../fail/required_field_missing_in_job.yml | 9 +++ .../testdata/fail/unknown_field_in_job.yml | 5 ++ .../testdata/fail/unknown_field_in_model.yml | 6 ++ .../testdata/fail/unknown_top_level_field.yml | 1 + .../testdata/pass/artifact_references.yml | 11 +++ .../internal/schema/testdata/pass/basic.yml | 2 + .../testdata/pass/direct_value_in_target.yml | 4 ++ bundle/internal/schema/testdata/pass/job.yml | 63 ++++++++++++++++ bundle/internal/schema/testdata/pass/ml.yml | 72 +++++++++++++++++++ .../schema/testdata/pass/pipeline.yml | 54 ++++++++++++++ .../schema/testdata/pass/quality_monitor.yml | 16 +++++ .../schema/testdata/pass/run_job_task.yml | 56 +++++++++++++++ .../internal/schema/testdata/pass/schema.yml | 24 +++++++ 19 files changed, 358 insertions(+), 2 deletions(-) create mode 100644 bundle/internal/schema/testdata/fail/basic.yml create mode 100644 bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml create mode 100644 bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml create mode 100644 bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml create mode 100644 bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml create mode 100644 bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml create mode 100644 bundle/internal/schema/testdata/fail/unknown_field_in_job.yml create mode 100644 bundle/internal/schema/testdata/fail/unknown_field_in_model.yml create mode 100644 bundle/internal/schema/testdata/fail/unknown_top_level_field.yml create mode 100644 bundle/internal/schema/testdata/pass/artifact_references.yml create mode 100644 bundle/internal/schema/testdata/pass/basic.yml create mode 100644 bundle/internal/schema/testdata/pass/direct_value_in_target.yml create mode 100644 bundle/internal/schema/testdata/pass/job.yml create mode 100644 bundle/internal/schema/testdata/pass/ml.yml create mode 100644 bundle/internal/schema/testdata/pass/pipeline.yml create mode 100644 
bundle/internal/schema/testdata/pass/quality_monitor.yml create mode 100644 bundle/internal/schema/testdata/pass/run_job_task.yml create mode 100644 bundle/internal/schema/testdata/pass/schema.yml diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 08edfb9d..02bf7378 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -107,11 +107,18 @@ jobs: run: npm install -g ajv-cli@5.0.0 # Assert that the generated bundle schema is a valid JSON schema by using - # ajv-cli to validate it against a sample configuration file. + # ajv-cli to validate it against bundle configuration files. # By default the ajv-cli runs in strict mode which will fail if the schema # itself is not valid. Strict mode is more strict than the JSON schema # specification. See for details: https://ajv.js.org/options.html#strict-mode-options - name: Validate bundle schema run: | go run main.go bundle schema > schema.json - ajv -s schema.json -d ./bundle/tests/basic/databricks.yml + + for file in ./bundle/internal/schema/testdata/pass/*.yml; do + ajv test -s schema.json -d $file --valid + done + + for file in ./bundle/internal/schema/testdata/fail/*.yml; do + ajv test -s schema.json -d $file --invalid + done diff --git a/bundle/internal/schema/testdata/fail/basic.yml b/bundle/internal/schema/testdata/fail/basic.yml new file mode 100644 index 00000000..5ab981e3 --- /dev/null +++ b/bundle/internal/schema/testdata/fail/basic.yml @@ -0,0 +1,3 @@ +bundle: + # expected type is 'string' + name: 1234 diff --git a/bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml new file mode 100644 index 00000000..92b1e9fc --- /dev/null +++ b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml @@ -0,0 +1,4 @@ +resources: + jobs: + myjob: + format: INVALID_VALUE diff --git a/bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml 
b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml new file mode 100644 index 00000000..278b238f --- /dev/null +++ b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml @@ -0,0 +1,6 @@ +resources: + models: + mymodel: + latest_versions: + - creation_timestamp: 123 + status: INVALID_VALUE diff --git a/bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml b/bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml new file mode 100644 index 00000000..2e0e2d84 --- /dev/null +++ b/bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml @@ -0,0 +1,8 @@ +resources: + jobs: + outer: + name: outer job + tasks: + - task_key: run job task 1 + run_job_task: + job_id: ${invalid.reference} diff --git a/bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml b/bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml new file mode 100644 index 00000000..899d6d85 --- /dev/null +++ b/bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml @@ -0,0 +1,5 @@ +resources: + models: + mymodel: + latest_versions: + - creation_timestamp: ${invalid.reference} diff --git a/bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml b/bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml new file mode 100644 index 00000000..ebb8ecf6 --- /dev/null +++ b/bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml @@ -0,0 +1,9 @@ +resources: + jobs: + foo: + name: my job + tasks: + # All tasks need to have a task_key. 
+ - notebook_task: + notebook_path: /Users/abc/notebooks/inner + existing_cluster_id: abcd diff --git a/bundle/internal/schema/testdata/fail/unknown_field_in_job.yml b/bundle/internal/schema/testdata/fail/unknown_field_in_job.yml new file mode 100644 index 00000000..7e7e0d2d --- /dev/null +++ b/bundle/internal/schema/testdata/fail/unknown_field_in_job.yml @@ -0,0 +1,5 @@ +resources: + jobs: + myjob: + # unknown fields should cause schema failure. + unknown_field: "value" diff --git a/bundle/internal/schema/testdata/fail/unknown_field_in_model.yml b/bundle/internal/schema/testdata/fail/unknown_field_in_model.yml new file mode 100644 index 00000000..a00c2093 --- /dev/null +++ b/bundle/internal/schema/testdata/fail/unknown_field_in_model.yml @@ -0,0 +1,6 @@ +resources: + models: + mymodel: + creation_timestamp: 123 + description: "my model" + unknown: "value" diff --git a/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml b/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml new file mode 100644 index 00000000..e8a8866b --- /dev/null +++ b/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml @@ -0,0 +1 @@ +unknown: value diff --git a/bundle/internal/schema/testdata/pass/artifact_references.yml b/bundle/internal/schema/testdata/pass/artifact_references.yml new file mode 100644 index 00000000..c9b13763 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/artifact_references.yml @@ -0,0 +1,11 @@ +artifacts: + abc: + path: /Workspace/a/b/c + type: wheel + files: + - source: ./x.whl + +resources: + jobs: + foo: + name: ${artifacts.abc.type} diff --git a/bundle/internal/schema/testdata/pass/basic.yml b/bundle/internal/schema/testdata/pass/basic.yml new file mode 100644 index 00000000..de02d20b --- /dev/null +++ b/bundle/internal/schema/testdata/pass/basic.yml @@ -0,0 +1,2 @@ +bundle: + name: basic diff --git a/bundle/internal/schema/testdata/pass/direct_value_in_target.yml 
b/bundle/internal/schema/testdata/pass/direct_value_in_target.yml new file mode 100644 index 00000000..5033d8cd --- /dev/null +++ b/bundle/internal/schema/testdata/pass/direct_value_in_target.yml @@ -0,0 +1,4 @@ +targets: + development: + variables: + myvar: value diff --git a/bundle/internal/schema/testdata/pass/job.yml b/bundle/internal/schema/testdata/pass/job.yml new file mode 100644 index 00000000..d9b0e832 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/job.yml @@ -0,0 +1,63 @@ +bundle: + name: a job + +workspace: + host: "https://myworkspace.com" + root_path: /abc + +presets: + name_prefix: "[DEV]" + jobs_max_concurrent_runs: 10 + +variables: + simplevar: + default: true + description: "simplevar description" + + complexvar: + default: + key1: value1 + key2: value2 + key3: + - value3 + - value4 + description: "complexvar description" + +run_as: + service_principal_name: myserviceprincipal + +resources: + jobs: + myjob: + name: myjob + continuous: + pause_status: PAUSED + edit_mode: EDITABLE + max_concurrent_runs: 10 + description: "my job description" + email_notifications: + no_alert_for_skipped_runs: true + environments: + - environment_key: venv + spec: + dependencies: + - python=3.7 + client: "myclient" + format: MULTI_TASK + tags: + foo: bar + bar: baz + tasks: + - task_key: mytask + notebook_task: + notebook_path: ${var.simplevar} + existing_cluster_id: abcd + - task_key: mytask2 + for_each_task: + inputs: av + concurrency: 10 + task: + task_key: inside_for_each + notebook_task: + notebook_path: ${var.complexvar.key3[0]} + - ${var.complexvar} diff --git a/bundle/internal/schema/testdata/pass/ml.yml b/bundle/internal/schema/testdata/pass/ml.yml new file mode 100644 index 00000000..b1558f10 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/ml.yml @@ -0,0 +1,72 @@ +bundle: + name: ML + +workspace: + host: "https://myworkspace.com" + root_path: /abc + +presets: + name_prefix: "[DEV]" + jobs_max_concurrent_runs: 10 + +variables: + simplevar: 
+ default: "true" + description: "simplevar description" + + complexvar: + default: + key1: value1 + key2: value2 + key3: + - value3 + - value4 + description: "complexvar description" + +resources: + models: + mymodel: + creation_timestamp: 123 + description: "my model" + latest_versions: + - creation_timestamp: 123 + tags: ${var.complexvar.key1} + status: READY + permissions: + - service_principal_name: myserviceprincipal + level: CAN_MANAGE + + experiments: + myexperiment: + artifact_location: /dbfs/myexperiment + last_update_time: ${var.complexvar.key2} + lifecycle_stage: ${var.simplevar} + permissions: + - service_principal_name: myserviceprincipal + level: CAN_MANAGE + + model_serving_endpoints: + myendpoint: + config: + served_models: + - model_name: ${resources.models.mymodel.name} + model_version: abc + scale_to_zero_enabled: true + workload_size: Large + name: myendpoint + + schemas: + myschema: + catalog_name: mycatalog + name: myschema + + registered_models: + myregisteredmodel: + catalog_name: mycatalog + name: myregisteredmodel + schema_name: ${resources.schemas.myschema.name} + grants: + - principal: abcd + privileges: + - SELECT + - INSERT diff --git a/bundle/internal/schema/testdata/pass/pipeline.yml b/bundle/internal/schema/testdata/pass/pipeline.yml new file mode 100644 index 00000000..1b2b1a10 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/pipeline.yml @@ -0,0 +1,54 @@ +bundle: + name: a pipeline + +workspace: + host: "https://myworkspace.com" + root_path: /abc + +presets: + name_prefix: "[DEV]" + jobs_max_concurrent_runs: 10 + +variables: + simplevar: + default: true + description: "simplevar description" + + complexvar: + default: + key1: value1 + key2: value2 + key3: + - value3 + - value4 + description: "complexvar description" + +artifacts: + mywheel: + path: ./mywheel.whl + type: WHEEL + +run_as: + service_principal_name: myserviceprincipal + +resources: + jobs: + myjob: + name: myjob + tasks: + - task_key: ${bundle.name} pipeline 
trigger + pipeline_task: + pipeline_id: ${resources.mypipeline.id} + + pipelines: + mypipeline: + name: mypipeline + libraries: + - whl: ./mywheel.whl + catalog: 3{var.complexvar.key2} + development: true + clusters: + - autoscale: + mode: ENHANCED + max_workers: 10 + min_workers: 1 diff --git a/bundle/internal/schema/testdata/pass/quality_monitor.yml b/bundle/internal/schema/testdata/pass/quality_monitor.yml new file mode 100644 index 00000000..a9be5932 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/quality_monitor.yml @@ -0,0 +1,16 @@ +bundle: + name: quality_monitor + +resources: + quality_monitors: + myqualitymonitor: + inference_log: + granularities: + - a + - b + model_id_col: a + prediction_col: b + timestamp_col: c + problem_type: PROBLEM_TYPE_CLASSIFICATION + assets_dir: /dbfs/mnt/abc + output_schema_name: default diff --git a/bundle/internal/schema/testdata/pass/run_job_task.yml b/bundle/internal/schema/testdata/pass/run_job_task.yml new file mode 100644 index 00000000..be2ca22c --- /dev/null +++ b/bundle/internal/schema/testdata/pass/run_job_task.yml @@ -0,0 +1,56 @@ +bundle: + name: a run job task + databricks_cli_version: 0.200.0 + compute_id: "mycompute" + + +variables: + simplevar: + default: 5678 + description: "simplevar description" + + complexvar: + default: + key1: 1234 + key2: value2 + key3: + - value3 + - 9999 + description: "complexvar description" + +resources: + jobs: + inner: + permissions: + - user_name: user1 + level: CAN_MANAGE + + name: inner job + tasks: + - task_key: inner notebook task + notebook_task: + notebook_path: /Users/abc/notebooks/inner + existing_cluster_id: abcd + + outer: + name: outer job + tasks: + - task_key: run job task 1 + run_job_task: + job_id: 1234 + + - task_key: run job task 2 + run_job_task: + job_id: ${var.complexvar.key1} + + - task_key: run job task 3 + run_job_task: + job_id: ${var.simplevar} + + - task_key: run job task 4 + run_job_task: + job_id: ${resources.inner.id} + + - task_key: run job 
task 5 + run_job_task: + job_id: ${var.complexvar.key3[1]} diff --git a/bundle/internal/schema/testdata/pass/schema.yml b/bundle/internal/schema/testdata/pass/schema.yml new file mode 100644 index 00000000..37d0f6f7 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/schema.yml @@ -0,0 +1,24 @@ +bundle: + name: basic + +variables: + complexvar: + default: + key1: 1234 + key2: value2 + key3: + - value3 + - 9999 + description: complexvar description + +resources: + schemas: + myschema: + name: myschema + catalog_name: main + grants: + - ${var.complexvar} + - principal: ${workspace.current_user.me} + privileges: + - ${var.complexvar.key3[0]} + - ${var.complexvar.key2} From 66307134c1d34e34fc64e71fcc5c01acf819e677 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 11 Sep 2024 11:49:58 +0200 Subject: [PATCH 09/15] Fixed generated YAML missing 'default' for empty values (#1765) ## Changes Fixed generated YAML missing 'default' for empty values ## Tests Added unit test --- bundle/config/generate/job.go | 14 ++++++++++++++ cmd/bundle/generate/generate_test.go | 9 +++++++++ libs/dyn/yamlsaver/saver.go | 2 ++ 3 files changed, 25 insertions(+) diff --git a/bundle/config/generate/job.go b/bundle/config/generate/job.go index 28bc8641..6cd7c1b3 100644 --- a/bundle/config/generate/job.go +++ b/bundle/config/generate/job.go @@ -25,6 +25,20 @@ func ConvertJobToValue(job *jobs.Job) (dyn.Value, error) { value["tasks"] = dyn.NewValue(tasks, []dyn.Location{{Line: jobOrder.Get("tasks")}}) } + // We're processing job.Settings.Parameters separately to retain empty default values. + if len(job.Settings.Parameters) > 0 { + params := make([]dyn.Value, 0) + for _, parameter := range job.Settings.Parameters { + p := map[string]dyn.Value{ + "name": dyn.NewValue(parameter.Name, []dyn.Location{{Line: 0}}), // We use Line: 0 to ensure that the name goes first. 
+ "default": dyn.NewValue(parameter.Default, []dyn.Location{{Line: 1}}), + } + params = append(params, dyn.NewValue(p, []dyn.Location{})) + } + + value["parameters"] = dyn.NewValue(params, []dyn.Location{{Line: jobOrder.Get("parameters")}}) + } + return yamlsaver.ConvertToMapValue(job.Settings, jobOrder, []string{"format", "new_cluster", "existing_cluster_id"}, value) } diff --git a/cmd/bundle/generate/generate_test.go b/cmd/bundle/generate/generate_test.go index ae3710ac..7de6805f 100644 --- a/cmd/bundle/generate/generate_test.go +++ b/cmd/bundle/generate/generate_test.go @@ -152,6 +152,12 @@ func TestGenerateJobCommand(t *testing.T) { }, }, }, + Parameters: []jobs.JobParameterDefinition{ + { + Name: "empty", + Default: "", + }, + }, }, }, nil) @@ -198,6 +204,9 @@ func TestGenerateJobCommand(t *testing.T) { - task_key: notebook_task notebook_task: notebook_path: %s + parameters: + - name: empty + default: "" `, filepath.Join("..", "src", "notebook.py")), string(data)) data, err = os.ReadFile(filepath.Join(srcDir, "notebook.py")) diff --git a/libs/dyn/yamlsaver/saver.go b/libs/dyn/yamlsaver/saver.go index f4c7157f..0fd81d53 100644 --- a/libs/dyn/yamlsaver/saver.go +++ b/libs/dyn/yamlsaver/saver.go @@ -151,6 +151,8 @@ func isScalarValueInString(v dyn.Value) bool { switch v.MustString() { case "true", "false": return true + case "": + return true default: _, err := parseNumber(v.MustString()) return err == nil From fb077a85d25e051342185193ed165bf7c61338d3 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 11 Sep 2024 14:16:18 +0200 Subject: [PATCH 10/15] Fix artifact upload integration tests (#1767) ## Changes I didn't run integration tests on #1756. ## Tests Manually confirmed integration tests pass. 
--- internal/bundle/artifacts_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/bundle/artifacts_test.go b/internal/bundle/artifacts_test.go index bae8073f..fa052e22 100644 --- a/internal/bundle/artifacts_test.go +++ b/internal/bundle/artifacts_test.go @@ -36,7 +36,8 @@ func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) { wsDir := internal.TemporaryWorkspaceDir(t, w) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + SyncRootPath: dir, Config: config.Root{ Bundle: config.Bundle{ Target: "whatever", @@ -100,7 +101,8 @@ func TestAccUploadArtifactFileToCorrectRemotePathWithEnvironments(t *testing.T) wsDir := internal.TemporaryWorkspaceDir(t, w) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + SyncRootPath: dir, Config: config.Root{ Bundle: config.Bundle{ Target: "whatever", @@ -169,7 +171,8 @@ func TestAccUploadArtifactFileToCorrectRemotePathForVolumes(t *testing.T) { touchEmptyFile(t, whlPath) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + SyncRootPath: dir, Config: config.Root{ Bundle: config.Bundle{ Target: "whatever", From f2dee890b8b858c94f7b140acf14eb4406111b46 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Thu, 12 Sep 2024 10:33:00 +0200 Subject: [PATCH 11/15] Use periodic triggers in all templates (#1739) ## Summary Simplifies template by using the periodic trigger syntax instead of the cron schedule syntax. Periodic triggers are simpler to configure, simpler to read, and make sure that workloads are spread out through the day. We only recommend cron syntax for advanced cases or when more control is needed. 
## Testing * Templates validation via unit tests * Manual validation that the new triggers work as expected in dev/prod --- .../resources/{{.project_name}}_job.yml.tmpl | 9 +++++---- .../resources/{{.project_name}}_job.yml.tmpl | 9 +++++---- .../resources/{{.project_name}}_sql_job.yml.tmpl | 9 +++++---- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl index bad12c75..e23c8dbc 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -3,10 +3,11 @@ resources: {{.project_name}}_job: name: {{.project_name}}_job - schedule: - # Run every day at 9:27 AM - quartz_cron_expression: 21 27 9 * * ? - timezone_id: UTC + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS email_notifications: on_failure: diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl index dc79e3a1..d2100e90 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -10,10 +10,11 @@ resources: {{.project_name}}_job: name: {{.project_name}}_job - schedule: - # Run every day at 8:37 AM - quartz_cron_expression: '44 37 8 * * ?' 
- timezone_id: Europe/Amsterdam + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS {{- if not is_service_principal}} diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl index 31d2d21a..4e6803da 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl @@ -4,10 +4,11 @@ resources: {{.project_name}}_sql_job: name: {{.project_name}}_sql_job - schedule: - # Run every day at 7:17 AM - quartz_cron_expression: '44 17 7 * * ?' - timezone_id: Europe/Amsterdam + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS {{- if not is_service_principal}} From e220f9ddd6745a9b3ae10b2dff2b16a4b72d1626 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Mon, 16 Sep 2024 20:35:07 +0200 Subject: [PATCH 12/15] Use the friendly name of service principals when shortening their name (#1770) ## Summary Use the friendly name of service principals when shortening their name. This change is helpful for the prefix in development mode. Instead of adding a prefix like `[dev 1706906c-c0a2-4c25-9f57-3a7aa3cb8123]`, we'll prefix like `[dev my_principal]`. 
--- .../config/mutator/populate_current_user.go | 2 +- internal/init_test.go | 2 +- libs/auth/user.go | 9 ++- libs/auth/user_test.go | 71 +++++++++++++++---- libs/template/helpers.go | 2 +- 5 files changed, 66 insertions(+), 20 deletions(-) diff --git a/bundle/config/mutator/populate_current_user.go b/bundle/config/mutator/populate_current_user.go index b5e0bd43..1e99b327 100644 --- a/bundle/config/mutator/populate_current_user.go +++ b/bundle/config/mutator/populate_current_user.go @@ -33,7 +33,7 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) diag. } b.Config.Workspace.CurrentUser = &config.User{ - ShortName: auth.GetShortUserName(me.UserName), + ShortName: auth.GetShortUserName(me), User: me, } diff --git a/internal/init_test.go b/internal/init_test.go index c3cb0127..d1a89f7b 100644 --- a/internal/init_test.go +++ b/internal/init_test.go @@ -126,7 +126,7 @@ func TestAccBundleInitHelpers(t *testing.T) { }{ { funcName: "{{short_name}}", - expected: auth.GetShortUserName(me.UserName), + expected: auth.GetShortUserName(me), }, { funcName: "{{user_name}}", diff --git a/libs/auth/user.go b/libs/auth/user.go index 8eaa8763..c6aa974f 100644 --- a/libs/auth/user.go +++ b/libs/auth/user.go @@ -4,12 +4,17 @@ import ( "strings" "github.com/databricks/cli/libs/textutil" + "github.com/databricks/databricks-sdk-go/service/iam" ) // Get a short-form username, based on the user's primary email address. // We leave the full range of unicode letters in tact, but remove all "special" characters, // including dots, which are not supported in e.g. experiment names. 
-func GetShortUserName(emailAddress string) string { - local, _, _ := strings.Cut(emailAddress, "@") +func GetShortUserName(user *iam.User) string { + name := user.UserName + if IsServicePrincipal(user.UserName) && user.DisplayName != "" { + name = user.DisplayName + } + local, _, _ := strings.Cut(name, "@") return textutil.NormalizeString(local) } diff --git a/libs/auth/user_test.go b/libs/auth/user_test.go index 62b2d29a..24b61464 100644 --- a/libs/auth/user_test.go +++ b/libs/auth/user_test.go @@ -3,70 +3,111 @@ package auth import ( "testing" + "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" ) func TestGetShortUserName(t *testing.T) { tests := []struct { name string - email string + user *iam.User expected string }{ { - email: "test.user.1234@example.com", + user: &iam.User{ + UserName: "test.user.1234@example.com", + }, expected: "test_user_1234", }, { - email: "tést.üser@example.com", + user: &iam.User{ + UserName: "tést.üser@example.com", + }, expected: "tést_üser", }, { - email: "test$.user@example.com", + user: &iam.User{ + UserName: "test$.user@example.com", + }, expected: "test_user", }, { - email: `jöhn.dœ@domain.com`, // Using non-ASCII characters. + user: &iam.User{ + UserName: `jöhn.dœ@domain.com`, // Using non-ASCII characters. + }, expected: "jöhn_dœ", }, { - email: `first+tag@email.com`, // The plus (+) sign is used for "sub-addressing" in some email services. + user: &iam.User{ + UserName: `first+tag@email.com`, // The plus (+) sign is used for "sub-addressing" in some email services. + }, expected: "first_tag", }, { - email: `email@sub.domain.com`, // Using a sub-domain. + user: &iam.User{ + UserName: `email@sub.domain.com`, // Using a sub-domain. + }, expected: "email", }, { - email: `"_quoted"@domain.com`, // Quoted strings can be part of the local-part. + user: &iam.User{ + UserName: `"_quoted"@domain.com`, // Quoted strings can be part of the local-part. 
+ }, expected: "quoted", }, { - email: `name-o'mally@website.org`, // Single quote in the local-part. + user: &iam.User{ + UserName: `name-o'mally@website.org`, // Single quote in the local-part. + }, expected: "name_o_mally", }, { - email: `user%domain@external.com`, // Percent sign can be used for email routing in legacy systems. + user: &iam.User{ + UserName: `user%domain@external.com`, // Percent sign can be used for email routing in legacy systems. + }, expected: "user_domain", }, { - email: `long.name.with.dots@domain.net`, // Multiple dots in the local-part. + user: &iam.User{ + UserName: `long.name.with.dots@domain.net`, // Multiple dots in the local-part. + }, expected: "long_name_with_dots", }, { - email: `me&you@together.com`, // Using an ampersand (&) in the local-part. + user: &iam.User{ + UserName: `me&you@together.com`, // Using an ampersand (&) in the local-part. + }, expected: "me_you", }, { - email: `user!def!xyz@domain.org`, // The exclamation mark can be valid in some legacy systems. + user: &iam.User{ + UserName: `user!def!xyz@domain.org`, // The exclamation mark can be valid in some legacy systems. + }, expected: "user_def_xyz", }, { - email: `admin@ιντερνετ.com`, // Domain in non-ASCII characters (IDN or Internationalized Domain Name). + user: &iam.User{ + UserName: `admin@ιντερνετ.com`, // Domain in non-ASCII characters (IDN or Internationalized Domain Name). 
+ }, expected: "admin", }, + { + user: &iam.User{ + UserName: `1706906c-c0a2-4c25-9f57-3a7aa3cb8123`, + DisplayName: "my-service-principal", + }, + expected: "my_service_principal", + }, + { + user: &iam.User{ + UserName: `1706906c-c0a2-4c25-9f57-3a7aa3cb8123`, + // This service principal has no DisplayName (it's an optional property) + }, + expected: "1706906c_c0a2_4c25_9f57_3a7aa3cb8123", + }, } for _, tt := range tests { - assert.Equal(t, tt.expected, GetShortUserName(tt.email)) + assert.Equal(t, tt.expected, GetShortUserName(tt.user)) } } diff --git a/libs/template/helpers.go b/libs/template/helpers.go index 1dfe74d7..88c73cc4 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -119,7 +119,7 @@ func loadHelpers(ctx context.Context) template.FuncMap { return "", err } } - return auth.GetShortUserName(cachedUser.UserName), nil + return auth.GetShortUserName(cachedUser), nil }, // Get the default workspace catalog. If there is no default, or if // Unity Catalog is not enabled, return an empty string. From bcab6ca37b27c71156cdb3a9119db9becef4f869 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 18 Sep 2024 12:23:07 +0200 Subject: [PATCH 13/15] Fixed detecting full syntax variable override which includes type field (#1775) ## Changes Fixes #1773 ## Tests Confirmed manually --- bundle/config/root.go | 21 ++++++++++++++++--- bundle/tests/complex_variables_test.go | 18 ++++++++++++++++ bundle/tests/variables/complex/databricks.yml | 13 ++++++++++++ 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/bundle/config/root.go b/bundle/config/root.go index 46578769..884c2e1c 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -409,18 +409,33 @@ func (r *Root) MergeTargetOverrides(name string) error { var variableKeywords = []string{"default", "lookup"} // isFullVariableOverrideDef checks if the given value is a full syntax varaible override. 
-// A full syntax variable override is a map with only one of the following -// keys: "default", "lookup". +// A full syntax variable override is a map with either 1 or 2 keys. +// If it's 2 keys, the keys should be "default" and "type". +// If it's 1 key, the key should be one of the following keys: "default", "lookup". func isFullVariableOverrideDef(v dyn.Value) bool { mv, ok := v.AsMap() if !ok { return false } - if mv.Len() != 1 { + // If the map has more than 2 keys, it is not a full variable override. + if mv.Len() > 2 { return false } + // If the map has 2 keys, one of them should be "default" and the other is "type" + if mv.Len() == 2 { + if _, ok := mv.GetByString("type"); !ok { + return false + } + + if _, ok := mv.GetByString("default"); !ok { + return false + } + + return true + } + for _, keyword := range variableKeywords { if _, ok := mv.GetByString(keyword); ok { return true diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go index 6371071c..7a9a53a7 100644 --- a/bundle/tests/complex_variables_test.go +++ b/bundle/tests/complex_variables_test.go @@ -88,3 +88,21 @@ func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) { require.Equalf(t, "false", cluster.NewCluster.SparkConf["spark.speculation"], "cluster: %v", cluster.JobClusterKey) } } + +func TestComplexVariablesOverrideWithFullSyntax(t *testing.T) { + b, diags := loadTargetWithDiags("variables/complex", "dev") + require.Empty(t, diags) + + diags = bundle.Apply(context.Background(), b, bundle.Seq( + mutator.SetVariables(), + mutator.ResolveVariableReferencesInComplexVariables(), + mutator.ResolveVariableReferences( + "variables", + ), + )) + require.NoError(t, diags.Error()) + require.Empty(t, diags) + + complexvar := b.Config.Variables["complexvar"].Value + require.Equal(t, map[string]interface{}{"key1": "1", "key2": "2", "key3": "3"}, complexvar) +} diff --git a/bundle/tests/variables/complex/databricks.yml 
b/bundle/tests/variables/complex/databricks.yml index ca27f606..3b32a7c8 100644 --- a/bundle/tests/variables/complex/databricks.yml +++ b/bundle/tests/variables/complex/databricks.yml @@ -35,6 +35,13 @@ variables: - jar: "/path/to/jar" - egg: "/path/to/egg" - whl: "/path/to/whl" + complexvar: + type: complex + description: "A complex variable" + default: + key1: "value1" + key2: "value2" + key3: "value3" targets: @@ -49,3 +56,9 @@ targets: spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false + complexvar: + type: complex + default: + key1: "1" + key2: "2" + key3: "3" From e2c1d51d8437963bec84c857b74bb210b78b26b0 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 18 Sep 2024 13:26:16 +0200 Subject: [PATCH 14/15] [Release] Release v0.228.1 (#1778) Bundles: * Added listing cluster filtering for cluster lookups ([#1754](https://github.com/databricks/cli/pull/1754)). * Expand library globs relative to the sync root ([#1756](https://github.com/databricks/cli/pull/1756)). * Fixed generated YAML missing 'default' for empty values ([#1765](https://github.com/databricks/cli/pull/1765)). * Use periodic triggers in all templates ([#1739](https://github.com/databricks/cli/pull/1739)). * Use the friendly name of service principals when shortening their name ([#1770](https://github.com/databricks/cli/pull/1770)). * Fixed detecting full syntax variable override which includes type field ([#1775](https://github.com/databricks/cli/pull/1775)). Internal: * Pass copy of `dyn.Path` to callback function ([#1747](https://github.com/databricks/cli/pull/1747)). * Make bundle JSON schema modular with `$defs` ([#1700](https://github.com/databricks/cli/pull/1700)). * Alias variables block in the `Target` struct ([#1748](https://github.com/databricks/cli/pull/1748)). * Add end to end integration tests for bundle JSON schema ([#1726](https://github.com/databricks/cli/pull/1726)). 
* Fix artifact upload integration tests ([#1767](https://github.com/databricks/cli/pull/1767)). API Changes: * Added `databricks quality-monitors regenerate-dashboard` command. OpenAPI commit d05898328669a3f8ab0c2ecee37db2673d3ea3f7 (2024-09-04) Dependency updates: * Bump golang.org/x/term from 0.23.0 to 0.24.0 ([#1757](https://github.com/databricks/cli/pull/1757)). * Bump golang.org/x/oauth2 from 0.22.0 to 0.23.0 ([#1761](https://github.com/databricks/cli/pull/1761)). * Bump golang.org/x/text from 0.17.0 to 0.18.0 ([#1759](https://github.com/databricks/cli/pull/1759)). * Bump github.com/databricks/databricks-sdk-go from 0.45.0 to 0.46.0 ([#1760](https://github.com/databricks/cli/pull/1760)). --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6383125..32a7e5cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Version changelog +## [Release] Release v0.228.1 + +Bundles: + * Added listing cluster filtering for cluster lookups ([#1754](https://github.com/databricks/cli/pull/1754)). + * Expand library globs relative to the sync root ([#1756](https://github.com/databricks/cli/pull/1756)). + * Fixed generated YAML missing 'default' for empty values ([#1765](https://github.com/databricks/cli/pull/1765)). + * Use periodic triggers in all templates ([#1739](https://github.com/databricks/cli/pull/1739)). + * Use the friendly name of service principals when shortening their name ([#1770](https://github.com/databricks/cli/pull/1770)). + * Fixed detecting full syntax variable override which includes type field ([#1775](https://github.com/databricks/cli/pull/1775)). + +Internal: + * Pass copy of `dyn.Path` to callback function ([#1747](https://github.com/databricks/cli/pull/1747)). + * Make bundle JSON schema modular with `$defs` ([#1700](https://github.com/databricks/cli/pull/1700)). + * Alias variables block in the `Target` struct ([#1748](https://github.com/databricks/cli/pull/1748)). 
+ * Add end to end integration tests for bundle JSON schema ([#1726](https://github.com/databricks/cli/pull/1726)). + * Fix artifact upload integration tests ([#1767](https://github.com/databricks/cli/pull/1767)). + +API Changes: + * Added `databricks quality-monitors regenerate-dashboard` command. + +OpenAPI commit d05898328669a3f8ab0c2ecee37db2673d3ea3f7 (2024-09-04) +Dependency updates: + * Bump golang.org/x/term from 0.23.0 to 0.24.0 ([#1757](https://github.com/databricks/cli/pull/1757)). + * Bump golang.org/x/oauth2 from 0.22.0 to 0.23.0 ([#1761](https://github.com/databricks/cli/pull/1761)). + * Bump golang.org/x/text from 0.17.0 to 0.18.0 ([#1759](https://github.com/databricks/cli/pull/1759)). + * Bump github.com/databricks/databricks-sdk-go from 0.45.0 to 0.46.0 ([#1760](https://github.com/databricks/cli/pull/1760)). + ## [Release] Release v0.228.0 CLI: From cf989a7e10e56f0b021eb4ffc5a7b793da25b540 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 19 Sep 2024 13:21:32 +0200 Subject: [PATCH 15/15] Upgrade to TF provider 1.52 (#1781) ## Changes Upgrade to TF provider 1.52 We also temporarily skip generating plugin framework structs to unblock upgrade as generation does not work yet and needs to be fixed separately --- .../tf/codegen/generator/generator.go | 14 +++++++++++++- bundle/internal/tf/codegen/schema/version.go | 2 +- .../tf/schema/data_source_clusters.go | 16 ++++++++++++---- .../schema/data_source_external_location.go | 1 + .../internal/tf/schema/data_source_share.go | 2 ++ ...omatic_cluster_update_workspace_setting.go | 18 ++++++------------ bundle/internal/tf/schema/resource_cluster.go | 1 + ...ance_security_profile_workspace_setting.go | 4 ++-- ...d_security_monitoring_workspace_setting.go | 2 +- .../tf/schema/resource_model_serving.go | 18 ++++++++++-------- bundle/internal/tf/schema/resource_share.go | 19 +++++++++++++------ .../internal/tf/schema/resource_sql_table.go | 1 + bundle/internal/tf/schema/root.go | 2 +- 13 files changed, 64 
insertions(+), 36 deletions(-) diff --git a/bundle/internal/tf/codegen/generator/generator.go b/bundle/internal/tf/codegen/generator/generator.go index 86d76243..b31fdf15 100644 --- a/bundle/internal/tf/codegen/generator/generator.go +++ b/bundle/internal/tf/codegen/generator/generator.go @@ -51,9 +51,15 @@ func (r *root) Generate(path string) error { } func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error { - // Generate types for resources. + // Generate types for resources var resources []*namedBlock for _, k := range sortKeys(schema.ResourceSchemas) { + // Skipping all plugin framework struct generation. + // TODO: This is a temporary fix, generation should be fixed in the future. + if strings.HasSuffix(k, "_pluginframework") { + continue + } + v := schema.ResourceSchemas[k] b := &namedBlock{ filePattern: "resource_%s.go", @@ -71,6 +77,12 @@ func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error // Generate types for data sources. var dataSources []*namedBlock for _, k := range sortKeys(schema.DataSourceSchemas) { + // Skipping all plugin framework struct generation. + // TODO: This is a temporary fix, generation should be fixed in the future. 
+ if strings.HasSuffix(k, "_pluginframework") { + continue + } + v := schema.DataSourceSchemas[k] b := &namedBlock{ filePattern: "data_source_%s.go", diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index efb29724..b71ea7d1 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.50.0" +const ProviderVersion = "1.52.0" diff --git a/bundle/internal/tf/schema/data_source_clusters.go b/bundle/internal/tf/schema/data_source_clusters.go index 7a5f3053..8c5f9578 100644 --- a/bundle/internal/tf/schema/data_source_clusters.go +++ b/bundle/internal/tf/schema/data_source_clusters.go @@ -2,8 +2,16 @@ package schema -type DataSourceClusters struct { - ClusterNameContains string `json:"cluster_name_contains,omitempty"` - Id string `json:"id,omitempty"` - Ids []string `json:"ids,omitempty"` +type DataSourceClustersFilterBy struct { + ClusterSources []string `json:"cluster_sources,omitempty"` + ClusterStates []string `json:"cluster_states,omitempty"` + IsPinned bool `json:"is_pinned,omitempty"` + PolicyId string `json:"policy_id,omitempty"` +} + +type DataSourceClusters struct { + ClusterNameContains string `json:"cluster_name_contains,omitempty"` + Id string `json:"id,omitempty"` + Ids []string `json:"ids,omitempty"` + FilterBy *DataSourceClustersFilterBy `json:"filter_by,omitempty"` } diff --git a/bundle/internal/tf/schema/data_source_external_location.go b/bundle/internal/tf/schema/data_source_external_location.go index a3e78cbd..e1ad9dc3 100644 --- a/bundle/internal/tf/schema/data_source_external_location.go +++ b/bundle/internal/tf/schema/data_source_external_location.go @@ -19,6 +19,7 @@ type DataSourceExternalLocationExternalLocationInfo struct { CreatedBy string `json:"created_by,omitempty"` CredentialId string `json:"credential_id,omitempty"` CredentialName string `json:"credential_name,omitempty"` + 
Fallback bool `json:"fallback,omitempty"` IsolationMode string `json:"isolation_mode,omitempty"` MetastoreId string `json:"metastore_id,omitempty"` Name string `json:"name,omitempty"` diff --git a/bundle/internal/tf/schema/data_source_share.go b/bundle/internal/tf/schema/data_source_share.go index 3b40fbb5..da9afaae 100644 --- a/bundle/internal/tf/schema/data_source_share.go +++ b/bundle/internal/tf/schema/data_source_share.go @@ -18,12 +18,14 @@ type DataSourceShareObject struct { AddedBy string `json:"added_by,omitempty"` CdfEnabled bool `json:"cdf_enabled,omitempty"` Comment string `json:"comment,omitempty"` + Content string `json:"content,omitempty"` DataObjectType string `json:"data_object_type"` HistoryDataSharingStatus string `json:"history_data_sharing_status,omitempty"` Name string `json:"name"` SharedAs string `json:"shared_as,omitempty"` StartVersion int `json:"start_version,omitempty"` Status string `json:"status,omitempty"` + StringSharedAs string `json:"string_shared_as,omitempty"` Partition []DataSourceShareObjectPartition `json:"partition,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go b/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go index e95639de..5d7f6a14 100644 --- a/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go +++ b/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go @@ -2,20 +2,14 @@ package schema -type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceEnablementDetails struct { - ForcedForComplianceMode bool `json:"forced_for_compliance_mode,omitempty"` - UnavailableForDisabledEntitlement bool `json:"unavailable_for_disabled_entitlement,omitempty"` - UnavailableForNonEnterpriseTier bool `json:"unavailable_for_non_enterprise_tier,omitempty"` -} - type 
ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindowWeekDayBasedScheduleWindowStartTime struct { - Hours int `json:"hours,omitempty"` - Minutes int `json:"minutes,omitempty"` + Hours int `json:"hours"` + Minutes int `json:"minutes"` } type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindowWeekDayBasedSchedule struct { - DayOfWeek string `json:"day_of_week,omitempty"` - Frequency string `json:"frequency,omitempty"` + DayOfWeek string `json:"day_of_week"` + Frequency string `json:"frequency"` WindowStartTime *ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindowWeekDayBasedScheduleWindowStartTime `json:"window_start_time,omitempty"` } @@ -25,9 +19,9 @@ type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspa type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspace struct { CanToggle bool `json:"can_toggle,omitempty"` - Enabled bool `json:"enabled,omitempty"` + Enabled bool `json:"enabled"` + EnablementDetails []any `json:"enablement_details,omitempty"` RestartEvenIfNoUpdatesAvailable bool `json:"restart_even_if_no_updates_available,omitempty"` - EnablementDetails *ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceEnablementDetails `json:"enablement_details,omitempty"` MaintenanceWindow *ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindow `json:"maintenance_window,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_cluster.go b/bundle/internal/tf/schema/resource_cluster.go index e4106d04..4ae063c8 100644 --- a/bundle/internal/tf/schema/resource_cluster.go +++ b/bundle/internal/tf/schema/resource_cluster.go @@ -176,6 +176,7 @@ type ResourceCluster struct { IdempotencyToken string `json:"idempotency_token,omitempty"` InstancePoolId string `json:"instance_pool_id,omitempty"` IsPinned bool `json:"is_pinned,omitempty"` + 
NoWait bool `json:"no_wait,omitempty"` NodeTypeId string `json:"node_type_id,omitempty"` NumWorkers int `json:"num_workers,omitempty"` PolicyId string `json:"policy_id,omitempty"` diff --git a/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go b/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go index 50815f75..8265adae 100644 --- a/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go +++ b/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go @@ -3,8 +3,8 @@ package schema type ResourceComplianceSecurityProfileWorkspaceSettingComplianceSecurityProfileWorkspace struct { - ComplianceStandards []string `json:"compliance_standards,omitempty"` - IsEnabled bool `json:"is_enabled,omitempty"` + ComplianceStandards []string `json:"compliance_standards"` + IsEnabled bool `json:"is_enabled"` } type ResourceComplianceSecurityProfileWorkspaceSetting struct { diff --git a/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go b/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go index 2f552402..e9c3b0ab 100644 --- a/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go +++ b/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go @@ -3,7 +3,7 @@ package schema type ResourceEnhancedSecurityMonitoringWorkspaceSettingEnhancedSecurityMonitoringWorkspace struct { - IsEnabled bool `json:"is_enabled,omitempty"` + IsEnabled bool `json:"is_enabled"` } type ResourceEnhancedSecurityMonitoringWorkspaceSetting struct { diff --git a/bundle/internal/tf/schema/resource_model_serving.go b/bundle/internal/tf/schema/resource_model_serving.go index 379807a5..29d55cd5 100644 --- a/bundle/internal/tf/schema/resource_model_serving.go +++ b/bundle/internal/tf/schema/resource_model_serving.go @@ -95,14 +95,16 @@ type ResourceModelServingConfigServedEntities 
struct { } type ResourceModelServingConfigServedModels struct { - EnvironmentVars map[string]string `json:"environment_vars,omitempty"` - InstanceProfileArn string `json:"instance_profile_arn,omitempty"` - ModelName string `json:"model_name"` - ModelVersion string `json:"model_version"` - Name string `json:"name,omitempty"` - ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` - WorkloadSize string `json:"workload_size"` - WorkloadType string `json:"workload_type,omitempty"` + EnvironmentVars map[string]string `json:"environment_vars,omitempty"` + InstanceProfileArn string `json:"instance_profile_arn,omitempty"` + MaxProvisionedThroughput int `json:"max_provisioned_throughput,omitempty"` + MinProvisionedThroughput int `json:"min_provisioned_throughput,omitempty"` + ModelName string `json:"model_name"` + ModelVersion string `json:"model_version"` + Name string `json:"name,omitempty"` + ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` + WorkloadSize string `json:"workload_size,omitempty"` + WorkloadType string `json:"workload_type,omitempty"` } type ResourceModelServingConfigTrafficConfigRoutes struct { diff --git a/bundle/internal/tf/schema/resource_share.go b/bundle/internal/tf/schema/resource_share.go index e531e777..37f4d454 100644 --- a/bundle/internal/tf/schema/resource_share.go +++ b/bundle/internal/tf/schema/resource_share.go @@ -18,20 +18,27 @@ type ResourceShareObject struct { AddedBy string `json:"added_by,omitempty"` CdfEnabled bool `json:"cdf_enabled,omitempty"` Comment string `json:"comment,omitempty"` + Content string `json:"content,omitempty"` DataObjectType string `json:"data_object_type"` HistoryDataSharingStatus string `json:"history_data_sharing_status,omitempty"` Name string `json:"name"` SharedAs string `json:"shared_as,omitempty"` StartVersion int `json:"start_version,omitempty"` Status string `json:"status,omitempty"` + StringSharedAs string `json:"string_shared_as,omitempty"` Partition 
[]ResourceShareObjectPartition `json:"partition,omitempty"` } type ResourceShare struct { - CreatedAt int `json:"created_at,omitempty"` - CreatedBy string `json:"created_by,omitempty"` - Id string `json:"id,omitempty"` - Name string `json:"name"` - Owner string `json:"owner,omitempty"` - Object []ResourceShareObject `json:"object,omitempty"` + Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + Id string `json:"id,omitempty"` + Name string `json:"name"` + Owner string `json:"owner,omitempty"` + StorageLocation string `json:"storage_location,omitempty"` + StorageRoot string `json:"storage_root,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` + Object []ResourceShareObject `json:"object,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_sql_table.go b/bundle/internal/tf/schema/resource_sql_table.go index 51fb3bc0..4f305c52 100644 --- a/bundle/internal/tf/schema/resource_sql_table.go +++ b/bundle/internal/tf/schema/resource_sql_table.go @@ -15,6 +15,7 @@ type ResourceSqlTable struct { ClusterKeys []string `json:"cluster_keys,omitempty"` Comment string `json:"comment,omitempty"` DataSourceFormat string `json:"data_source_format,omitempty"` + EffectiveProperties map[string]string `json:"effective_properties,omitempty"` Id string `json:"id,omitempty"` Name string `json:"name"` Options map[string]string `json:"options,omitempty"` diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index ebdb7f09..5fc34d6b 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.50.0" +const ProviderVersion = "1.52.0" func NewRoot() *Root { return &Root{